View Single Post
Old 08-15-2022, 02:20 PM   #23
bugmen00t
Connoisseur
bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!
 
bugmen00t's Avatar
 
Posts: 82
Karma: 100000
Join Date: Aug 2015
Device: Kindle Keyboard 3G + Kindle Voyage WiFi + Kindle PW11 Kids WiFi
New recipes (part 11 of ??)

More Russian and Ukrainian sources.

Cвободное пространство: news aggregator from "Novaya Gazeta" editors team. Favicon replacemet.
Fixes needed:
  • No images in articles (.webp)
  • Poor text formatting
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe

class NovayaGazeta(BasicNewsRecipe):
    title           	  = '\u0421\u0432\u043E\u0431\u043E\u0434\u043D\u043E\u0435 \u043F\u0440\u043E\u0441\u0442\u0440\u0430\u043D\u0441\u0442\u0432\u043E'
    __author__            = 'bugmen00t'
    description           = '\u041E\u0442\u043A\u0440\u044B\u0442\u044B\u0439 \u043A\u043E\u043D\u0441\u0442\u0440\u0443\u043A\u0442\u043E\u0440 \u043A\u043E\u043D\u0442\u0435\u043D\u0442\u0430, \u043F\u043E\u0434\u0433\u043E\u0442\u043E\u0432\u043B\u0435\u043D\u043D\u044B\u0439 \u0440\u043E\u0441\u0441\u0438\u0439\u0441\u043A\u043E\u0439 \u0440\u0435\u0434\u0430\u043A\u0446\u0438\u0435\u0439 \u00AB\u041D\u043E\u0432\u043E\u0439 \u0433\u0430\u0437\u0435\u0442\u044B\u00BB, \u0434\u0435\u0442\u0430\u043B\u0438 \u043A\u043E\u0442\u043E\u0440\u043E\u0433\u043E \u043A\u0430\u0436\u0434\u044B\u0439 \u0438\u0437 \u0447\u0438\u0442\u0430\u0442\u0435\u043B\u0435\u0439 \u0441\u043E\u0431\u0438\u0440\u0430\u0435\u0442 \u0434\u043B\u044F \u0441\u0435\u0431\u044F \u0441\u0430\u043C. \u041A\u043E\u043D\u0441\u0442\u0440\u0443\u043A\u0442\u043E\u0440 \u2014 \u044D\u0442\u043E \u043D\u043E\u0432\u044B\u0439 \u0441\u043F\u043E\u0441\u043E\u0431 \u0447\u0438\u0442\u0430\u0442\u044C \u043C\u0430\u0442\u0435\u0440\u0438\u0430\u043B\u044B \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u043E\u0432 \u00AB\u041D\u043E\u0432\u043E\u0439 \u0433\u0430\u0437\u0435\u0442\u044B\u00BB \u0438 \u0432\u0441\u0435 \u043E\u0441\u043D\u043E\u0432\u043D\u044B\u0435 \u043F\u043E\u0442\u043E\u043A\u0438 \u043A\u043E\u043D\u0442\u0435\u043D\u0442\u0430: \u043B\u044E\u0431\u044B\u0435 \u0432\u0435\u0431-\u0441\u0430\u0439\u0442\u044B, Telegram-\u043A\u0430\u043D\u0430\u043B\u044B, Youtube-\u043A\u0430\u043D\u0430\u043B\u044B.'
    publisher             = '\u0420\u0435\u0434\u0430\u043A\u0446\u0438\u043E\u043D\u043D\u044B\u0439 \u043A\u043E\u043B\u043B\u0435\u043A\u0442\u0438\u0432 \u00AB\u041D\u043E\u0432\u043E\u0439 \u0433\u0430\u0437\u0435\u0442\u044B\u00BB'
    category              = 'newspaper'
    cover_url = u'https://novaya.media/ic_puzzle_footer.svg'
    language              = 'ru'
    no_stylesheets        = False
    remove_javascript = False
    auto_cleanup   = False
    oldest_article = 7
    max_articles_per_feed = 30

    remove_tags_before = dict(name='h1', attrs={'class': 'Header_title__2zxwH'})

    remove_tags_after = dict(name='div', attrs={'class': 'Post_contentInnerWrapper__3GWHl'})

    feeds = [
        ('\u0421\u0432\u043E\u0431\u043E\u0434\u043D\u043E\u0435 \u043F\u0440\u043E\u0441\u0442\u0440\u0430\u043D\u0441\u0442\u0432\u043E', 'https://novaya.media/feed/rss')
        ]
        
    def print_version(self, url):
        return url + '?print=true'



KyivPost: Ukraine’s English-language newspaper. Favicon.
Fixes needed:
  • No lead images
English version:
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe

class KyivPost(BasicNewsRecipe):
    title           	  = 'Kyiv Post'
    __author__            = 'bugmen00t'
    description           = 'The Kyiv Post is Ukraine\u2019s English-language newspaper. The newspaper\u2019s first print edition came out on Oct. 18, 1995, and went online in 1997. The newspaper\u2019s motto is \u201CUkraine\u2019s Global Voice,\u201D which in 2018 replaced the previous motto of \u201CIndependence. Community. Trust.\u201D Both slogans reflect the newspaper\u2019s commitment to the highest journalistic and ethical standards.'
    publisher             = 'BIZNESGRUPP TOV'
    category              = 'newspaper'
    cover_url = u'https://www.kyivpost.com/wp-content/themes/kyivpost/assets/img/svg/logo-foot.svg'
    language              = 'en_UK'
    no_stylesheets        = False
    remove_javascript = True
    auto_cleanup   = False
    oldest_article = 7
    max_articles_per_feed = 10

    remove_tags_before = dict(name='article', attrs={'class': 'article'})

    remove_tags_after = dict(name='article', attrs={'class': 'article'})

    remove_tags =   [
        dict(name='div', attrs={'class': 'entry-footer hide_post_header'})
        ] 

    feeds = [
        ('News', 'https://www.kyivpost.com/feed')
        ]

Ukrainian version:
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe

class KyivPost(BasicNewsRecipe):
    title           	  = 'Kyiv Post'
    __author__            = 'bugmen00t'
    description           = '\u0423\u043A\u0440\u0430\u0457\u043D\u0441\u044C\u043A\u0430 \u0430\u043D\u0433\u043B\u043E\u043C\u043E\u0432\u043D\u0430 \u0433\u0430\u0437\u0435\u0442\u0430, \u043D\u0430\u0433\u043E\u0440\u043E\u0434\u0436\u0435\u043D\u0430 \u0443 2014 \u0440\u043E\u043A\u0443. \u041F\u0435\u0440\u0448\u0438\u0439 \u0434\u0440\u0443\u043A\u043E\u0432\u0430\u043D\u0438\u0439 \u043F\u0440\u0438\u043C\u0456\u0440\u043D\u0438\u043A \u0433\u0430\u0437\u0435\u0442\u0438 \u0432\u0438\u0439\u0448\u043E\u0432 18 \u0436\u043E\u0432\u0442\u043D\u044F 1995 \u0440\u043E\u043A\u0443, \u0430 \u0432 \u0406\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0432\u0430\u0440\u0456\u0430\u043D\u0442\u0456 Kyiv Post \u0437\u2019\u044F\u0432\u0438\u043B\u0430\u0441\u044C \u0432 1997 \u0440\u043E\u0446\u0456. \u0414\u0435\u0432\u0456\u0437 \u0433\u0430\u0437\u0435\u0442\u0438: \u00AB\u0413\u043B\u043E\u0431\u0430\u043B\u044C\u043D\u0438\u0439 \u0433\u043E\u043B\u043E\u0441 \u0423\u043A\u0440\u0430\u0457\u043D\u0438\u00BB, \u044F\u043A\u0438\u0439 \u0443 2018 \u0440\u043E\u0446\u0456 \u0437\u0430\u043C\u0456\u043D\u0438\u0432 \u043F\u043E\u043F\u0435\u0440\u0435\u0434\u043D\u0456\u0439 \u0434\u0435\u0432\u0456\u0437 \u00AB\u041D\u0435\u0437\u0430\u043B\u0435\u0436\u043D\u0456\u0441\u0442\u044C. \u0421\u043F\u0456\u043B\u044C\u043D\u043E\u0442\u0430. \u0414\u043E\u0432\u0456\u0440\u0430.\u00BB \u041E\u0431\u0438\u0434\u0432\u0430 \u0433\u0430\u0441\u043B\u0430 \u0432\u0456\u0434\u043E\u0431\u0440\u0430\u0436\u0430\u044E\u0442\u044C \u0442\u0435, \u0449\u043E \u0443 \u0432\u0438\u0434\u0430\u043D\u043D\u0456  \u0434\u043E\u0442\u0440\u0438\u043C\u0443\u044E\u0442\u044C\u0441\u044F \u043D\u0430\u0439\u0432\u0438\u0449\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0441\u044C\u043A\u0438\u0445 \u0442\u0430 \u0435\u0442\u0438\u0447\u043D\u0438\u0445 \u0441\u0442\u0430\u043D\u0434\u0430\u0440\u0442\u0456\u0432.'
    publisher             = 'BIZNESGRUPP TOV'
    category              = 'newspaper'
    cover_url = u'https://www.kyivpost.com/wp-content/themes/kyivpost/assets/img/svg/logo-foot.svg'
    language              = 'uk'
    no_stylesheets        = False
    remove_javascript = True
    auto_cleanup   = False
    oldest_article = 7
    max_articles_per_feed = 10

    remove_tags_before = dict(name='article', attrs={'class': 'article'})

    remove_tags_after = dict(name='article', attrs={'class': 'article'})

    remove_tags =   [
        dict(name='div', attrs={'class': 'entry-footer hide_post_header'})
        ] 

    feeds = [
        ('\u041D\u043E\u0432\u0438\u043D\u0438', 'https://www.kyivpost.com/uk/feed')
        ]

Arabic version (not sure if output is correct as the text is not RTL):
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe

class KyivPost(BasicNewsRecipe):
    title           	  = '\u0643\u064A\u064A\u0641 \u0628\u0648\u0633\u062A (Kyiv Post)'
    __author__            = 'bugmen00t'
    description           = '\u0643\u064A\u064A\u0641 \u0628\u0648\u0633\u062A \u0647\u064A \u0635\u062D\u064A\u0641\u0629 \u0623\u0648\u0643\u0631\u0627\u0646\u064A\u0629 \u062A\u0635\u062F\u0631 \u0628\u0627\u0644\u0644\u063A\u0629 \u0627\u0644\u0625\u0646\u062C\u0644\u064A\u0632\u064A\u0629 \u0648\u0647\u064A \u0641\u0627\u0626\u0632\u0629 \u0628\u0645\u064A\u062F\u0627\u0644\u064A\u0629 Missouri Honor 2014 \u0644\u0644\u062E\u062F\u0645\u0629 \u0627\u0644\u0645\u062A\u0645\u064A\u0632\u0629 \u0641\u064A \u0627\u0644\u0635\u062D\u0627\u0641\u0629. \u0635\u062F\u0631\u062A \u0627\u0644\u0646\u0633\u062E\u0629 \u0627\u0644\u0645\u0637\u0628\u0648\u0639\u0629 \u0627\u0644\u0623\u0648\u0644\u0649 \u0645\u0646 \u0627\u0644\u0635\u062D\u064A\u0641\u0629 \u0641\u064A 18 \u0623\u0643\u062A\u0648\u0628\u0631 1995\u060C \u0648\u062A\u0645 \u0646\u0634\u0631\u0647\u0627 \u0639\u0644\u0649 \u0627\u0644\u0625\u0646\u062A\u0631\u0646\u062A \u0641\u064A \u0639\u0627\u0645 1997. \u0648\u0643\u0627\u0646 \u062C\u0645\u0647\u0648\u0631\u0647\u0627 \u0627\u0644\u0639\u0627\u0644\u0645\u064A \u064A\u0646\u0645\u0648 \u0628\u0627\u0637\u0631\u0627\u062F \u0645\u0646\u0630 \u0630\u0644\u0643 \u0627\u0644\u062D\u064A\u0646 \u060C \u0648\u0628\u0644\u063A \u0630\u0631\u0648\u062A\u0647 \u0628\u0623\u0643\u062B\u0631 \u0645\u0646 65 \u0645\u0644\u064A\u0648\u0646 \u0645\u0634\u0627\u0647\u062F\u0629 \u0644\u0644\u0635\u0641\u062D\u0629 \u0641\u064A \u0639\u0627\u0645 2014. \u0634\u0639\u0627\u0631 \u0627\u0644\u0635\u062D\u064A\u0641\u0629 \u0647\u0648 \u201C\u0635\u0648\u062A \u0623\u0648\u0643\u0631\u0627\u0646\u064A\u0627 \u0644\u0644\u0639\u0627\u0644\u0645\u201D\u060C \u0648\u0627\u0644\u0630\u064A \u062D\u0644 \u0641\u064A \u0639\u0627\u0645 2018 \u0645\u062D\u0644 \u0627\u0644\u0634\u0639\u0627\u0631 \u0627\u0644\u0633\u0627\u0628\u0642 \u201C\u0627\u0644\u0627\u0633\u062A\u0642\u0644\u0627\u0644. \u062A\u0648\u0627\u0635\u0644 \u0627\u062C\u062A\u0645\u0627\u0639\u064A. \u0627\u0644\u062B\u0642\u0629\u201D. \u0643\u0644\u0627 \u0627\u0644\u0634\u0639\u0627\u0631\u064A\u0646 \u064A\u0639\u0643\u0633\u0627\u0646 \u0627\u0644\u062A\u0632\u0627\u0645 \u0627\u0644\u0635\u062D\u064A\u0641\u0629 \u0628\u0623\u0639\u0644\u0649 \u0627\u0644\u0645\u0639\u0627\u064A\u064A\u0631 \u0627\u0644\u0635\u062D\u0641\u064A\u0629 \u0648\u0627\u0644\u0623\u062E\u0644\u0627\u0642\u064A\u0629.'
    publisher             = 'BIZNESGRUPP TOV'
    category              = 'newspaper'
    cover_url = u'https://www.kyivpost.com/wp-content/themes/kyivpost/assets/img/svg/logo-foot.svg'
    language              = 'ar'
    no_stylesheets        = False
    remove_javascript = True
    auto_cleanup   = False
    oldest_article = 7
    max_articles_per_feed = 10

    remove_tags_before = dict(name='article', attrs={'class': 'article'})

    remove_tags_after = dict(name='article', attrs={'class': 'article'})

    remove_tags =   [
        dict(name='div', attrs={'class': 'entry-footer hide_post_header'})
        ] 

    feeds = [
        ('\u0627\u0644\u0625\u062E\u0628\u0627\u0631\u064A\u0629', 'https://www.kyivpost.com/ar/feed')
        ]



The Moscow Times: English- and Russian-language online newspaper. Favicon.
Russain version:
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe

class MoscowTimes(BasicNewsRecipe):
    title           	  = 'The Moscow Times'
    __author__            = 'bugmen00t'
    description           = '\u0410\u043D\u0433\u043B\u043E\u044F\u0437\u044B\u0447\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435 \u043E \u0420\u043E\u0441\u0441\u0438\u0438. \u041D\u0430\u0448\u0430 \u0440\u0435\u0434\u0430\u043A\u0446\u0438\u044F \u0434\u0435\u043B\u0438\u0442\u0441\u044F \u0441 \u0447\u0438\u0442\u0430\u0442\u0435\u043B\u044F\u043C\u0438 \u043F\u043E \u0432\u0441\u0435\u043C\u0443 \u043C\u0438\u0440\u0443 \u043F\u043E\u0441\u043B\u0435\u0434\u043D\u0438\u043C\u0438 \u043D\u043E\u0432\u043E\u0441\u0442\u044F\u043C\u0438 \u0438 \u0441\u0430\u043C\u044B\u043C\u0438 \u0438\u043D\u0442\u0435\u0440\u0435\u0441\u043D\u044B\u043C\u0438 \u0441\u043E\u0431\u044B\u0442\u0438\u044F\u043C\u0438 \u0441\u0430\u043C\u043E\u0439 \u0431\u043E\u043B\u044C\u0448\u043E\u0439 \u0441\u0442\u0440\u0430\u043D\u044B \u043D\u0430 \u0417\u0435\u043C\u043B\u0435.'
    publisher             = '\u041E\u041E\u041E \u0422\u0438\u044D\u043C\u0442\u0438'
    category              = 'newspaper'
    cover_url = u'https://static.themoscowtimes.com/img/share_default.jpg'
    language              = 'ru'
    no_stylesheets        = False
    remove_javascript = True
    auto_cleanup   = False
    oldest_article = 7
    max_articles_per_feed = 50

    remove_tags_before = dict(name='article')

    remove_tags_after = dict(name='div', attrs={'class': 'article__bottom'})

    remove_tags =   [
        dict(name='div', attrs={'class': 'social'}),
        dict(name='div', attrs={'class': 'related-article__content'})
        ] 

    feeds = [
        ('\u041D\u043E\u0432\u043E\u0441\u0442\u0438', 'https://www.moscowtimes.ru/rss/news'),
        ('\u041C\u043D\u0435\u043D\u0438\u044F', 'https://www.moscowtimes.ru/rss/opinion'),
        ('\u041F\u0435\u0440\u0435\u0432\u043E\u0434\u044B Financial Times', 'https://www.moscowtimes.ru/rss/ft')
        ]
        
    def preprocess_html(self, soup):
        for img in soup.findAll('img', attrs={'data-src': True}):
            img['src'] = img['data-src']
        return soup

English version:
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe

class MoscowTimes(BasicNewsRecipe):
    title           	  = 'The Moscow Times'
    __author__            = 'bugmen00t'
    description           = 'The Moscow Times is Russia’s leading, independent English-language media outlet. Our team of Russian and English journalists provide readers across the world with breaking news, engaging stories and balanced reporting about the largest country on Earth.'
    publisher             = 'Tiamti LLC'
    category              = 'newspaper'
    cover_url = u'https://static.themoscowtimes.com/img/share_default.jpg'
    language              = 'en_RU'
    no_stylesheets        = False
    remove_javascript = True
    auto_cleanup   = False
    oldest_article = 14
    max_articles_per_feed = 50

    remove_tags_before = dict(name='article')

    remove_tags_after = dict(name='div', attrs={'class': 'article__tags'})

    remove_tags =   [
        dict(name='aside'),
        dict(name='footer'),
        dict(name='section', attrs={'class': 'cluster'}),
        dict(name='div', attrs={'class': 'article__tags'}),
        dict(name='div', attrs={'class': 'social'}),
        dict(name='div', attrs={'class': 'related-article__content'})
        ] 

    feeds = [
        ('News', 'https://www.themoscowtimes.com/rss/news'),
        ('Opinion', 'https://www.themoscowtimes.com/rss/opinion'),
        ('Arts and Life', 'https://www.themoscowtimes.com/rss/city'),
        ('Meanwhile', 'https://www.themoscowtimes.com/rss/meanwhile')
        ]



Довод: local news from Vladimir city & local regions. Favicon.
Fixes needed:
  • No lead image
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe

class Dovod(BasicNewsRecipe):
    title           	  = '\u0414\u043E\u0432\u043E\u0434'
    __author__            = 'bugmen00t'
    description           = '\u0421\u0430\u0439\u0442 \u00AB\u0414\u043E\u0432\u043E\u0434\u00BB \u043F\u043E\u043B\u0443\u0447\u0438\u043B \u0441\u0432\u043E\u0451 \u043D\u0430\u0437\u0432\u0430\u043D\u0438\u0435 \u0432 \u0447\u0435\u0441\u0442\u044C \u0440\u0430\u043D\u0435\u0435 \u0441\u043E\u0437\u0434\u0430\u043D\u043D\u043E\u0433\u043E \u043F\u0440\u043E\u0435\u043A\u0442\u0430 \u00AB\u0414\u043E\u0432\u043E\u0434 \u2014 \u0412\u043B\u0430\u0434\u0438\u043C\u0438\u0440\u0441\u043A\u0438\u0435 \u043E\u0431\u0449\u0435\u0441\u0442\u0432\u0435\u043D\u043D\u044B\u0435 \u0434\u0438\u0441\u043A\u0443\u0441\u0441\u0438\u0438\u00BB. \u041D\u0430\u0448\u0430 \u0446\u0435\u043B\u044C \u2014 \u043E\u0441\u0432\u0435\u0449\u0435\u043D\u0438\u0435 \u0430\u043A\u0442\u0443\u0430\u043B\u044C\u043D\u044B\u0445 \u043E\u0431\u0449\u0435\u0441\u0442\u0432\u0435\u043D\u043D\u043E-\u043F\u043E\u043B\u0438\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0445 \u0432\u043E\u043F\u0440\u043E\u0441\u043E\u0432 \u0438 \u0438\u0445 \u043E\u0442\u043A\u0440\u043E\u0432\u0435\u043D\u043D\u043E\u0435 \u0438 \u043E\u0441\u043C\u044B\u0441\u043B\u0435\u043D\u043D\u043E\u0435 \u043E\u0431\u0441\u0443\u0436\u0434\u0435\u043D\u0438\u0435.'
    publisher             = '\u0418\u043B\u044C\u044F \u041A\u043E\u0441\u044B\u0433\u0438\u043D, \u041A\u0438\u0440\u0438\u043B\u043B \u0418\u0448\u0443\u0442\u0438\u043D'
    category              = 'blog'
    cover_url = u'https://pbs.twimg.com/profile_images/1498229545505284099/l9V1l59Z_400x400.jpg'
    language              = 'ru'
    no_stylesheets        = True
    remove_javascript = False
    auto_cleanup   = False
    oldest_article = 7
    max_articles_per_feed = 10

    remove_tags_before = dict(name='article')

    remove_tags_after = dict(name='div', attrs={'class': 'entry-content'})

    feeds = [
        ('\u0414\u043E\u0432\u043E\u0434', 'https://www.dovod.online/feed/')
        ]
Attached Images
       
Attached Files
File Type: recipe novaya_media.recipe (2.9 KB, 515 views)
File Type: recipe kyivpost_en.recipe (1.4 KB, 511 views)
File Type: recipe kyivpost_ua.recipe (2.9 KB, 505 views)
File Type: recipe kyivpost_ar.recipe (3.4 KB, 511 views)
File Type: recipe moscowtimes_ru.recipe (2.5 KB, 514 views)
File Type: recipe moscowtimes_en.recipe (1.7 KB, 514 views)
File Type: recipe dovod.recipe (2.2 KB, 514 views)

Last edited by bugmen00t; 08-17-2022 at 09:16 AM.
bugmen00t is offline   Reply With Quote