View Single Post
Old 08-26-2022, 08:29 AM   #25
bugmen00t
Connoisseur
bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!
 
bugmen00t's Avatar
 
Posts: 82
Karma: 100000
Join Date: Aug 2015
Device: Kindle Keyboard 3G + Kindle Voyage WiFi + Kindle PW11 Kids WiFi
New recipes (part 13 of ??)

Ukrainian sources.

Радіо Свобода: Prague-based Ukrainian-language TV channel founded by RFE/RL. Favicon.
Fixes needed:
  • In most of the aticles some images are missing
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from calibre.web.feeds.news import BasicNewsRecipe, classes


class RadioSvoboda(BasicNewsRecipe):
    title = '\u0420\u0430\u0434\u0456\u043E \u0421\u0432\u043E\u0431\u043E\u0434\u0430'
    __author__ = 'bugmen00t'
    description = '\u0420\u0430\u0434\u0456\u043E\u0441\u0442\u0430\u043D\u0446\u0456\u044F \u0442\u0430 \u0456\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0432\u0438\u0434\u0430\u043D\u043D\u044F, \u044F\u043A\u0430 \u043F\u043E\u0437\u0438\u0446\u0456\u043E\u043D\u0443\u0454 \u0441\u0435\u0431\u0435 \u044F\u043A \u043F\u0440\u0438\u0432\u0430\u0442\u043D\u0438\u0439 \u043D\u0435\u043A\u043E\u043C\u0435\u0440\u0446\u0456\u0439\u043D\u0438\u0439 \u0456\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u0439\u043D\u043E-\u043D\u043E\u0432\u0438\u043D\u043D\u0438\u0439 \u0437\u0430\u0441\u0456\u0431 \u043C\u0430\u0441\u043E\u0432\u043E\u0457 \u0456\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u0457.'
    publisher = '\u0420\u0430\u0434\u0456\u043E \u0412\u0456\u043B\u044C\u043D\u0430 \u0404\u0432\u0440\u043E\u043F\u0430/\u0420\u0430\u0434\u0456\u043E \u0421\u0432\u043E\u0431\u043E\u0434\u0430 (\u0420\u0412\u0404/\u0420\u0421)'
    category = 'newspaper'
    cover_url = u'https://www.radiosvoboda.org/Content/responsive/RFE/uk-UA/img/top_logo_news.png'
    language = 'uk'
    no_stylesheets = False
    remove_javascript = False
    auto_cleanup = False
    remove_empty_feeds = True
    oldest_article = 7
    max_articles_per_feed = 20

    remove_tags_before = dict(name='h1')

    remove_tags_after = dict(name='div', attrs={'id': 'article-content'})

    remove_tags =   [
        dict(name='p', attrs={'class': 'buttons btn--load-more'}),
        dict(name='div', attrs={'class': 'share--box'}),
        dict(name='div', attrs={'class': 'c-author__btns m-t-md'}),
        dict(name='div', attrs={'class': 'col-xs-12 col-md-4 pull-left live-blog-left'}),
        dict(name='aside'),
        dict(name='div', attrs={'class': 'c-blockquote c-blockquote--has-quote'}),
        dict(name='div', attrs={'class': 'media-block also-read'}),
        dict(name='div', attrs={'class': 'media-block-wrap'}),
        dict(name='div', attrs={'class': 'wsw__embed'}),
        dict(name='div', attrs={'id': 'comments'}),
        ]

    feeds = [
        ('\u0413\u043E\u043B\u043E\u0432\u043D\u0456 \u043D\u043E\u0432\u0438\u043D\u0438', 'https://www.radiosvoboda.org/api/zrqiteuuir'),
        ('\u041D\u043E\u0432\u0438\u043D\u0438: \u041A\u043E\u0440\u043E\u043D\u0430\u0432\u0456\u0440\u0443\u0441', 'https://www.radiosvoboda.org/api/zqk_myekpymv'),
        ('\u041C\u0456\u0436\u043D\u0430\u0440\u043E\u0434\u043D\u0456 \u043D\u043E\u0432\u0438\u043D\u0438', 'https://www.radiosvoboda.org/api/zipkqejjki'),
        ('\u041D\u043E\u0432\u0438\u043D\u0438: \u041F\u043E\u043B\u0456\u0442\u0438\u043A\u0430', 'https://www.radiosvoboda.org/api/zppkoe-jkp'),
        ('\u041D\u043E\u0432\u0438\u043D\u0438: \u0415\u043A\u043E\u043D\u043E\u043C\u0456\u043A\u0430', 'https://www.radiosvoboda.org/api/zvpk_eo-kt'),
        ('\u041D\u043E\u0432\u0438\u043D\u0438: \u0421\u0443\u0441\u043F\u0456\u043B\u044C\u0441\u0442\u0432\u043E', 'https://www.radiosvoboda.org/api/zjmkreybko'),
        ('\u041D\u043E\u0432\u0438\u043D\u0438: \u041F\u043E\u0434\u0456\u0457', 'https://www.radiosvoboda.org/api/ztpkyei-kr'),
        ('\u041D\u043E\u0432\u0438\u043D\u0438: \u0421\u0432\u043E\u0431\u043E\u0434\u0430 \u0441\u043B\u043E\u0432\u0430', 'https://www.radiosvoboda.org/api/zmrjoebtjo'),
        ('\u041D\u043E\u0432\u0438\u043D\u0438: \u041A\u043E\u0440\u0443\u043F\u0446\u0456\u044F', 'https://www.radiosvoboda.org/api/zbobpoetgbpi'),
        ('\u041D\u043E\u0432\u0438\u043D\u0438: \u0412\u0430\u0436\u043B\u0438\u0432\u0456', 'https://www.radiosvoboda.org/api/zmyqmvebromr'),
        ('\u041D\u043E\u0432\u0438\u043D\u0438: \u0421\u043F\u043E\u0440\u0442', 'https://www.radiosvoboda.org/api/ztpmmyei-mmy'),
        ('\u041F\u0443\u0431\u043B\u0456\u043A\u0430\u0446\u0456\u0457', 'https://www.radiosvoboda.org/api/ziibp_ejgbpy'),
        ('\u041F\u043E\u043B\u0456\u0442\u0438\u043A\u0430', 'https://www.radiosvoboda.org/api/ziqioejuip'),
        ('\u0421\u0443\u0441\u043F\u0456\u043B\u044C\u0441\u0442\u0432\u043E', 'https://www.radiosvoboda.org/api/z_qi_erkiy'),
        ('\u0415\u043A\u043E\u043D\u043E\u043C\u0456\u043A\u0430', 'https://www.radiosvoboda.org/api/zpyp_e-rm_'),
        ('\u041A\u0443\u043B\u044C\u0442\u0443\u0440\u0430', 'https://www.radiosvoboda.org/api/zmqipebui_'),
        ('\u0423\u043A\u0440\u0430\u0457\u043D\u0430', 'https://www.radiosvoboda.org/api/zqqirekuiq'),
        ('\u0421\u0432\u0456\u0442', 'https://www.radiosvoboda.org/api/zoqiqeguii'),
        ('\u0412\u0430\u0436\u043B\u0438\u0432\u0435 \u043D\u0430 \u0421\u0432\u043E\u0431\u043E\u0434\u0456', 'https://www.radiosvoboda.org/api/zkk-iqemmbii'),
        ('\u0412\u0456\u0439\u043D\u0430', 'https://www.radiosvoboda.org/api/zijqpqej_opi'),
        ('\u0414\u043E\u043D\u0431\u0430\u0441', 'https://www.radiosvoboda.org/api/z_rppyeruppy'),
        ('\u041A\u0440\u0438\u043C', 'https://www.radiosvoboda.org/api/zbypmietqmmi'),
        ('\u0422\u043E\u0447\u043A\u0430 \u0437\u043E\u0440\u0443', 'https://www.radiosvoboda.org/api/zpqiie-uim'),
        ('\u041A\u043E\u0440\u043E\u043D\u0430\u0432\u0456\u0440\u0443\u0441', 'https://www.radiosvoboda.org/api/zok_mvegpymt'),
        ('\u0421\u043F\u043E\u0440\u0442', 'https://www.radiosvoboda.org/api/z-q_mpevk_m_'),
        ('\u041A\u0438\u0442\u0430\u0439: \u0432\u0438\u043A\u043B\u0438\u043A\u0438', 'https://www.radiosvoboda.org/api/zkyymqemqvmo'),
        ('\u0406\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u0439\u043D\u0430 \u0432\u0456\u0439\u043D\u0430', 'https://www.radiosvoboda.org/api/zuoppyepgppv'),
        ('\u0420\u0430\u0434\u0456\u043E \u0421\u0432\u043E\u0431\u043E\u0434\u0430 Weekly', 'https://www.radiosvoboda.org/api/zb_rmyetrqmv'),
        ('\u041D\u043E\u0432\u0438\u043D\u0438 \u041F\u0440\u0438\u0430\u0437\u043E\u0432\u2019\u044F', 'https://www.radiosvoboda.org/api/zkkvmyemmtmy'),
        ('\u041F\u043E\u0434\u0456\u0457', 'https://www.radiosvoboda.org/api/z-ppp_ev-ppv'),
        ('\u0421\u0445\u0435\u043C\u0438', 'https://www.radiosvoboda.org/api/z-miprevbipo'),
        ('\u0421\u0445\u0435\u043C\u0438: \u0420\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F', 'https://www.radiosvoboda.org/api/ztvppoeiompo'),
        ('\u0421\u0445\u0435\u043C\u0438: \u041D\u043E\u0432\u0438\u043D\u0438', 'https://www.radiosvoboda.org/api/zyibpreqjbpo')
        ]


Вікна: news program of Ukrainian TV-channel STB. Favicon.
Fixes needed:
  • Some images are missing
Russian version
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from calibre.web.feeds.news import BasicNewsRecipe, classes


class ViknaSTB(BasicNewsRecipe):
    title = '\u0412\u0438\u043A\u043D\u0430-\u043D\u043E\u0432\u043E\u0441\u0442\u0438'
    __author__ = 'bugmen00t'
    description = '\u0423\u043A\u0440\u0430\u0438\u043D\u0441\u043A\u0430\u044F \u0438\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0438\u043E\u043D\u043D\u0430\u044F \u043F\u0440\u043E\u0433\u0440\u0430\u043C\u043C\u0430 \u0442\u0435\u043B\u0435\u043A\u0430\u043D\u0430\u043B\u0430 \u00AB\u0421\u0422\u0411\u00BB.'
    publisher = '\u041E\u041E\u041E \u201C\u0421\u041B\u041C \u041D\u043E\u0432\u043E\u0441\u0442\u0438\u201D'
    category = 'newspaper'
    cover_url = u'https://yt3.ggpht.com/QZxhLPDcqhjMitCDPWbzk-Zs-ILbGo3ww1iuV9SIE1k0iqjoOIctT2d4BdyEsSAR6yWsbKbGWg=s900-c-k-c0x00ffffff-no-rj'
    language = 'ru_UK'
    no_stylesheets = False
    remove_javascript = False
    auto_cleanup = False
    remove_empty_feeds = True
    oldest_article = 3
    max_articles_per_feed = 20

    remove_tags_before = dict(name='h1')

    remove_tags_after = dict(name='div', attrs={'class': 'content-wrapper'})

    remove_tags =   [
         dict(name='div', attrs={'class': 'share-content-wrapper flex-wrapper'}),
         dict(name='div', attrs={'class': 'sticky-wrapper'}),
         dict(name='div', attrs={'class': 'promo-wrapper'}),
         dict(name='div', attrs={'class': 'mn-wrapper'}),
         dict(name='div', attrs={'class': 'news-tags border-btm-grey'})
        ]

    feeds = [
        ('\u0412\u0456\u043A\u043D\u0430', 'https://vikna.tv/ru/feed/')
        ]

Ukrainian version
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from calibre.web.feeds.news import BasicNewsRecipe, classes


class ViknaSTB(BasicNewsRecipe):
    title = '\u0412\u0456\u043A\u043D\u0430-\u043D\u043E\u0432\u0438\u043D\u0438'
    __author__ = 'bugmen00t'
    description = '\u0423\u043A\u0440\u0430\u0457\u043D\u0441\u044C\u043A\u0430 \u0456\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u0439\u043D\u0430 \u043F\u0440\u043E\u0433\u0440\u0430\u043C\u0430 \u0442\u0435\u043B\u0435\u043A\u0430\u043D\u0430\u043B\u0443 \u00AB\u0421\u0422\u0411\u00BB.'
    publisher = '\u0422\u041E\u0412 \u201C\u0421\u041B\u041C \u041D\u043E\u0432\u0438\u043D\u0438\u201D'
    category = 'newspaper'
    cover_url = u'https://yt3.ggpht.com/QZxhLPDcqhjMitCDPWbzk-Zs-ILbGo3ww1iuV9SIE1k0iqjoOIctT2d4BdyEsSAR6yWsbKbGWg=s900-c-k-c0x00ffffff-no-rj'
    language = 'uk'
    no_stylesheets = False
    remove_javascript = False
    auto_cleanup = False
    remove_empty_feeds = True
    oldest_article = 3
    max_articles_per_feed = 20

    remove_tags_before = dict(name='h1')

    remove_tags_after = dict(name='div', attrs={'class': 'content-wrapper'})

    remove_tags =   [
         dict(name='div', attrs={'class': 'share-content-wrapper flex-wrapper'}),
         dict(name='div', attrs={'class': 'sticky-wrapper'}),
         dict(name='div', attrs={'class': 'promo-wrapper'}),
         dict(name='div', attrs={'class': 'mn-wrapper'}),
         dict(name='div', attrs={'class': 'news-tags border-btm-grey'})
        ]

    feeds = [
        ('\u0412\u0456\u043A\u043D\u0430', 'https://vikna.tv/feed/')
        ]


Gazeta.ua: web-wersion of Ukrainian newspaper covering politics, economics, culture, sport, arts, and other different topics. Favicon.
Fixes needed:
  • Thumbnails instead of full-size images in images set
  • Could not handle Telegram redirects and inlines
  • Minor in-text ads
Russian version
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from calibre.web.feeds.news import BasicNewsRecipe, classes

class GazetaUA(BasicNewsRecipe):
    title = 'Gazeta.ua'
    __author__ = 'bugmen00t'
    description = '\u0412\u0441\u0435\u0443\u043A\u0440\u0430\u0438\u043D\u0441\u043A\u0430\u044F \u043E\u043D-\u043B\u0430\u0439\u043D \u0433\u0430\u0437\u0435\u0442\u0430. \u041E\u043F\u0435\u0440\u0430\u0442\u0438\u0432\u043D\u044B\u0435 \u043D\u043E\u0432\u043E\u0441\u0442\u0438 \u0432 \u0423\u043A\u0440\u0430\u0438\u043D\u0435 \u0438 \u0432 \u043C\u0438\u0440\u0435: \u0441\u043E\u0431\u044B\u0442\u0438\u044F, \u043F\u043E\u043B\u0438\u0442\u0438\u043A\u0430, \u0441\u043F\u043E\u0440\u0442, \u043A\u0443\u043B\u044C\u0442\u0443\u0440\u0430.'
    publisher = '\u0418\u0437\u0434\u0430\u0442\u0435\u043B\u044C\u0441\u043A\u0430\u044F \u0433\u0440\u0443\u043F\u043F\u0430 "\u041D\u043E\u0432\u0430 \u0456\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u044F"'
    category = 'newspaper'
    cover_url = u'https://static2.gazeta.ua/img2/cache/preview/1064/1064219_w_450.jpg'
    language = 'ru_UK'
    no_stylesheets = False
    remove_javascript = False
    auto_cleanup = False
    remove_empty_feeds = True
    oldest_article = 3
    max_articles_per_feed = 50

    remove_tags_before = dict(name='article')

    remove_tags_after = dict(name='section', attrs={'class': 'article-content clearfix'})

    remove_tags =   [
         dict(name='div', attrs={'class': 'mt5'}),
         dict(name='p', attrs={'id': 'mce_0'}),
         dict(name='p', attrs={'id': 'mce_1'}),
         dict(name='p', attrs={'id': 'mce_2'}),
         dict(name='p', attrs={'id': 'mce_3'}),
         dict(name='p', attrs={'id': 'mce_4'}),
         dict(name='p', attrs={'id': 'mce_5'}),
         dict(name='p', attrs={'id': 'mce_6'}),
         dict(name='p', attrs={'id': 'mce_7'}),
         dict(name='p', attrs={'id': 'mce_8'}),
         dict(name='p', attrs={'id': 'mce_9'}),
         dict(name='p', attrs={'id': 'mce_10'}),
         dict(name='p', attrs={'id': 'mce_11'}),
         dict(name='p', attrs={'id': 'mce_12'}),
         dict(name='p', attrs={'id': 'mce_13'}),
         dict(name='p', attrs={'id': 'mce_14'}),
         dict(name='p', attrs={'id': 'mce_15'}),
         dict(name='p', attrs={'id': 'mce_16'}),
         dict(name='section', attrs={'class': 'w-marker-photo lg'})
        ]

    feeds = [
        ('Gazeta.ua', 'https://gazeta.ua/ru/rss')
        ]

Ukrainian version
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from calibre.web.feeds.news import BasicNewsRecipe, classes

class GazetaUA(BasicNewsRecipe):
    title = 'Gazeta.ua'
    __author__ = 'bugmen00t'
    description = '\u0412\u0441\u0435\u0443\u043A\u0440\u0430\u0457\u043D\u0441\u044C\u043A\u0430 \u043E\u043D-\u043B\u0430\u0439\u043D \u0433\u0430\u0437\u0435\u0442\u0430. \u041E\u043F\u0435\u0440\u0430\u0442\u0438\u0432\u043D\u0456 \u043D\u043E\u0432\u0438\u043D\u0438 \u0437 \u0423\u043A\u0440\u0430\u0457\u043D\u0438 \u0456 \u0441\u0432\u0456\u0442\u0443: \u043F\u043E\u0434\u0456\u0457, \u043F\u043E\u043B\u0456\u0442\u0438\u043A\u0430, \u0441\u043F\u043E\u0440\u0442, \u043A\u0443\u043B\u044C\u0442\u0443\u0440\u0430.'
    publisher = '\u0422\u041E\u0412 "\u0412\u0438\u0434\u0430\u0432\u043D\u0438\u0447\u0430 \u043A\u043E\u043C\u043F\u0430\u043D\u0456\u044F "\u041D\u043E\u0432\u0430 \u0456\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u044F"'
    category = 'newspaper'
    cover_url = u'https://static2.gazeta.ua/img2/cache/preview/1064/1064219_w_450.jpg'
    language = 'uk'
    no_stylesheets = False
    remove_javascript = False
    auto_cleanup = False
    remove_empty_feeds = True
    oldest_article = 3
    max_articles_per_feed = 50

    remove_tags_before = dict(name='article')

    remove_tags_after = dict(name='section', attrs={'class': 'article-content clearfix'})

    remove_tags =   [
         dict(name='div', attrs={'class': 'mt5'}),
         dict(name='p', attrs={'id': 'mce_0'}),
         dict(name='p', attrs={'id': 'mce_1'}),
         dict(name='p', attrs={'id': 'mce_2'}),
         dict(name='p', attrs={'id': 'mce_3'}),
         dict(name='p', attrs={'id': 'mce_4'}),
         dict(name='p', attrs={'id': 'mce_5'}),
         dict(name='p', attrs={'id': 'mce_6'}),
         dict(name='p', attrs={'id': 'mce_7'}),
         dict(name='p', attrs={'id': 'mce_8'}),
         dict(name='p', attrs={'id': 'mce_9'}),
         dict(name='p', attrs={'id': 'mce_10'}),
         dict(name='p', attrs={'id': 'mce_11'}),
         dict(name='p', attrs={'id': 'mce_12'}),
         dict(name='p', attrs={'id': 'mce_13'}),
         dict(name='p', attrs={'id': 'mce_14'}),
         dict(name='p', attrs={'id': 'mce_15'}),
         dict(name='p', attrs={'id': 'mce_16'}),
         dict(name='section', attrs={'class': 'w-marker-photo lg'})
        ]

    feeds = [
        ('Gazeta.ua', 'https://gazeta.ua/rss')
        ]


Fix for Новая Вкладка (newtab.recipe): updated RSS URL.

Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from calibre.web.feeds.news import BasicNewsRecipe


class NewTab(BasicNewsRecipe):
    title = '\u041D\u043E\u0432\u0430\u044F \u0432\u043A\u043B\u0430\u0434\u043A\u0430'
    __author__ = 'bugmen00t'
    description = '\u041F\u043B\u043E\u0449\u0430\u0434\u043A\u0430 \u0434\u043B\u044F \u043F\u0443\u0431\u043B\u0438\u043A\u0430\u0446\u0438\u0438 \u0440\u0435\u043F\u043E\u0440\u0442\u0430\u0436\u0435\u0439 \u0438 \u0437\u0430\u043C\u0435\u0442\u043E\u043A \u043E \u0442\u043E\u043C, \u043A\u0430\u043A \u043C\u0435\u043D\u044F\u044E\u0442\u0441\u044F \u0440\u0435\u0433\u0438\u043E\u043D\u044B \u0420\u043E\u0441\u0441\u0438\u0438 \u043F\u043E\u0441\u043B\u0435 24 \u0444\u0435\u0432\u0440\u0430\u043B\u044F 2022 \u0433\u043E\u0434\u0430.'
    publisher = '\u041C\u0438\u0445\u0430\u0438\u043B \u0414\u0430\u043D\u0438\u043B\u043E\u0432\u0438\u0447'
    category = 'blog'
    cover_url = u'https://static.tildacdn.com/tild3765-3961-4337-b366-323437626331/thenewtab-sn.jpg'
    language = 'ru'
    no_stylesheets = True
    remove_javascript = False
    auto_cleanup = False
    oldest_article = 30
    max_articles_per_feed = 10

    remove_tags_before = dict(name='h1')

    remove_tags = [
        dict(name='div', attrs={'class': 't-popup'}),
        dict(name='div', attrs={'class': 't185'}),
        dict(name='div', attrs={'class': 't403'}),
        dict(name='div', attrs={'class': 't404'}),
        dict(name='div', attrs={'class': 't134'}),
        dict(name='div', attrs={'class': 'sp-form-fields-wrapper'})
    ]

    feeds = [
        ('\u041D\u043E\u0432\u0430\u044F \u0432\u043A\u043B\u0430\u0434\u043A\u0430','https://thenewtab.io/feed/')
        ]

    def preprocess_html(self, soup):
        for img in soup.findAll('img', attrs={'data-original': True}):
            img['src'] = img['data-original']
        return soup
Attached Images
     
Attached Files
File Type: recipe radiosvoboda_ua.recipe (6.5 KB, 922 views)
File Type: recipe vikna_ru.recipe (1.6 KB, 926 views)
File Type: recipe vikna_ua.recipe (1.6 KB, 909 views)
File Type: recipe gazetaua_ru.recipe (52 Bytes, 948 views)
File Type: recipe gazetaua_ua.recipe (52 Bytes, 961 views)
File Type: recipe newtab.recipe (1.8 KB, 913 views)
bugmen00t is offline   Reply With Quote