View Single Post
Old 08-19-2024, 01:46 PM   #30
bugmen00t
Connoisseur
bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!
 
bugmen00t's Avatar
 
Posts: 82
Karma: 100000
Join Date: Aug 2015
Device: Kindle Keyboard 3G + Kindle Voyage WiFi + Kindle PW11 Kids WiFi
New and fixed recipes (part 14)

Fixed Собеседник recipe (sobesednik.recipe): updated RSS URL.

Агентство: news site, a spin-off of Проект.Медиа. Favicon.
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from calibre.web.feeds.news import BasicNewsRecipe


class Agents(BasicNewsRecipe):
    """Calibre news recipe for the Agentstvo outlet (agents.media).

    Articles come from the site's single RSS feed. Page cleanup is done with
    explicit tag rules (``auto_cleanup`` is off): content before the first
    ``<h1>`` and after the ``<article>`` element is dropped.
    """

    # --- Publication metadata ---
    title = u'\u00AB\u0410\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E\u00BB'
    # Spelling fixed: "\u043D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u043E\u0433\u043E" was missing a letter.
    description = u'\u041D\u043E\u0432\u043E\u0441\u0442\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u043E\u0441\u043D\u043E\u0432\u0430\u043D\u043D\u043E\u0435 \u0432\u044B\u0445\u043E\u0434\u0446\u0430\u043C\u0438 \u0438\u0437 \u043A\u043E\u043B\u043B\u0435\u043A\u0442\u0438\u0432\u0430 \u043D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u043E\u0433\u043E \u0438\u0437\u0434\u0430\u043D\u0438\u044F \u00AB\u041F\u0440\u043E\u0435\u043A\u0442\u00BB.'
    __author__ = 'bugmen00t'
    publisher = 'Project Media'
    publication_type = 'news'
    language = 'ru'
    # NOTE(review): this is a Telegram CDN link; such links may not be
    # permanent -- confirm it still resolves or swap in a stable image.
    cover_url = 'https://cdn4.cdn-telegram.org/file/EBZ1b-EP9bQkgmu3zbLST2mKWxQ1Gk5yfyG8I09Jf7LSDlVJ2ebptzauijIr4c3QxfWTOxTk9dPjAF-Bg0XZtYOtHM5309L4FjuhH588Mj3Om5MQbT2R7Kc4nM4hVL9DS39UcnIT_k4Gh3JKCDxkNGS2ZlG3hJpkdyAvPoVNcdO9vfXWOuWkyDQAO9N3aoiK5TXAjLj5ViM7zSVX1vRLF_LF_8TBADdyT6TOkqjrzYv_35ARGxld6CtNLDwLh5plVKgnDkEiwlR82PjEabDBHOKRnPRhoF1KViWF-_CswOZtvC-Z2NBDcvwT4mMV-sfCZaEwQXaXaKNUDzyszmViEw.jpg'

    # --- Download limits ---
    oldest_article = 7
    max_articles_per_feed = 10

    # --- Page cleanup ---
    auto_cleanup = False
    no_stylesheets = False

    remove_tags_before = dict(name='h1')

    remove_tags_after = dict(name='article')

    # The feed title previously read 'Gazeta "Sobesednik"', a leftover from
    # the recipe this one was copied from; it now names this outlet.
    feeds = [(
        u'\u00AB\u0410\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E\u00BB',
        'https://www.agents.media/feed/'
    )]


Зеркало: Belarusian news site, successor to the shut-down TUT.BY portal. Favicon.
As there are no Belarusian recipes yet, I'm not sure which language code should be used for Russian (Belarus): ru_BY, ru_BE or just ru.
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from calibre.web.feeds.news import BasicNewsRecipe

class Zerkalo(BasicNewsRecipe):
    """Calibre news recipe for Zerkalo (zerkalo.io).

    The feeds are read from a mirror domain instead of the original
    news.zerkalo.io host; the original feed URLs are listed in a comment
    next to ``feeds`` for reference.
    """

    # --- Publication metadata ---
    __author__ = 'bugmen00t'
    title = u'\u0417\u0435\u0440\u043A\u0430\u043B\u043E'
    description = u'\u0417\u0435\u0440\u043A\u0430\u043B\u043E, \u043F\u0440\u043E\u0435\u043A\u0442 \u0447\u0430\u0441\u0442\u0438 \u0431\u044B\u0432\u0448\u0435\u0439 \u043A\u043E\u043C\u0430\u043D\u0434\u044B TUT.BY.'
    publisher = '\u0417\u0435\u0440\u043A\u0430\u043B\u043E (zerkalo.io)'
    publication_type = 'news'
    # NOTE(review): confirm that calibre recognizes 'ru_BY'; plain 'ru' is
    # the fallback if it does not.
    language = 'ru_BY'
    cover_url = 'https://upload.wikimedia.org/wikipedia/commons/c/c7/Zerkalo.io.png'

    # --- Download limits ---
    oldest_article = 7
    max_articles_per_feed = 100

    # --- Page cleanup (manual rules; automatic cleanup is disabled) ---
    auto_cleanup = False
    no_stylesheets = False
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {'name': 'div', 'attrs': {'id': 'article_body'}}
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'b-addition m-single m-simplify'}},
        {'name': 'div', 'attrs': {'class': 'b-addition m-simplify'}},
        {'name': 'iframe'},
    ]

    # Original feeds (same titles as below), kept for reference:
    #   https://news.zerkalo.io/rss/all.rss
    #   https://news.zerkalo.io/rss/index.rss
    # The active entries use a mirror domain as a censorship bypass.
    feeds = [
        (
            '\u041D\u043E\u0432\u043E\u0441\u0442\u0438 \u0437\u0430 \u0434\u0435\u043D\u044C',
            'https://br0wse-zerkalo.site/news/rss/all.rss',
        ),
        (
            '\u041D\u043E\u0432\u043E\u0441\u0442\u0438 \u043D\u0435\u0434\u0435\u043B\u0438',
            'https://br0wse-zerkalo.site/news/rss/index.rss',
        ),
    ]


SotaVision: Russian independent news outlet. Favicon.
Fixes needed:
  • No images in some articles (lazyload)
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from calibre.web.feeds.news import BasicNewsRecipe

class Sotavision(BasicNewsRecipe):
    """Calibre news recipe for SOTA.Vision (sotavision.world).

    Articles come from the site-wide feed plus per-category feeds. Page
    cleanup uses explicit tag rules, and ``preprocess_html`` rewrites lazily
    loaded images so that they carry a real ``src``.
    """

    # --- Publication metadata ---
    title = u'SOTA.Vision'
    description = u'\u0420\u043E\u0441\u0441\u0438\u0439\u0441\u043A\u043E\u0435 \u043D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u043E\u0435 \u043E\u0431\u0449\u0435\u0441\u0442\u0432\u0435\u043D\u043D\u043E-\u043F\u043E\u043B\u0438\u0442\u0438\u0447\u0435\u0441\u043A\u043E\u0435 \u043C\u0435\u0434\u0438\u0430, \u0441\u0444\u043E\u043A\u0443\u0441\u0438\u0440\u043E\u0432\u0430\u043D\u043D\u043E\u0435 \u043D\u0430 \u043E\u0441\u0432\u0435\u0449\u0435\u043D\u0438\u0438 \u043D\u0430\u0440\u0443\u0448\u0435\u043D\u0438\u044F \u043F\u0440\u0430\u0432 \u0447\u0435\u043B\u043E\u0432\u0435\u043A\u0430 \u0438 \u043F\u043E\u043B\u0438\u0442\u0438\u0447\u0435\u0441\u043A\u043E\u043C \u043F\u0440\u0435\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u0438 \u0432 \u0420\u043E\u0441\u0441\u0438\u0438.'
    __author__ = 'bugmen00t'
    publisher = '\u0410\u043B\u0435\u043A\u0441\u0430\u043D\u0434\u0440\u0430 \u0410\u0433\u0435\u0435\u0432\u0430, \u041E\u043B\u0435\u0433 \u0415\u043B\u0430\u043D\u0447\u0438\u043A,  \u0415\u0432\u0433\u0435\u043D\u0438\u0439 \u0414\u043E\u043C\u043E\u0436\u0438\u0440\u043E\u0432'
    publication_type = 'news'
    language = 'ru'
    cover_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/a/ac/SotaVision.png/600px-SotaVision.png'

    # --- Download limits ---
    oldest_article = 7
    max_articles_per_feed = 10

    # --- Page cleanup ---
    auto_cleanup = False
    no_stylesheets = True

    remove_tags_before = dict(name='h1')

    remove_tags_after = dict(name='span', attrs={'style': 'border-color:#EBEBEB;border-width:1px;width:100%;'})

    # NOTE(review): these class strings embed numbered tokens (tdi_7,
    # tdi_52, ...) that look auto-generated; verify they still match if the
    # site layout changes.
    remove_tags = [
        dict(name='div', attrs={'class': 'td_block_wrap tdb_mobile_menu tdi_7 td-pb-border-top td_block_template_1 tdb-header-align'}),
        dict(name='div', attrs={'class': 'td_block_wrap tdb_single_author tdi_52 td-pb-border-top td_block_template_1 tdb-post-meta'}),
        dict(name='div', attrs={'class': 'td_block_wrap tdb_single_date tdi_53 td-pb-border-top td_block_template_1 tdb-post-meta'}),
        dict(name='div', attrs={'class': 'td_block_wrap tdb_single_post_views tdi_54 td-pb-border-top td_block_template_1 tdb-post-meta'}),
        dict(name='div', attrs={'class': 'td_block_wrap tdb_single_post_share tdi_55  td-pb-border-top td_block_template_1'}),
        dict(name='div', attrs={'class': 'td-post-sharing-visible'}),
    ]

    feeds = [
        ('\u0412\u0441\u0435 \u043C\u0430\u0442\u0435\u0440\u0438\u0430\u043B\u044B', 'https://sotavision.world/feed/'),
        ('\u041D\u043E\u0432\u043E\u0441\u0442\u0438', 'https://sotavision.world/category/news/feed/'),
        ('\u0418\u0441\u0442\u043E\u0440\u0438\u0438', 'https://sotavision.world/category/stories/feed/'),
        ('\u0412\u0438\u0434\u0435\u043E', 'https://sotavision.world/category/videos/feed/'),
        ('\u0424\u043E\u0442\u043E', 'https://sotavision.world/category/photos/feed/'),
        ('\u041A\u0430\u0440\u0442\u043E\u0447\u043A\u0438', 'https://sotavision.world/category/projects/feed/'),
        ('\u041C\u043D\u0435\u043D\u0438\u044F', 'https://sotavision.world/category/opinions/feed/'),
    ]

    # Attributes in which lazy-loading scripts commonly keep the real image
    # URL, checked in this order.
    # NOTE(review): these are common conventions, not verified against the
    # sotavision.world markup -- confirm which one the site uses and trim.
    LAZY_IMAGE_ATTRS = ('data-src', 'data-lazy-src', 'data-img-url', 'data-original')

    def preprocess_html(self, soup):
        """Give lazily loaded ``<img>`` tags a real ``src``.

        Some articles were downloaded without images because of lazy
        loading. For every ``<img>``, the first non-empty attribute from
        ``LAZY_IMAGE_ATTRS`` is copied into ``src``. Images carrying none of
        those attributes are left untouched.

        :param soup: parsed article page handed over by calibre.
        :return: the same ``soup`` object, modified in place.
        """
        for img in soup.findAll('img'):
            for attr in self.LAZY_IMAGE_ATTRS:
                real_url = img.get(attr)
                if real_url:
                    img['src'] = real_url
                    break  # first match wins; do not overwrite it again
        return soup


GeekCity: modern culture portal: news, reviews and guides. Favicon.
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from calibre.web.feeds.news import BasicNewsRecipe

class GeekCity(BasicNewsRecipe):
    """Calibre news recipe for GeekCity (geekcity.ru).

    Articles come from the site-wide feed plus per-category feeds. Page
    cleanup uses explicit tag rules (``auto_cleanup`` is off): only the
    ``<article>`` element is kept, minus badges, post meta, share bars and
    tag lists.
    """

    # --- Publication metadata ---
    title = u'GeekCity'
    description = u'\u041F\u043E\u0440\u0442\u0430\u043B \u043E \u043A\u043E\u043C\u0438\u043A\u0441\u0430\u0445, \u043A\u0438\u043D\u043E, \u0441\u0435\u0440\u0438\u0430\u043B\u0430\u0445, \u0438\u0433\u0440\u0430\u0445, sci-fi \u0438 \u043C\u043D\u043E\u0433\u043E\u043C \u0434\u0440\u0443\u0433\u043E\u043C. \u0417\u0434\u0435\u0441\u044C \u0432\u044B \u0432\u0441\u0435\u0433\u0434\u0430 \u0441\u043C\u043E\u0436\u0435\u0442\u0435 \u043D\u0430\u0439\u0442\u0438 \u0441\u0432\u0435\u0436\u0438\u0435 \u043D\u043E\u0432\u043E\u0441\u0442\u0438 \u043E \u0441\u0430\u043C\u044B\u0445 \u0432\u0430\u0436\u043D\u044B\u0445 \u0441\u043E\u0431\u044B\u0442\u0438\u044F\u0445 \u0432 \u043C\u0438\u0440\u0435 \u0433\u0438\u043A\u043E\u0432, \u0430\u043A\u0442\u0443\u0430\u043B\u044C\u043D\u044B\u0435 \u043E\u0431\u0437\u043E\u0440\u044B, \u0433\u0430\u0439\u0434\u044B, \u0441\u0442\u0430\u0442\u044C\u0438 \u0438 \u043C\u043D\u043E\u0433\u043E\u0435 \u0434\u0440\u0443\u0433\u043E\u0435.'
    __author__ = 'bugmen00t'
    publisher = 'GeekCity.ru'
    publication_type = 'blog'
    language = 'ru'
    cover_url = 'https://geekcity.ru/wp-content/uploads/2021/03/og.jpg'

    # --- Download limits ---
    oldest_article = 7
    max_articles_per_feed = 10

    # --- Page cleanup ---
    auto_cleanup = False
    no_stylesheets = False

    remove_tags_before = dict(name='article')

    remove_tags_after = dict(name='article')

    remove_tags = [
        dict(name='div', attrs={'class': 'term-badges floated'}),
        dict(name='div', attrs={'class': 'post-meta single-post-meta'}),
        dict(name='div', attrs={'class': 'post-share single-post-share top-share clearfix style-1'}),
        dict(name='div', attrs={'class': 'post-share single-post-share bottom-share clearfix style-1'}),
        dict(name='div', attrs={'class': 'entry-terms post-tags clearfix'}),
    ]

    feeds = [
        ('\u0412\u0441\u0435 \u043C\u0430\u0442\u0435\u0440\u0438\u0430\u043B\u044B', 'https://geekcity.ru/feed/'),
        ('\u0411\u0435\u0437 \u043A\u0430\u0442\u0435\u0433\u043E\u0440\u0438\u0438', 'https://geekcity.ru/category/%D0%B1%D0%B5%D0%B7-%D1%80%D1%83%D0%B1%D1%80%D0%B8%D0%BA%D0%B8/feed/'),
        # This entry used to point at the category's HTML page; the trailing
        # "feed/" makes it an RSS URL like every other entry in the list.
        ('\u0421\u0430\u0439\u0442', 'https://geekcity.ru/category/site/feed/'),
        ('\u0424\u0438\u043B\u044C\u043C\u044B', 'https://geekcity.ru/category/moviespod/feed/'),
        ('\u0421\u0435\u0440\u0438\u0430\u043B\u044B', 'https://geekcity.ru/category/tvshow/feed/'),
        ('\u041A\u043E\u043C\u0438\u043A\u0441\u044B', 'https://geekcity.ru/category/comics-2/feed/'),
        ('\u0418\u0433\u0440\u044B', 'https://geekcity.ru/category/games/feed/'),
        ('\u0422\u0435\u0445\u043D\u043E', 'https://geekcity.ru/category/tech/feed/'),
        ('\u0428\u043C\u043E\u0442', 'https://geekcity.ru/category/shopping/feed/'),
        ('\u0413\u0430\u0439\u0434\u044B', 'https://geekcity.ru/category/guides/feed/'),
    ]
Attached Images
    
Attached Files
File Type: recipe sobesednik.recipe (1.2 KB, 359 views)
File Type: recipe agents.recipe (1.2 KB, 356 views)
File Type: recipe zerkalo.recipe (1.7 KB, 361 views)
File Type: recipe sotavision.recipe (3.2 KB, 348 views)
File Type: recipe geekcity.recipe (3.0 KB, 357 views)
bugmen00t is offline   Reply With Quote