MobileRead Forums - View Single Post - (mostly) Russian and Ukrainian sources: state of built-in recipes, fixes, new recipes

bugmen00t · 08-10-2022, 09:15 AM

Another bunch of Russian recipes, and a few unrelated ones.

Wonderzine: style, beauty, fashion and lifestyle blog. Favicon.
Fixes needed:

Partial text in paywalled articles

Spoiler:

Code:

#!/usr/bin/env python
# vim:fileencoding=utf-8

from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe

class Wonderzine(BasicNewsRecipe):
    title           	  = 'Wonderzine'
    __author__            = 'bugmen00t'
    description           = '\u041E\u043D\u043B\u0430\u0439\u043D-\u0438\u0437\u0434\u0430\u043D\u0438\u0435 \u0434\u043B\u044F \u0441\u043E\u0432\u0440\u0435\u043C\u0435\u043D\u043D\u044B\u0445 \u0436\u0435\u043D\u0449\u0438\u043D \u0438 \u0438\u0445 \u0434\u0440\u0443\u0437\u0435\u0439. \u041C\u044B \u043F\u0438\u0448\u0435\u043C \u043E \u043C\u043E\u0434\u0435 \u0438 \u043F\u043E\u043B\u0438\u0442\u0438\u043A\u0435, \u043E\u0431\u0449\u0435\u0441\u0442\u0432\u0435\u043D\u043D\u044B\u0445 \u0442\u0435\u043D\u0434\u0435\u043D\u0446\u0438\u044F\u0445 \u0438 \u0440\u0430\u0437\u0432\u043B\u0435\u0447\u0435\u043D\u0438\u044F\u0445, \u0437\u0434\u043E\u0440\u043E\u0432\u044C\u0435 \u0438 \u0441\u0435\u043A\u0441\u0435, \u044D\u0442\u0438\u043A\u0435 \u0438 \u0432\u043E\u0437\u043C\u043E\u0436\u043D\u043E\u0441\u0442\u044F\u0445, \u043E\u0441\u043E\u0437\u043D\u0430\u043D\u043D\u044B\u0445 \u0442\u0440\u0430\u0442\u0430\u0445 \u0438 \u0440\u0430\u0437\u043D\u043E\u043E\u0431\u0440\u0430\u0437\u0438\u0438 \u043A\u0440\u0430\u0441\u043E\u0442\u044B.'
    publisher             = 'Redefine Media holding'
    category              = 'blog'
    cover_url = u'https://lamcdn.net/wonderzine.com/post-og_image/WQRiRGqLlcKyEHsnOS-sUw.png'
    language              = 'ru'
    no_stylesheets        = False
    remove_javascript = False
    auto_cleanup   = False
    oldest_article = 14
    max_articles_per_feed = 20

    remove_tags_before = dict(name='h1')

    remove_tags_after = dict(name='div', attrs={'class': 'is_article_end'})

    remove_tags =   [
        dict(name='ul', attrs={'class': 'post-meta'}),
        dict(name='a', attrs={'class': 'flow'}),
        dict(name='a', attrs={'class': 'banner-ad-link'}),
        dict(name='div', attrs={'class': 'share-buttons-bar'}),
        dict(name='div', attrs={'class': 'pseudosidebar'})        
        ] 

    feeds = [
        ('\u0412\u0441\u0435 \u043C\u0430\u0442\u0435\u0440\u0438\u0430\u043B\u044B', 'https://www.wonderzine.com/feeds/posts.atom'),
        ('\u041D\u043E\u0432\u043E\u0441\u0442\u0438', 'https://www.wonderzine.com/feeds/posts.atom?news=only'),
        ('\u0416\u0438\u0437\u043D\u044C', 'https://www.wonderzine.com/feeds/posts.atom?topic=life'),
        ('\u0417\u0434\u043E\u0440\u043E\u0432\u044C\u0435', 'https://www.wonderzine.com/feeds/posts.atom?topic=health'),
        ('\u041A\u0440\u0430\u0441\u043E\u0442\u0430', 'https://www.wonderzine.com/feeds/posts.atom?topic=beauty'),
        ('\u0420\u0430\u0437\u0432\u043B\u0435\u0447\u0435\u043D\u0438\u044F', 'https://www.wonderzine.com/feeds/posts.atom?topic=entertainment'),
        ('\u0421\u0442\u0438\u043B\u044C', 'https://www.wonderzine.com/feeds/posts.atom?topic=style'),
        ('\u0412\u043E\u0437\u043C\u043E\u0436\u043D\u043E\u0441\u0442\u0438', 'https://www.wonderzine.com/feeds/posts.atom?topic=opportunities')
        ]

Vedomosit: daily newspaper; non-paywlled content only. Favicon.

Spoiler:

Bellingcat: OSINT investigaton and citizen journalism blog. Favicon.

English version:

Spoiler:

Russian version:

Spoiler:

Code:

#!/usr/bin/env python
# vim:fileencoding=utf-8

from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe

class Bellingcat(BasicNewsRecipe):
    title           	  = '\u0411\u0435\u043B\u043B\u0438\u043D\u0433\u043A\u044D\u0442 (bell\u00BFngcat)'
    __author__            = 'bugmen00t'
    description           = 'Bellingcat \u2014 \u044D\u0442\u043E \u043D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u0430\u044F \u043C\u0435\u0436\u0434\u0443\u043D\u0430\u0440\u043E\u0434\u043D\u0430\u044F \u043A\u043E\u043C\u0430\u043D\u0434\u0430 \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u0442\u0435\u043B\u0435\u0439 \u0438 \u0433\u0440\u0430\u0436\u0434\u0430\u043D\u0441\u043A\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u043E\u0432, \u043A\u043E\u0442\u043E\u0440\u044B\u0435 \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u0443\u044E\u0442 \u043E\u0442\u043A\u0440\u044B\u0442\u044B\u0435 \u0438\u0441\u0442\u043E\u0447\u043D\u0438\u043A\u0438 \u0438 \u0441\u043E\u0446\u0438\u0430\u043B\u044C\u043D\u044B\u0435 \u0441\u0435\u0442\u0438 \u0434\u043B\u044F \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u0439 \u043D\u0430 \u0440\u0430\u0437\u043B\u0438\u0447\u043D\u044B\u0435 \u0442\u0435\u043C\u044B \u2014 \u043E\u0442 \u043C\u0435\u043A\u0441\u0438\u043A\u0430\u043D\u0441\u043A\u0438\u0445 \u043D\u0430\u0440\u043A\u043E\u043A\u0430\u0440\u0442\u0435\u043B\u0435\u0439 \u0438 \u043F\u0440\u0435\u0441\u0442\u0443\u043F\u043B\u0435\u043D\u0438\u0439 \u043F\u0440\u043E\u0442\u0438\u0432 \u0447\u0435\u043B\u043E\u0432\u0435\u0447\u0435\u0441\u0442\u0432\u0430 \u0434\u043E \u043E\u0442\u0441\u043B\u0435\u0436\u0438\u0432\u0430\u043D\u0438\u044F \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u043E\u0432\u0430\u043D\u0438\u044F \u0445\u0438\u043C\u0438\u0447\u0435\u0441\u043A\u043E\u0433\u043E \u043E\u0440\u0443\u0436\u0438\u044F \u0438 \u043A\u043E\u043D\u0444\u043B\u0438\u043A\u0442\u043E\u0432 \u043F\u043E \u0432\u0441\u0435\u043C\u0443 \u043C\u0438\u0440\u0443 \u2014 \u0441\u043E\u0447\u0435\u0442\u0430\u044F \u0441\u043E\u0432\u0440\u0435\u043C\u0435\u043D\u043D\u044B\u0435 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u0438, \u043A\u0440\u0438\u043C\u0438\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0435 \u044D\u043A\u0441\u043F\u0435\u0440\u0442\u0438\u0437\u044B, \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u043A\u0443, \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u044F \u0438 \u0431\u043E\u0440\u044C\u0431\u0443 \u0437\u0430 \u043F\u0440\u043E\u0437\u0440\u0430\u0447\u043D\u043E\u0441\u0442\u044C \u0438 \u043E\u0442\u0432\u0435\u0442\u0441\u0442\u0432\u0435\u043D\u043D\u043E\u0441\u0442\u044C.'
    publisher             = 'Stichting Bellingcat'
    category              = 'blog'
    cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
    language              = 'ru'
    no_stylesheets        = False
    remove_javascript = False
    auto_cleanup   = False
    oldest_article = 60
    max_articles_per_feed = 10


    remove_tags_before = dict(name='div', attrs={'class': 'container'})

    remove_tags_after = dict(name='div', attrs={'class': 'container'})

    remove_tags =   [
        dict(name='div', attrs={'class': 'social social--share'}),
        dict(name='div', attrs={'class': 'singular__related'})
        ] 

    feeds = [
        ('Bellingcat', 'https://ru.bellingcat.com/feed/')
        ]

Ukrainian version:

Spoiler:

Code:

#!/usr/bin/env python
# vim:fileencoding=utf-8

from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe

class Bellingcat(BasicNewsRecipe):
    title           	  = '\u0411\u0435\u043B\u043B\u0456\u043D\u0433\u043A\u0435\u0442 (bell\u00BFngcat)'
    __author__            = 'bugmen00t'
    description           = '\u0411\u0435\u043B\u043B\u0456\u043D\u0433\u043A\u0435\u0442 \u2014 \u043D\u0435\u0437\u0430\u043B\u0435\u0436\u043D\u0438\u0439 \u043C\u0456\u0436\u043D\u0430\u0440\u043E\u0434\u043D\u0438\u0439 \u043A\u043E\u043B\u0435\u043A\u0442\u0438\u0432 \u0434\u043E\u0441\u043B\u0456\u0434\u043D\u0438\u043A\u0456\u0432, \u0441\u043B\u0456\u0434\u0447\u0438\u0445 \u0456 \u0433\u0440\u043E\u043C\u0430\u0434\u044F\u043D\u0441\u044C\u043A\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0456\u0432, \u044F\u043A\u0456 \u0432\u0438\u043A\u043E\u0440\u0438\u0441\u0442\u043E\u0432\u0443\u044E\u0442\u044C \u0432\u0456\u0434\u043A\u0440\u0438\u0442\u0456 \u0434\u0436\u0435\u0440\u0435\u043B\u0430 \u0442\u0430 \u0441\u043E\u0446\u0456\u0430\u043B\u044C\u043D\u0456 \u043C\u0435\u0440\u0435\u0436\u0456 \u0434\u043B\u044F \u0440\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F \u0440\u0456\u0437\u043D\u043E\u043C\u0430\u043D\u0456\u0442\u043D\u0438\u0445 \u0442\u0435\u043C \u2014 \u0432\u0456\u0434 \u043C\u0435\u043A\u0441\u0438\u043A\u0430\u043D\u0441\u044C\u043A\u0438\u0445 \u043D\u0430\u0440\u043A\u043E\u0431\u0430\u0440\u043E\u043D\u0456\u0432 \u0456 \u0437\u043B\u043E\u0447\u0438\u043D\u0456\u0432 \u043F\u0440\u043E\u0442\u0438 \u043B\u044E\u0434\u0441\u0442\u0432\u0430, \u0434\u043E \u0432\u0456\u0434\u0441\u0442\u0435\u0436\u0435\u043D\u043D\u044F \u0432\u0438\u043A\u043E\u0440\u0438\u0441\u0442\u0430\u043D\u043D\u044F \u0445\u0456\u043C\u0456\u0447\u043D\u043E\u0457 \u0437\u0431\u0440\u043E\u0457 \u0442\u0430 \u043A\u043E\u043D\u0444\u043B\u0456\u043A\u0442\u0456\u0432 \u0443 \u0432\u0441\u044C\u043E\u043C\u0443 \u0441\u0432\u0456\u0442\u0456. \u041C\u0438 \u043F\u0440\u0430\u0446\u044E\u0454\u043C\u043E \u0432 \u0443\u043D\u0456\u043A\u0430\u043B\u044C\u043D\u0456\u0439 \u0441\u0444\u0435\u0440\u0456, \u0434\u0435 \u043F\u0435\u0440\u0435\u0434\u043E\u0432\u0456 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0456\u0457, \u0441\u0443\u0434\u043E\u0432\u043E-\u043C\u0435\u0434\u0438\u0447\u043D\u0456 \u0434\u043E\u0441\u043B\u0456\u0434\u0436\u0435\u043D\u043D\u044F, \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0438\u043A\u0430, \u0440\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F, \u043F\u0440\u043E\u0437\u043E\u0440\u0456\u0441\u0442\u044C \u0456 \u0432\u0456\u0434\u043F\u043E\u0432\u0456\u0434\u0430\u043B\u044C\u043D\u0456\u0441\u0442\u044C \u043E\u0431\u2019\u0454\u0434\u043D\u0443\u044E\u0442\u044C\u0441\u044F.'
    publisher             = 'Stichting Bellingcat'
    category              = 'blog'
    cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
    language              = 'uk'
    no_stylesheets        = False
    remove_javascript = False
    auto_cleanup   = False
    oldest_article = 60
    max_articles_per_feed = 10


    remove_tags_before = dict(name='div', attrs={'class': 'container'})

    remove_tags_after = dict(name='div', attrs={'class': 'container'})

    remove_tags =   [
        dict(name='div', attrs={'class': 'social social--share'}),
        dict(name='div', attrs={'class': 'singular__related'})
        ] 

    feeds = [
        ('Bellingcat', 'https://uk.bellingcat.com/feed/')
        ]

Version en français (no updates since 2021):

Spoiler:

Versión en Español (no updates since 2021):

Spoiler:

Росбалт: federal news agency with emphasis on Saint Petersburg. Favicon.

Spoiler:

Code:

#!/usr/bin/env python
# vim:fileencoding=utf-8

from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe

class Rosbalt(BasicNewsRecipe):
    title           	  = '\u0420\u043E\u0441\u0431\u0430\u043B\u0442'
    __author__            = 'bugmen00t'
    description           = '\u0424\u0435\u0434\u0435\u0440\u0430\u043B\u044C\u043D\u043E\u0435 \u0438\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0438\u043E\u043D\u043D\u043E-\u0430\u043D\u0430\u043B\u0438\u0442\u0438\u0447\u0435\u0441\u043A\u043E\u0435 \u0430\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E. \u041A\u0440\u0443\u0433\u043B\u043E\u0441\u0443\u0442\u043E\u0447\u043D\u043E \u0441\u043B\u0435\u0434\u0438\u043C \u0437\u0430 \u0441\u043E\u0431\u044B\u0442\u0438\u044F\u043C\u0438 \u0432 \u0441\u0442\u0440\u0430\u043D\u0435 \u0438 \u043C\u0438\u0440\u0435, \u043F\u0443\u0431\u043B\u0438\u043A\u0443\u0435\u043C \u0441\u0430\u043C\u044B\u0435 \u0441\u0432\u0435\u0436\u0438\u0435 \u043D\u043E\u0432\u043E\u0441\u0442\u0438, \u043A\u043E\u043C\u043C\u0435\u043D\u0442\u0430\u0440\u0438\u0438 \u0432\u0435\u0434\u0443\u0449\u0438\u0445 \u044D\u043A\u0441\u043F\u0435\u0440\u0442\u043E\u0432, \u0430\u043D\u0430\u043B\u0438\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0435 \u0441\u0442\u0430\u0442\u044C\u0438, \u0438\u043D\u0442\u0435\u0440\u0432\u044C\u044E \u0441 \u0432\u0435\u0434\u0443\u0449\u0438\u043C\u0438 \u043F\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u0438\u0442\u0435\u043B\u044F\u043C\u0438 \u0440\u043E\u0441\u0441\u0438\u0439\u0441\u043A\u043E\u0433\u043E \u0431\u0438\u0437\u043D\u0435\u0441\u0430 \u0438 \u043F\u043E\u043B\u0438\u0442\u0438\u043A\u0438.'
    publisher             = '\u0410\u041E \u00AB\u0420\u0421-\u0411\u0430\u043B\u0442\u00BB'
    category              = 'newspaper'
    cover_url = u'https://i.ytimg.com/vi/eeMqJGbjuY0/maxresdefault.jpg'
    language              = 'ru'
    no_stylesheets        = False
    remove_javascript = False
    auto_cleanup   = False
    oldest_article = 7
    max_articles_per_feed = 100


    remove_tags_before = dict(name='div', attrs={'class': 'main-content'})

    remove_tags_after = dict(name='div', attrs={'class': 'newstext'})

    remove_tags =   [
        dict(name='nav'),
        dict(name='aside'),
        dict(name='div', attrs={'class': 'news-left'}),
        dict(name='div', attrs={'class': 'rkngov'}),
        dict(name='div', attrs={'class': 'eye'})
        ] 

    feeds = [
        ('\u0420\u043E\u0441\u0431\u0430\u043B\u0442', 'https://www.rosbalt.ru/feed/')
        ]

Debunking Denialism: blog about pseudoscience in everyday life. Favicon.

Spoiler: