MobileRead Forums - View Single Post - (mostly) Russian and Ukrainian sources: state of built-in recipes, fixes, new recipes

bugmen00t · 07-22-2022, 02:19 PM

NEW ENGLISH RECIPES (OF RUSSIAN SOURCES)

Novaya Gazeta Europe (English version): the European reincarnation of the Novaya Gazeta newspaper. Favicon.

Fixes needed:

No images in articles (webp format)

Spoiler:

Meduza (English version): Latvia-based media founded by a group of former employees of the then independent Lenta.ru news website. Favicon.

Spoiler:

Holod (English version): Taisiya Bekbulatova's project with longreads, podcasts and videos on social and political topics about Russia. No updates since June 2022. Favicon replacement.

Spoiler:

NEW RUSSIAN RECIPES

Медуза: Latvia-based media founded by a group of former employees of the then independent Lenta.ru news website. Favicon.

Spoiler:

Собеседник: Russian weekly socio-political newspaper. Favicon.

Spoiler:

Холод: Taisiya Bekbulatova's project with longreads, podcasts and videos on social and political topics. Favicon replacement.

Spoiler:

Code:

#!/usr/bin/env python
# vim:fileencoding=utf-8

from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe, classes

class Kholod(BasicNewsRecipe):
    """Calibre news recipe for the Russian edition of Holod (holod.media).

    Purely declarative: the class only sets recipe attributes that the
    ``BasicNewsRecipe`` base class consumes; it overrides no methods.
    """

    # --- Metadata -------------------------------------------------------
    __author__ = 'bugmen00t'
    # Title decodes to the Cyrillic word "Холод".
    title = '\u0425\u043E\u043B\u043E\u0434'
    description = (
        '\u0418\u0441\u0442\u043E\u0440\u0438\u0438 \u043E \u0420\u043E\u0441\u0441\u0438\u0438: '
        '\u043C\u044B \u0438\u0449\u0435\u043C '
        '\u0437\u0430\u0445\u0432\u0430\u0442\u044B\u0432\u0430\u044E\u0449\u0438\u0435 '
        '\u0438\u0441\u0442\u043E\u0440\u0438\u0438 \u043F\u043E \u0432\u0441\u0435\u0439 '
        '\u0420\u043E\u0441\u0441\u0438\u0438, \u0430 \u043F\u043E\u0442\u043E\u043C '
        '\u0440\u0430\u0441\u0441\u043A\u0430\u0437\u044B\u0432\u0430\u0435\u043C \u0432\u0430\u043C.'
    )
    # Publisher decodes to "Таисия Бекбулатова" (Taisiya Bekbulatova).
    publisher = '\u0422\u0430\u0438\u0441\u0438\u044F \u0411\u0435\u043A\u0431\u0443\u043B\u0430\u0442\u043E\u0432\u0430'
    publication_type = 'blog'
    language = 'ru'
    cover_url = 'https://image.simplecastcdn.com/images/93a97011-6988-4787-8242-e202b2840fde/08e85f64-9901-44e1-b20c-7da01c5ce0c0/holodpodcastlogo.jpg'

    # --- Download limits ------------------------------------------------
    oldest_article = 14
    max_articles_per_feed = 200

    # --- Page clean-up --------------------------------------------------
    # Automatic clean-up is off; the explicit tag rules below are used.
    auto_cleanup = False
    no_stylesheets = False

    # Article content is taken from the <h1> headline up to the
    # article-content container.
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {
        'name': 'div',
        'attrs': {'class': 'article__content the-content text-column'},
    }

    # Blocks dropped from inside the kept region.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'inlinemore'}},
    ]

    # --- Feeds: (section title, RSS URL) --------------------------------
    feeds = [
        # "Актуально"
        ('\u0410\u043A\u0442\u0443\u0430\u043B\u044C\u043D\u043E',
         'https://holod.media/sections/daily/feed/'),
        # "Истории"
        ('\u0418\u0441\u0442\u043E\u0440\u0438\u0438',
         'https://holod.media/sections/stories/feed/'),
        # "Мнения"
        ('\u041C\u043D\u0435\u043D\u0438\u044F',
         'https://holod.media/sections/opinions/feed/'),
        # "Интервью"
        ('\u0418\u043D\u0442\u0435\u0440\u0432\u044C\u044E',
         'https://holod.media/sections/interviews/feed/'),
        # "Объясняем"
        ('\u041E\u0431\u044A\u044F\u0441\u043D\u044F\u0435\u043C',
         'https://holod.media/sections/explainers/feed/'),
    ]

Важные истории: Russian website specialising in investigative journalism. Favicon replacement #1, Favicon replacement #2.

Fixes needed:

No article header image
No images in some articles (webp format)

Spoiler:

Code:

#!/usr/bin/env python
# vim:fileencoding=utf-8

from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe, classes

class IStories(BasicNewsRecipe):
    """Calibre news recipe for "Важные истории" (istories.media).

    Purely declarative: the class only sets recipe attributes that the
    ``BasicNewsRecipe`` base class consumes; it overrides no methods.
    """

    # --- Metadata -------------------------------------------------------
    __author__ = 'bugmen00t'
    # Title decodes to "Важные истории".
    title = '\u0412\u0430\u0436\u043D\u044B\u0435 \u0438\u0441\u0442\u043E\u0440\u0438\u0438'
    description = (
        '\u0418\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0438\u0437\u0434\u0430\u043D\u0438\u0435, '
        '\u0441\u043F\u0435\u0446\u0438\u0430\u043B\u0438\u0437\u0438\u0440\u0443\u044E\u0449\u0435\u0435\u0441\u044F '
        '\u043D\u0430 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0441\u043A\u0438\u0445 '
        '\u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u044F\u0445. '
        '\u041E\u0441\u043D\u043E\u0432\u0430\u043D\u043E \u0432 2020 \u0433\u043E\u0434\u0443 '
        '\u0440\u043E\u0441\u0441\u0438\u0439\u0441\u043A\u0438\u043C\u0438 '
        '\u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0430\u043C\u0438 '
        '\u0420\u043E\u043C\u0430\u043D\u043E\u043C \u0410\u043D\u0438\u043D\u044B\u043C \u0438 '
        '\u041E\u043B\u0435\u0441\u0435\u0439 \u0428\u043C\u0430\u0433\u0443\u043D.'
    )
    publisher = 'Roman Anin & Olesya Shmagun'
    publication_type = 'blog'
    language = 'ru'
    cover_url = 'https://static.istories.media/public/cover.png'

    # --- Download limits ------------------------------------------------
    oldest_article = 21
    max_articles_per_feed = 50

    # --- Page clean-up --------------------------------------------------
    # Automatic clean-up is off; the explicit tag rules below are used.
    auto_cleanup = False
    no_stylesheets = True

    # Article content is taken from the <h1> headline up to the
    # <article> element.
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {'name': 'article'}

    # Elements dropped from inside the kept region: page header/footer,
    # the subscription form and several article-level <div> blocks.
    remove_tags = [
        {'name': 'header'},
        {'name': 'footer'},
        {'name': 'form', 'attrs': {'class': 'subscr'}},
        {'name': 'div', 'attrs': {'class': 'row'}},
        {'name': 'div', 'attrs': {'class': 'arrow-black'}},
        {'name': 'div', 'attrs': {'class': 'article-foot'}},
        {'name': 'div', 'attrs': {'class': 'article-toggle'}},
        {'name': 'div', 'attrs': {'class': 'article-soc'}},
    ]

    # --- Feeds: (section title, RSS URL) --------------------------------
    feeds = [
        # "истории"
        ('\u0438\u0441\u0442\u043E\u0440\u0438\u0438',
         'https://istories.media/rss/all.xml'),
    ]

N+1: science & technology news. Favicon.

Spoiler:

Code:

#!/usr/bin/env python
# vim:fileencoding=utf-8

from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1467724863(BasicNewsRecipe):
    """Calibre news recipe for N+1 (nplus1.ru).

    Purely declarative: the class only sets recipe attributes that the
    ``BasicNewsRecipe`` base class consumes; it overrides no methods.
    """

    # --- Metadata -------------------------------------------------------
    __author__ = 'bugmen00t'
    title = 'N+1'
    description = (
        '\u041D\u0430\u0443\u0447\u043D\u043E-\u043F\u043E\u043F\u0443\u043B\u044F\u0440\u043D\u043E\u0435 \u0440\u0430\u0437\u0432\u043B\u0435\u043A\u0430\u0442\u0435\u043B\u044C\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435 \u043E \u0442\u043E\u043C, \u0447\u0442\u043E \u043F\u0440\u043E\u0438\u0441\u0445\u043E\u0434\u0438\u0442 \u0432 \u043D\u0430\u0443\u043A\u0435, \u0442\u0435\u0445\u043D\u0438\u043A\u0435 \u0438 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u044F\u0445 \u043F\u0440\u044F\u043C\u043E \u0441\u0435\u0439\u0447\u0430\u0441. '
        '\u041D\u043E\u0432\u043E\u0441\u0442\u0438, \u0431\u043E\u043B\u044C\u0448\u0438\u0435 \u0441\u0442\u0430\u0442\u044C\u0438, \u0431\u043B\u043E\u0433\u0438 \u2014 \u044D\u0442\u043E \u0432\u0441\u0435 \u043F\u0440\u043E \u043D\u0430\u0441. '
        '\u041C\u044B \u0438\u0449\u0435\u043C \u0441\u0430\u043C\u043E\u0435 \u0438\u043D\u0442\u0435\u0440\u0435\u0441\u043D\u043E\u0435 \u0438 \u0434\u043E\u0441\u0442\u0430\u0432\u043B\u044F\u0435\u043C \u044D\u0442\u043E \u0447\u0438\u0442\u0430\u0442\u0435\u043B\u044F\u043C \u0432 \u043F\u043E\u043D\u044F\u0442\u043D\u043E\u0439, \u044F\u0441\u043D\u043E\u0439, \u043F\u0440\u0438\u0432\u043B\u0435\u043A\u0430\u0442\u0435\u043B\u044C\u043D\u043E\u0439 (\u0438 \u0441 \u0432\u0438\u0437\u0443\u0430\u043B\u044C\u043D\u043E\u0439 \u0442\u043E\u0447\u043A\u0438 \u0437\u0440\u0435\u043D\u0438\u044F) \u0444\u043E\u0440\u043C\u0435. '
        '\u041C\u044B \u2014 \u0447\u0443\u0442\u044C \u0431\u043E\u043B\u044C\u0448\u0435, \u0447\u0435\u043C \u043F\u0440\u043E\u0441\u0442\u043E \u043D\u0430\u0443\u043A\u0430!'
    )
    publisher = 'N+1'
    category = 'news'
    language = 'ru'
    cover_url = 'https://nplus1.ru/i/logo.png'

    # --- Download limits ------------------------------------------------
    oldest_article = 14
    max_articles_per_feed = 100

    # --- Page clean-up --------------------------------------------------
    # Automatic clean-up is off; the explicit tag rules below are used.
    auto_cleanup = False
    no_stylesheets = False
    remove_javascript = True

    # Article content is taken from the <h1> headline up to the "body"
    # container. (An earlier, disabled variant of this recipe anchored on
    # <article class="content"> instead of <h1>.)
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'body'}}

    # Share widgets dropped from inside the kept region.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'share-incut'}},
        {'name': 'div', 'attrs': {'class': 'share-mobile'}},
    ]

    # --- Feeds: (section title, RSS URL) --------------------------------
    feeds = [
        # "Новости науки"
        ('\u041d\u043e\u0432\u043e\u0441\u0442\u0438 \u043d\u0430\u0443\u043a\u0438',
         'https://nplus1.ru/rss'),
    ]

NEW UKRAINIAN RECIPES

BBC News Україна: BBC News in Ukrainian. Favicon.

Fixes needed:

No images in some articles (lazyload)
No images in some articles (webp format)

Spoiler: