Connoisseur
Posts: 82
Karma: 100000
Join Date: Aug 2015
Device: Kindle Keyboard 3G + Kindle Voyage WiFi + Kindle PW11 Kids WiFi
|
New recipes (part 09 of ??)
More Russian recipes
Люди Байкала: blog about life in rural Siberia. Favicon.
Fixes needed: - No lead image, part of lead text is missing
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe
class BaikalJournal(BasicNewsRecipe):
    """calibre recipe for the baikal-journal.ru RSS feed.

    Known issues reported by the recipe author: no lead image is fetched
    and part of the lead text is missing.
    """

    # --- Publication metadata -------------------------------------------
    title = '\u041B\u044E\u0434\u0438 \u0411\u0430\u0439\u043A\u0430\u043B\u0430'
    description = '\u041D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u044B\u0439 \u0430\u0432\u0442\u043E\u0440\u0441\u043A\u0438\u0439 \u0438\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0436\u0443\u0440\u043D\u0430\u043B \u043E \u0436\u0438\u0437\u043D\u0438 \u0432 \u0421\u0438\u0431\u0438\u0440\u0438 \u0438 \u0432\u043E\u043A\u0440\u0443\u0433 \u0411\u0430\u0439\u043A\u0430\u043B\u0430, \u043E\u043F\u0438\u0441\u044B\u0432\u0430\u044E\u0449\u0438\u0439 \u0436\u0438\u0437\u043D\u044C \u0432 \u0441\u0438\u0431\u0438\u0440\u0441\u043A\u043E\u0439 \u0433\u043B\u0443\u0431\u0438\u043D\u043A\u0435.'
    publisher = '\u041E\u043B\u044C\u0433\u0430 \u041C\u0443\u0442\u043E\u0432\u0438\u043D\u0430, \u0415\u043B\u0435\u043D\u0430 \u0422\u0440\u0438\u0444\u043E\u043D\u043E\u0432\u0430'
    __author__ = 'bugmen00t'
    category = 'blog'
    language = 'ru'
    cover_url = u'https://baikal-journal.ru/wp-content/themes/baikal/assets/img/logo-full.svg'

    # --- Download limits ------------------------------------------------
    max_articles_per_feed = 10
    oldest_article = 30

    # --- Page clean-up switches -----------------------------------------
    auto_cleanup = False
    remove_javascript = False
    no_stylesheets = False

    # Page elements stripped from every downloaded article.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'distance-badge'}},
        {'name': 'div', 'attrs': {'class': 'lead-footer__sharing'}},
        {'name': 'div', 'attrs': {'class': 'm-block-ctaline'}},
        {'name': 'div', 'attrs': {'class': 'm-block-readmore format-inline'}},
        {'name': 'footer'},
        {'name': 'div', 'attrs': {'class': 'related-block'}},
        {'name': 'div', 'attrs': {'class': 'selection-block'}},
        {'name': 'div', 'attrs': {'class': 'last-cta'}},
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (
            '\u041B\u044E\u0434\u0438 \u0411\u0430\u0439\u043A\u0430\u043B\u0430',
            'https://baikal-journal.ru/feed/',
        ),
    ]
Настоящее время: Prague-based Russian-language TV channel founded by RFE/RL & VoA. Favicon.
Fixes needed: - In some articles, all text is conglomerated into a single paragraph
- Sometimes live online text broadcasts (live blogs) are empty
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe
class CurrentTime(BasicNewsRecipe):
    """calibre recipe for the currenttime.tv feeds.

    Known issues reported by the recipe author: in some articles all text
    is merged into a single paragraph, and live text broadcasts are
    sometimes empty.
    """

    title = '\u041D\u0430\u0441\u0442\u043E\u044F\u0449\u0435\u0435 \u0432\u0440\u0435\u043C\u044F'
    __author__ = 'bugmen00t'
    description = '\u0422\u0435\u043B\u0435\u043A\u0430\u043D\u0430\u043B "\u041D\u0430\u0441\u0442\u043E\u044F\u0449\u0435\u0435 \u0412\u0440\u0435\u043C\u044F" \u0440\u0430\u0441\u0441\u043A\u0430\u0437\u044B\u0432\u0430\u0435\u0442 \u043E \u0432\u0430\u0436\u043D\u044B\u0445 \u043D\u043E\u0432\u043E\u0441\u0442\u044F\u0445 \u0438 \u0437\u043B\u043E\u0431\u043E\u0434\u043D\u0435\u0432\u043D\u044B\u0445 \u0442\u0435\u043C\u0430\u0445, \u043F\u0440\u0435\u0434\u043E\u0441\u0442\u0430\u0432\u043B\u044F\u044F \u0430\u0443\u0434\u0438\u0442\u043E\u0440\u0438\u0438 \u0442\u043E, \u0447\u0442\u043E \u043D\u0435 \u0432\u0441\u0435\u0433\u0434\u0430 \u043C\u043E\u0433\u0443\u0442 \u043E\u0431\u0435\u0441\u043F\u0435\u0447\u0438\u0442\u044C \u043C\u0435\u0441\u0442\u043D\u044B\u0435 \u0421\u041C\u0418: \u043D\u043E\u0432\u043E\u0441\u0442\u0438 \u0431\u0435\u0437 \u0446\u0435\u043D\u0437\u0443\u0440\u044B, \u043E\u0442\u0432\u0435\u0442\u0441\u0442\u0432\u0435\u043D\u043D\u044B\u0439 \u043E\u0431\u043C\u0435\u043D \u043C\u043D\u0435\u043D\u0438\u044F\u043C\u0438, \u043E\u0442\u043A\u0440\u044B\u0442\u043E\u0435 \u043E\u0431\u0441\u0443\u0436\u0434\u0435\u043D\u0438\u0435 \u043F\u0440\u043E\u0431\u043B\u0435\u043C.'
    publisher = 'RFE/RL ("\u0420\u0430\u0434\u0438\u043E \u0421\u0432\u043E\u0431\u043E\u0434\u043D\u0430\u044F \u0415\u0432\u0440\u043E\u043F\u0430"/"\u0420\u0430\u0434\u0438\u043E \u0421\u0432\u043E\u0431\u043E\u0434\u0430") \u043F\u0440\u0438 \u0443\u0447\u0430\u0441\u0442\u0438\u0438 VoA ("\u0413\u043E\u043B\u043E\u0441 \u0410\u043C\u0435\u0440\u0438\u043A\u0438")'
    category = 'newspaper'
    cover_url = u'https://www.currenttime.tv/Content/responsive/RFE/ru-RU-TV/img/top_logo_news.png'
    language = 'ru'

    # Page clean-up switches.
    no_stylesheets = False
    remove_javascript = False
    auto_cleanup = False

    # Download limits.
    remove_empty_feeds = True
    oldest_article = 14
    max_articles_per_feed = 20

    # Keep only the part of the page between the headline and the body
    # container.
    remove_tags_before = dict(name='h1')
    remove_tags_after = dict(name='div', attrs={'class': 'body-container'})

    # Page elements stripped from every downloaded article.
    remove_tags = [
        # NOTE(review): the trailing space in this class value is kept from
        # the original recipe -- confirm it matches the site's markup.
        dict(name='div', attrs={'class': 'publishing-details '}),
        dict(name='div', attrs={'class': 'separator'}),
        dict(name='div', attrs={'class': 'links'}),
        # 'share--box' was listed twice in the original; once is enough.
        dict(name='div', attrs={'class': 'share--box'}),
        dict(name='aside'),
        dict(name='div', attrs={'class': 'media-block also-read'}),
        dict(name='div', attrs={'class': 'media-block-wrap'}),
        dict(name='div', attrs={'class': 'media-download'}),
        dict(name='div', attrs={'class': 'wsw__embed'}),
        dict(name='div', attrs={'class': 'prog-hdr'}),
        dict(name='div', attrs={'class': 'dropdown__holder'}),
        dict(name='div', attrs={'id': 'ymla-section'}),
        dict(name='div', attrs={'id': 'comments'}),
        dict(name='a', attrs={'class': 'back-to-top-link'}),
        dict(name='footer'),
        dict(name='li', attrs={'class': 'socials block-socials'}),
        dict(name='div', attrs={'data-sp_api': 'pangea-video'}),
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        ('\u041D\u043E\u0432\u043E\u0441\u0442\u0438', 'https://www.currenttime.tv/api/zgbip_e_tpp_'),
        ('\u0421\u0435\u043C\u044C \u0441\u043E\u0440\u043E\u043A', 'https://www.currenttime.tv/api/ztktpyeimupt'),
        ('\u042D\u043A\u0441\u043A\u043B\u044E\u0437\u0438\u0432', 'https://www.currenttime.tv/api/zpyomoe-rimi'),
        ('\u0412\u044B\u0431\u043E\u0440 \u0440\u0435\u0434\u0430\u043A\u0446\u0438\u0438', 'https://www.currenttime.tv/api/zqk-poekpbpo'),
        ('\u0420\u043E\u0441\u0441\u0438\u044F', 'https://www.currenttime.tv/api/zuvmpvepo_pv'),
        ('\u0423\u043A\u0440\u0430\u0438\u043D\u0430', 'https://www.currenttime.tv/api/zkvmptemo_pt'),
        ('\u0411\u0435\u043B\u0430\u0440\u0443\u0441\u044C', 'https://www.currenttime.tv/api/zvvmm_eoo_mt'),
        ('\u0410\u0437\u0438\u044F', 'https://www.currenttime.tv/api/zbvtpqetoupq'),
        ('\u0415\u0432\u0440\u043E\u043F\u0430', 'https://www.currenttime.tv/api/z-vmpoevo_pi'),
        ('\u0410\u043C\u0435\u0440\u0438\u043A\u0430', 'https://www.currenttime.tv/api/zbvmpieto_pp'),
        ('\u0411\u043B\u0438\u0436\u043D\u0438\u0439 \u0412\u043E\u0441\u0442\u043E\u043A', 'https://www.currenttime.tv/api/zrvtppeuqupm'),
        ('\u041B\u043E\u043D\u0433\u0440\u0438\u0434\u044B', 'https://www.currenttime.tv/api/zibmmyejv_my'),
        ('\u0420\u0435\u043F\u043E\u0440\u0442\u0430\u0436', 'https://www.currenttime.tv/api/zrpppqeujppo'),
        ('\u0420\u0435\u043F\u043E\u0440\u0442\u0430\u0436\u0438 \u043F\u0440\u043E\u0433\u0440\u0430\u043C\u043C\u044B \u0412\u0435\u0447\u0435\u0440', 'https://www.currenttime.tv/api/zvrrmoeourmp'),
        ('\u0418\u043D\u0442\u0435\u0440\u0432\u044C\u044E', 'https://www.currenttime.tv/api/zqpppoekjppi'),
        ('\u0418\u043D\u0444\u043E\u0433\u0440\u0430\u0444\u0438\u043A\u0430', 'https://www.currenttime.tv/api/zmqmpyebumpv'),
    ]
Фонтанка: Saint Petersburg news portal. Favicon.
Fixes needed: - Broken formatting & missing text in articles with complex layout
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe
class Fontanka(BasicNewsRecipe):
    """calibre recipe for the fontanka.ru RSS feed.

    Known issue reported by the recipe author: broken formatting and
    missing text in articles with a complex layout.
    """

    # --- Publication metadata -------------------------------------------
    title = '\u0424\u043E\u043D\u0442\u0430\u043D\u043A\u0430'
    description = '\u0415\u0436\u0435\u0434\u043D\u0435\u0432\u043D\u043E\u0435 \u043F\u0435\u0442\u0435\u0440\u0431\u0443\u0440\u0433\u0441\u043A\u043E\u0435 \u043E\u0431\u0449\u0435\u0441\u0442\u0432\u0435\u043D\u043D\u043E-\u043F\u043E\u043B\u0438\u0442\u0438\u0447\u0435\u0441\u043A\u043E\u0435 \u0441\u0435\u0442\u0435\u0432\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435.'
    publisher = '\u0410\u041E "\u0410\u0416\u0423\u0420-\u041C\u0415\u0414\u0418\u0410"'
    __author__ = 'bugmen00t'
    category = 'blog'
    language = 'ru'
    cover_url = u'https://www.fontanka.ru/longreads/69505589/2015/images/tild3834-3362-4166-b239-366134363733____-01.png'

    # --- Download limits ------------------------------------------------
    max_articles_per_feed = 30
    oldest_article = 7

    # --- Page clean-up switches -----------------------------------------
    auto_cleanup = False
    remove_javascript = False
    no_stylesheets = False

    # Keep only the part of the page between the headline and the
    # article body section.
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {'name': 'section', 'attrs': {'itemprop': 'articleBody'}}

    # Page elements stripped from every downloaded article.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'ADdj ADc5'}},
        {'name': 'div', 'attrs': {'class': 'DLj1'}},
        {'name': 'div', 'attrs': {'class': 'DTrp'}},
        {'name': 'div', 'attrs': {'class': 'EHed'}},
        {'name': 'div', 'attrs': {'class': 'FHwp'}},
        {'name': 'div', 'attrs': {'class': 'ENr-'}},
        {'name': 'div', 'attrs': {'class': 'ENt7'}},
        {'name': 'div', 'attrs': {'class': 't004'}},
        {'name': 'div', 'attrs': {'class': 't120'}},
        {'name': 'div', 'attrs': {'class': 't123'}},
        {'name': 'div', 'attrs': {'class': 't405'}},
        {'name': 'div', 'attrs': {'class': 't463'}},
        # For articles from https://doctorpiter.ru
        {'name': 'div', 'attrs': {'class': 'article__block article__block_type-links'}},
        {'name': 'div', 'attrs': {'class': 'feedback-request-form__before'}},
        {'name': 'div', 'attrs': {'class': 'related-entities-container'}},
        {'name': 'div', 'attrs': {'class': 'tags article-footer__tags'}},
        {'name': 'hr', 'attrs': {'class': 'article-footer-divider'}},
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        ('Fontanka.ru', 'https://www.fontanka.ru/fontanka.rss'),
    ]
The Bell: business news Favicon.
Fixes needed: Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe
class TheBell(BasicNewsRecipe):
    """calibre recipe for the thebell.io feed."""

    # --- Publication metadata -------------------------------------------
    title = 'The Bell'
    description = '\u0418\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0438\u0437\u0434\u0430\u043D\u0438\u0435 \u0438 \u0438\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0438\u043E\u043D\u043D\u044B\u0439 \u0431\u044E\u043B\u043B\u0435\u0442\u0435\u043D\u044C, \u0441\u043F\u0435\u0446\u0438\u0430\u043B\u0438\u0437\u0438\u0440\u0443\u044E\u0449\u0435\u0435\u0441\u044F \u043D\u0430 \u0431\u0438\u0437\u043D\u0435\u0441-\u043D\u043E\u0432\u043E\u0441\u0442\u044F\u0445.'
    publisher = '\u0415\u043B\u0438\u0437\u0430\u0432\u0435\u0442\u0430 \u041E\u0441\u0435\u0442\u0438\u043D\u0441\u043A\u0430\u044F, \u0418\u0440\u0438\u043D\u0430 \u041C\u0430\u043B\u043A\u043E\u0432\u0430'
    __author__ = 'bugmen00t'
    category = 'newspaper'
    language = 'ru'
    cover_url = u'https://thebell.io/wp-content/uploads/2018/03/thebell-cover.png'

    # --- Download limits ------------------------------------------------
    max_articles_per_feed = 20
    oldest_article = 7

    # --- Page clean-up switches -----------------------------------------
    auto_cleanup = False
    remove_javascript = False
    no_stylesheets = True

    # Keep only the part of the page between the headline and the post
    # container.
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'post'}}

    # Page elements stripped from every downloaded article.
    # NOTE(review): several class values embed generated-looking tokens
    # (e.g. 'ng-tns-c84-1'); presumably these can change when the site is
    # rebuilt -- verify against the live markup.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'post_bottom ng-tns-c84-1 ng-star-inserted'}},
        {'name': 'div', 'attrs': {'class': 'post_tags ng-tns-c84-1 ng-star-inserted'}},
        {'name': 'div', 'attrs': {'class': 'post_subscribe ng-tns-c84-1 ng-star-inserted'}},
        {'name': 'div', 'attrs': {'class': 'author__content_image author__content_image_small ng-star-inserted'}},
        {'name': 'div', 'attrs': {'class': 'email email_small ng-star-inserted'}},
        {'name': 'div', 'attrs': {'class': 'post_share ng-tns-c84-1'}},
        {'name': 'div', 'attrs': {'class': 'social_media'}},
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        ('The Bell', 'https://thebell.io/feed'),
    ]
Fixed recipe for Сноб (snob.recipe): URL transformation subroutine lacked the last line
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe
class Snob(BasicNewsRecipe):
    """calibre recipe for the snob.ru RSS feed.

    Article links are normalised by ``get_article_url``, which drops the
    ``?utm...`` tracking query string from feed links.
    """

    title = '\u0421\u043D\u043E\u0431'
    __author__ = 'bugmen00t'
    description = '\u0414\u0438\u0441\u043A\u0443\u0441\u0441\u0438\u043E\u043D\u043D\u043E\u0435, \u0438\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0438\u043E\u043D\u043D\u043E\u0435 \u0438 \u043E\u0431\u0449\u0435\u0441\u0442\u0432\u0435\u043D\u043D\u043E\u0435 \u043F\u0440\u043E\u0441\u0442\u0440\u0430\u043D\u0441\u0442\u0432\u043E \u0434\u043B\u044F \u043B\u044E\u0434\u0435\u0439, \u043A\u043E\u0442\u043E\u0440\u044B\u0435 \u0436\u0438\u0432\u0443\u0442 \u0432 \u0440\u0430\u0437\u043D\u044B\u0445 \u0441\u0442\u0440\u0430\u043D\u0430\u0445, \u0433\u043E\u0432\u043E\u0440\u044F\u0442 \u043D\u0430 \u0440\u0430\u0437\u043D\u044B\u0445 \u044F\u0437\u044B\u043A\u0430\u0445, \u043D\u043E \u0434\u0443\u043C\u0430\u044E\u0442 \u043F\u043E-\u0440\u0443\u0441\u0441\u043A\u0438.'
    publisher = '\u041E\u041E\u041E \u00AB\u0421\u043D\u043E\u0431 \u041C\u0435\u0434\u0438\u0430\u00BB'
    category = 'magazine'
    cover_url = u'https://snob.ru/indoc/tilda/995317/images/tild3233-6631-4664-b663-353636373235__e3057a5fee932ada1aaf.png'
    language = 'ru'

    # Page clean-up switches.
    no_stylesheets = False
    remove_javascript = False
    auto_cleanup = False

    # Download limits.
    oldest_article = 5
    max_articles_per_feed = 50

    # Keep only the part of the page between the headline and the
    # <article> element.
    remove_tags_before = dict(name='h1')
    remove_tags_after = dict(name='article')

    # Page elements stripped from every downloaded article.
    remove_tags = [
        dict(name='div', attrs={'class': 'entry__tags'}),
        dict(name='div', attrs={'class': 'entry__likes'}),
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        ('\u0421\u043D\u043E\u0431', 'https://snob.ru/rss/'),
    ]

    def get_article_url(self, article):
        """Return the article link with any ``?utm...`` query removed.

        :param article: feed entry; only its ``get('link', None)`` lookup
            is used here.
        :return: the link truncated at the first ``'?utm'`` when it
            contains ``'utm_source'``; otherwise the link unchanged
            (``None`` when the entry has no link).
        """
        link = article.get('link', None)
        # Guard against a missing link: ``'utm_source' in None`` would
        # raise TypeError.
        if link and 'utm_source' in link:
            return link.split('?utm')[0]
        # Links without tracking parameters must still be returned; the
        # original fell off the end here and returned None for them.
        return link
|