Connoisseur
Posts: 82
Karma: 100000
Join Date: Aug 2015
Device: Kindle Keyboard 3G + Kindle Voyage WiFi + Kindle PW11 Kids WiFi
|
New recipes (part 11 of ??)
More Russian and Ukrainian sources.
Свободное пространство: news aggregator from the "Novaya Gazeta" editorial team. Favicon replacement.
Fixes needed: - No images in articles (.webp)
- Poor text formatting
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe
class NovayaGazeta(BasicNewsRecipe):
    """Calibre recipe for the single RSS feed at https://novaya.media/feed/rss.

    All configuration is declared as class attributes consumed by
    ``BasicNewsRecipe``; the only overridden hook is ``print_version``.
    """

    # --- Publication metadata -------------------------------------------
    title = '\u0421\u0432\u043E\u0431\u043E\u0434\u043D\u043E\u0435 \u043F\u0440\u043E\u0441\u0442\u0440\u0430\u043D\u0441\u0442\u0432\u043E'
    __author__ = 'bugmen00t'
    description = '\u041E\u0442\u043A\u0440\u044B\u0442\u044B\u0439 \u043A\u043E\u043D\u0441\u0442\u0440\u0443\u043A\u0442\u043E\u0440 \u043A\u043E\u043D\u0442\u0435\u043D\u0442\u0430, \u043F\u043E\u0434\u0433\u043E\u0442\u043E\u0432\u043B\u0435\u043D\u043D\u044B\u0439 \u0440\u043E\u0441\u0441\u0438\u0439\u0441\u043A\u043E\u0439 \u0440\u0435\u0434\u0430\u043A\u0446\u0438\u0435\u0439 \u00AB\u041D\u043E\u0432\u043E\u0439 \u0433\u0430\u0437\u0435\u0442\u044B\u00BB, \u0434\u0435\u0442\u0430\u043B\u0438 \u043A\u043E\u0442\u043E\u0440\u043E\u0433\u043E \u043A\u0430\u0436\u0434\u044B\u0439 \u0438\u0437 \u0447\u0438\u0442\u0430\u0442\u0435\u043B\u0435\u0439 \u0441\u043E\u0431\u0438\u0440\u0430\u0435\u0442 \u0434\u043B\u044F \u0441\u0435\u0431\u044F \u0441\u0430\u043C. \u041A\u043E\u043D\u0441\u0442\u0440\u0443\u043A\u0442\u043E\u0440 \u2014 \u044D\u0442\u043E \u043D\u043E\u0432\u044B\u0439 \u0441\u043F\u043E\u0441\u043E\u0431 \u0447\u0438\u0442\u0430\u0442\u044C \u043C\u0430\u0442\u0435\u0440\u0438\u0430\u043B\u044B \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u043E\u0432 \u00AB\u041D\u043E\u0432\u043E\u0439 \u0433\u0430\u0437\u0435\u0442\u044B\u00BB \u0438 \u0432\u0441\u0435 \u043E\u0441\u043D\u043E\u0432\u043D\u044B\u0435 \u043F\u043E\u0442\u043E\u043A\u0438 \u043A\u043E\u043D\u0442\u0435\u043D\u0442\u0430: \u043B\u044E\u0431\u044B\u0435 \u0432\u0435\u0431-\u0441\u0430\u0439\u0442\u044B, Telegram-\u043A\u0430\u043D\u0430\u043B\u044B, Youtube-\u043A\u0430\u043D\u0430\u043B\u044B.'
    publisher = '\u0420\u0435\u0434\u0430\u043A\u0446\u0438\u043E\u043D\u043D\u044B\u0439 \u043A\u043E\u043B\u043B\u0435\u043A\u0442\u0438\u0432 \u00AB\u041D\u043E\u0432\u043E\u0439 \u0433\u0430\u0437\u0435\u0442\u044B\u00BB'
    category = 'newspaper'
    language = 'ru'
    cover_url = 'https://novaya.media/ic_puzzle_footer.svg'

    # --- Download limits ------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 30

    # --- Page clean-up --------------------------------------------------
    # Stylesheets and scripts are both kept, and automatic clean-up is off;
    # trimming is done only through the two boundary selectors below.
    no_stylesheets = False
    remove_javascript = False
    auto_cleanup = False
    remove_tags_before = {
        'name': 'h1',
        'attrs': {'class': 'Header_title__2zxwH'},
    }
    remove_tags_after = {
        'name': 'div',
        'attrs': {'class': 'Post_contentInnerWrapper__3GWHl'},
    }

    # --- Sources --------------------------------------------------------
    feeds = [
        (
            '\u0421\u0432\u043E\u0431\u043E\u0434\u043D\u043E\u0435 \u043F\u0440\u043E\u0441\u0442\u0440\u0430\u043D\u0441\u0442\u0432\u043E',
            'https://novaya.media/feed/rss',
        ),
    ]

    def print_version(self, url):
        """Return ``url`` with the ``?print=true`` suffix appended.

        NOTE(review): the suffix is appended unconditionally, so a URL that
        already carries a query string would end up with two ``?`` — confirm
        the feed never emits such links.
        """
        return url + '?print=true'
KyivPost: Ukraine’s English-language newspaper. Favicon.
Fixes needed: English version:
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe
class KyivPost(BasicNewsRecipe):
    """Calibre recipe for the feed at https://www.kyivpost.com/feed.

    Purely declarative: every setting is a class attribute read by
    ``BasicNewsRecipe``; no hooks are overridden.
    """

    # --- Publication metadata -------------------------------------------
    title = 'Kyiv Post'
    __author__ = 'bugmen00t'
    description = 'The Kyiv Post is Ukraine\u2019s English-language newspaper. The newspaper\u2019s first print edition came out on Oct. 18, 1995, and went online in 1997. The newspaper\u2019s motto is \u201CUkraine\u2019s Global Voice,\u201D which in 2018 replaced the previous motto of \u201CIndependence. Community. Trust.\u201D Both slogans reflect the newspaper\u2019s commitment to the highest journalistic and ethical standards.'
    publisher = 'BIZNESGRUPP TOV'
    category = 'newspaper'
    # NOTE(review): 'UK' is not an ISO 3166 region code (United Kingdom is
    # 'GB', Ukraine is 'UA') — confirm calibre accepts this value as intended.
    language = 'en_UK'
    cover_url = 'https://www.kyivpost.com/wp-content/themes/kyivpost/assets/img/svg/logo-foot.svg'

    # --- Download limits ------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 10

    # --- Page clean-up --------------------------------------------------
    # Scripts are stripped, stylesheets are kept, automatic clean-up is off.
    no_stylesheets = False
    remove_javascript = True
    auto_cleanup = False
    # The same <article class="article"> selector is used for both
    # boundaries.
    remove_tags_before = {'name': 'article', 'attrs': {'class': 'article'}}
    remove_tags_after = {'name': 'article', 'attrs': {'class': 'article'}}
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'entry-footer hide_post_header'}},
    ]

    # --- Sources --------------------------------------------------------
    feeds = [
        ('News', 'https://www.kyivpost.com/feed'),
    ]
Ukrainian version:
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe
class KyivPost(BasicNewsRecipe):
    """Calibre recipe for the feed at https://www.kyivpost.com/uk/feed.

    Purely declarative: every setting is a class attribute read by
    ``BasicNewsRecipe``; no hooks are overridden.
    """

    # --- Publication metadata -------------------------------------------
    title = 'Kyiv Post'
    __author__ = 'bugmen00t'
    description = '\u0423\u043A\u0440\u0430\u0457\u043D\u0441\u044C\u043A\u0430 \u0430\u043D\u0433\u043B\u043E\u043C\u043E\u0432\u043D\u0430 \u0433\u0430\u0437\u0435\u0442\u0430, \u043D\u0430\u0433\u043E\u0440\u043E\u0434\u0436\u0435\u043D\u0430 \u0443 2014 \u0440\u043E\u043A\u0443. \u041F\u0435\u0440\u0448\u0438\u0439 \u0434\u0440\u0443\u043A\u043E\u0432\u0430\u043D\u0438\u0439 \u043F\u0440\u0438\u043C\u0456\u0440\u043D\u0438\u043A \u0433\u0430\u0437\u0435\u0442\u0438 \u0432\u0438\u0439\u0448\u043E\u0432 18 \u0436\u043E\u0432\u0442\u043D\u044F 1995 \u0440\u043E\u043A\u0443, \u0430 \u0432 \u0406\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0432\u0430\u0440\u0456\u0430\u043D\u0442\u0456 Kyiv Post \u0437\u2019\u044F\u0432\u0438\u043B\u0430\u0441\u044C \u0432 1997 \u0440\u043E\u0446\u0456. \u0414\u0435\u0432\u0456\u0437 \u0433\u0430\u0437\u0435\u0442\u0438: \u00AB\u0413\u043B\u043E\u0431\u0430\u043B\u044C\u043D\u0438\u0439 \u0433\u043E\u043B\u043E\u0441 \u0423\u043A\u0440\u0430\u0457\u043D\u0438\u00BB, \u044F\u043A\u0438\u0439 \u0443 2018 \u0440\u043E\u0446\u0456 \u0437\u0430\u043C\u0456\u043D\u0438\u0432 \u043F\u043E\u043F\u0435\u0440\u0435\u0434\u043D\u0456\u0439 \u0434\u0435\u0432\u0456\u0437 \u00AB\u041D\u0435\u0437\u0430\u043B\u0435\u0436\u043D\u0456\u0441\u0442\u044C. \u0421\u043F\u0456\u043B\u044C\u043D\u043E\u0442\u0430. \u0414\u043E\u0432\u0456\u0440\u0430.\u00BB \u041E\u0431\u0438\u0434\u0432\u0430 \u0433\u0430\u0441\u043B\u0430 \u0432\u0456\u0434\u043E\u0431\u0440\u0430\u0436\u0430\u044E\u0442\u044C \u0442\u0435, \u0449\u043E \u0443 \u0432\u0438\u0434\u0430\u043D\u043D\u0456 \u0434\u043E\u0442\u0440\u0438\u043C\u0443\u044E\u0442\u044C\u0441\u044F \u043D\u0430\u0439\u0432\u0438\u0449\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0441\u044C\u043A\u0438\u0445 \u0442\u0430 \u0435\u0442\u0438\u0447\u043D\u0438\u0445 \u0441\u0442\u0430\u043D\u0434\u0430\u0440\u0442\u0456\u0432.'
    publisher = 'BIZNESGRUPP TOV'
    category = 'newspaper'
    language = 'uk'
    cover_url = 'https://www.kyivpost.com/wp-content/themes/kyivpost/assets/img/svg/logo-foot.svg'

    # --- Download limits ------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 10

    # --- Page clean-up --------------------------------------------------
    # Scripts are stripped, stylesheets are kept, automatic clean-up is off.
    no_stylesheets = False
    remove_javascript = True
    auto_cleanup = False
    # The same <article class="article"> selector is used for both
    # boundaries.
    remove_tags_before = {'name': 'article', 'attrs': {'class': 'article'}}
    remove_tags_after = {'name': 'article', 'attrs': {'class': 'article'}}
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'entry-footer hide_post_header'}},
    ]

    # --- Sources --------------------------------------------------------
    feeds = [
        ('\u041D\u043E\u0432\u0438\u043D\u0438', 'https://www.kyivpost.com/uk/feed'),
    ]
Arabic version (not sure if the output is correct, as the text is not RTL):
The Moscow Times: English- and Russian-language online newspaper. Favicon.
Russian version:
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe
class MoscowTimes(BasicNewsRecipe):
    """Calibre recipe for three RSS feeds served from www.moscowtimes.ru.

    Settings are class attributes read by ``BasicNewsRecipe``; the one
    overridden hook, ``preprocess_html``, copies ``data-src`` into ``src``
    on image tags.
    """

    # --- Publication metadata -------------------------------------------
    title = 'The Moscow Times'
    __author__ = 'bugmen00t'
    description = '\u0410\u043D\u0433\u043B\u043E\u044F\u0437\u044B\u0447\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435 \u043E \u0420\u043E\u0441\u0441\u0438\u0438. \u041D\u0430\u0448\u0430 \u0440\u0435\u0434\u0430\u043A\u0446\u0438\u044F \u0434\u0435\u043B\u0438\u0442\u0441\u044F \u0441 \u0447\u0438\u0442\u0430\u0442\u0435\u043B\u044F\u043C\u0438 \u043F\u043E \u0432\u0441\u0435\u043C\u0443 \u043C\u0438\u0440\u0443 \u043F\u043E\u0441\u043B\u0435\u0434\u043D\u0438\u043C\u0438 \u043D\u043E\u0432\u043E\u0441\u0442\u044F\u043C\u0438 \u0438 \u0441\u0430\u043C\u044B\u043C\u0438 \u0438\u043D\u0442\u0435\u0440\u0435\u0441\u043D\u044B\u043C\u0438 \u0441\u043E\u0431\u044B\u0442\u0438\u044F\u043C\u0438 \u0441\u0430\u043C\u043E\u0439 \u0431\u043E\u043B\u044C\u0448\u043E\u0439 \u0441\u0442\u0440\u0430\u043D\u044B \u043D\u0430 \u0417\u0435\u043C\u043B\u0435.'
    publisher = '\u041E\u041E\u041E \u0422\u0438\u044D\u043C\u0442\u0438'
    category = 'newspaper'
    language = 'ru'
    cover_url = 'https://static.themoscowtimes.com/img/share_default.jpg'

    # --- Download limits ------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 50

    # --- Page clean-up --------------------------------------------------
    # Scripts are stripped, stylesheets are kept, automatic clean-up is off.
    no_stylesheets = False
    remove_javascript = True
    auto_cleanup = False
    remove_tags_before = {'name': 'article'}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'article__bottom'}}
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'social'}},
        {'name': 'div', 'attrs': {'class': 'related-article__content'}},
    ]

    # --- Sources --------------------------------------------------------
    feeds = [
        ('\u041D\u043E\u0432\u043E\u0441\u0442\u0438', 'https://www.moscowtimes.ru/rss/news'),
        ('\u041C\u043D\u0435\u043D\u0438\u044F', 'https://www.moscowtimes.ru/rss/opinion'),
        ('\u041F\u0435\u0440\u0435\u0432\u043E\u0434\u044B Financial Times', 'https://www.moscowtimes.ru/rss/ft'),
    ]

    def preprocess_html(self, soup):
        """Set ``src`` from ``data-src`` on every ``<img>`` that has one.

        ``soup`` is modified in place and the same object is returned.
        Presumably the site defers image loading via ``data-src`` — verify
        against the live markup.
        """
        deferred_images = soup.findAll('img', attrs={'data-src': True})
        for image_tag in deferred_images:
            image_tag['src'] = image_tag['data-src']
        return soup
English version:
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe
class MoscowTimes(BasicNewsRecipe):
    """Calibre recipe for four RSS feeds served from www.themoscowtimes.com.

    Purely declarative: every setting is a class attribute read by
    ``BasicNewsRecipe``; no hooks are overridden.
    """

    # --- Publication metadata -------------------------------------------
    title = 'The Moscow Times'
    __author__ = 'bugmen00t'
    description = 'The Moscow Times is Russia’s leading, independent English-language media outlet. Our team of Russian and English journalists provide readers across the world with breaking news, engaging stories and balanced reporting about the largest country on Earth.'
    publisher = 'Tiamti LLC'
    category = 'newspaper'
    language = 'en_RU'
    cover_url = 'https://static.themoscowtimes.com/img/share_default.jpg'

    # --- Download limits ------------------------------------------------
    oldest_article = 14
    max_articles_per_feed = 50

    # --- Page clean-up --------------------------------------------------
    # Scripts are stripped, stylesheets are kept, automatic clean-up is off.
    no_stylesheets = False
    remove_javascript = True
    auto_cleanup = False
    remove_tags_before = {'name': 'article'}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'article__tags'}}
    # The tags block that serves as the trailing boundary above is also
    # listed here for removal.
    remove_tags = [
        {'name': 'aside'},
        {'name': 'footer'},
        {'name': 'section', 'attrs': {'class': 'cluster'}},
        {'name': 'div', 'attrs': {'class': 'article__tags'}},
        {'name': 'div', 'attrs': {'class': 'social'}},
        {'name': 'div', 'attrs': {'class': 'related-article__content'}},
    ]

    # --- Sources --------------------------------------------------------
    feeds = [
        ('News', 'https://www.themoscowtimes.com/rss/news'),
        ('Opinion', 'https://www.themoscowtimes.com/rss/opinion'),
        ('Arts and Life', 'https://www.themoscowtimes.com/rss/city'),
        ('Meanwhile', 'https://www.themoscowtimes.com/rss/meanwhile'),
    ]
Довод: local news from Vladimir city & local regions. Favicon.
Fixes needed:
Last edited by bugmen00t; 08-17-2022 at 09:16 AM.
|