Connoisseur
Posts: 82
Karma: 100000
Join Date: Aug 2015
Device: Kindle Keyboard 3G + Kindle Voyage WiFi + Kindle PW11 Kids WiFi
|
New/fixed Russian and Ukrainian recipes (part 14)
UA-Футбол: soccer news from Ukraine and around the world. Favicon.
Fixes needed: - Text artifacts in articles with complex formatting (live feeds etc.)
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe, classes
class UAFootball(BasicNewsRecipe):
    """Calibre recipe for UA-Football (ua-football.com) soccer news.

    The active settings pull the Ukrainian-language RSS feed. A
    Russian-language variant is kept below as commented-out settings; to
    use it, swap the two metadata groups and enable ``print_version`` at
    the bottom of the class.
    """

    # --- Russian version (disabled) ---
    # title = 'UA-\u0424\u0443\u0442\u0431\u043E\u043B'
    # description = '\u0410\u043A\u0442\u0443\u0430\u043B\u044C\u043D\u044B\u0435 \u0442\u0435\u043C\u044B \u0444\u0443\u0442\u0431\u043E\u043B\u044C\u043D\u043E\u0439 \u0436\u0438\u0437\u043D\u0438 \u0423\u043A\u0440\u0430\u0438\u043D\u044B \u0438 \u0432\u0441\u0435\u0433\u043E \u043C\u0438\u0440\u0430.'
    # language = 'ru_UK'
    # feeds = [
    #     ('\u041D\u043E\u0432\u043E\u0441\u0442\u0438 \u0444\u0443\u0442\u0431\u043E\u043B\u0430', 'https://www.ua-football.com/rss/all.xml')
    # ]

    # --- Ukrainian version (active) ---
    title = 'UA-\u0424\u0443\u0442\u0431\u043E\u043B'
    description = '\u0410\u043A\u0442\u0443\u0430\u043B\u044C\u043D\u0456 \u0442\u0435\u043C\u0438 \u0444\u0443\u0442\u0431\u043E\u043B\u044C\u043D\u043E\u0433\u043E \u0436\u0438\u0442\u0442\u044F \u0423\u043A\u0440\u0430\u0457\u043D\u0438 \u0442\u0430 \u0432\u0441\u044C\u043E\u0433\u043E \u0441\u0432\u0456\u0442\u0443.'
    language = 'uk'
    feeds = [
        ('\u041D\u043E\u0432\u0438\u043D\u0438', 'https://www.ua-football.com/ua/rss/all.xml'),
    ]

    # Publication metadata.
    __author__ = 'bugmen00t'
    publisher = '1766 TEAM EOOD'
    category = 'news'
    cover_url = u'https://yt3.googleusercontent.com/11FSvKeWcjFhzKrO7nXZdc-I__UeZ0mhZwbwyOHtnx_1-q6d0zQ2LbOt2duNCY06JVg2cGXS-g=s900-c-k-c0x00ffffff-no-rj'

    # Fetch limits.
    oldest_article = 7
    max_articles_per_feed = 200
    remove_empty_feeds = True

    # Clean-up switches: automatic clean-up is off, so the explicit
    # tag rules below decide what is kept from each page.
    auto_cleanup = False
    no_stylesheets = False
    remove_javascript = False

    # Keep the page from the first <h1> through the "show-post" div.
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'show-post'}}

    # Elements dropped from the retained part of the page.
    remove_tags = [
        {'name': 'form'},
        {'name': 'iframe'},
        {'name': 'div', 'attrs': {'class': 'language'}},
        {'name': 'div', 'attrs': {'class': 'article__read-also'}},
        {'name': 'div', 'attrs': {'class': 'card-player'}},
        {'name': 'div', 'attrs': {'class': 'show-post-socials'}},
    ]

    # Replacing Ukrainian article URLs for the RU feed (enable together
    # with the Russian settings above).
    # def print_version(self, url):
    #     return url.replace('ua-football.com/ua/', 'ua-football.com/')
Football.ua: soccer news portal from Ukraine. Favicon.
UNIAN.net: Ukrainian Independent News Agency of News, one of the most cited sources of news from across Ukraine. Favicon.
Russian version (fixed)
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class Unian(BasicNewsRecipe):
    """Calibre recipe for the Russian-language UNIAN news feed."""

    # NOTE(review): the title literal ends with a space; kept as-is because
    # changing it could alter how the recipe is identified - confirm intent.
    title = '\u0423\u041D\u0418\u0410\u041D '
    __author__ = 'bugmen00t'
    description = 'Украинское Независимое Информационное Агентство Новостей – первое в Украине и самое большое независимое информационное агентство, основанное в 1993 году, лидер среди новостных медиа страны, самый цитируемый источник новостей о событиях в стране.'  # noqa
    language = 'ru_UK'
    publication_type = 'newspaper'
    cover_url = 'https://www.unian.net/images/unian-512x512.png'

    feeds = [
        (u'\u0423\u041D\u0418\u0410\u041D', u'https://rss.unian.net/site/news_rus.rss'),
    ]

    # Fetch limits.
    oldest_article = 7
    max_articles_per_feed = 100

    # Manual clean-up: keep the page from the first <h1> through the
    # "article-text" div, then strip the elements listed in remove_tags.
    auto_cleanup = False
    no_stylesheets = True
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'article-text'}}
    remove_tags = [
        {'name': 'span', 'attrs': {'class': 'article__info-item comments'}},
        {'name': 'span', 'attrs': {'class': 'article__info-item views'}},
        {'name': 'div', 'attrs': {'class': 'read-also-slider'}},
        {'name': 'div', 'attrs': {'class': 'nts-video-wrapper'}},
    ]

    def preprocess_html(self, soup):
        """Copy each image's ``data-src`` attribute into ``src``.

        Only ``<img>`` tags that carry a ``data-src`` attribute are touched
        (presumably lazy-loaded images - verify against the site markup).
        Returns the same soup object, modified in place.
        """
        deferred_images = soup.findAll('img', attrs={'data-src': True})
        for tag in deferred_images:
            tag['src'] = tag['data-src']
        return soup
Ukrainian version
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class Unian(BasicNewsRecipe):
    """Calibre recipe for the Ukrainian-language UNIAN news feed."""

    title = '\u0423\u041D\u0406\u0410\u041D'
    __author__ = 'bugmen00t'
    description = '\u0423\u041D\u0406\u0410\u041D (\u0423\u043A\u0440\u0430\u0457\u043D\u0441\u044C\u043A\u0435 \u041D\u0435\u0437\u0430\u043B\u0435\u0436\u043D\u0435 \u0406\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u0439\u043D\u0435 \u0410\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E \u041D\u043E\u0432\u0438\u043D) - \u043F\u0435\u0440\u0448\u0435 \u0432 \u0423\u043A\u0440\u0430\u0457\u043D\u0456 \u0442\u0430 \u043D\u0430\u0439\u0431\u0456\u043B\u044C\u0448\u0435 \u043D\u0435\u0437\u0430\u043B\u0435\u0436\u043D\u0435 \u0456\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u0439\u043D\u0435 \u0430\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E, \u0437\u0430\u0441\u043D\u043E\u0432\u0430\u043D\u0435 1993 \u0440\u043E\u043A\u0443, \u043B\u0456\u0434\u0435\u0440 \u0441\u0435\u0440\u0435\u0434 \u043D\u043E\u0432\u0438\u043D\u043D\u0438\u0445 \u043C\u0435\u0434\u0456\u0430 \u043A\u0440\u0430\u0457\u043D\u0438, \u043D\u0430\u0439\u0431\u0456\u043B\u044C\u0448 \u0446\u0438\u0442\u043E\u0432\u0430\u043D\u0435 \u0434\u0436\u0435\u0440\u0435\u043B\u043E \u043D\u043E\u0432\u0438\u043D \u043F\u0440\u043E \u043F\u043E\u0434\u0456\u0457 \u0432 \u043A\u0440\u0430\u0457\u043D\u0456.'
    language = 'uk'
    publication_type = 'newspaper'
    cover_url = 'https://www.unian.ua/images/unian-512x512.png'

    feeds = [
        (u'\u0423\u041D\u0406\u0410\u041D', u'https://rss.unian.net/site/news_ukr.rss'),
    ]

    # Fetch limits.
    oldest_article = 7
    max_articles_per_feed = 100

    # Manual clean-up: keep the page from the first <h1> through the
    # "article-text" div, then strip the elements listed in remove_tags.
    auto_cleanup = False
    no_stylesheets = True
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'article-text'}}
    remove_tags = [
        {'name': 'span', 'attrs': {'class': 'article__info-item comments'}},
        {'name': 'span', 'attrs': {'class': 'article__info-item views'}},
        {'name': 'div', 'attrs': {'class': 'read-also-slider'}},
        {'name': 'div', 'attrs': {'class': 'nts-video-wrapper'}},
    ]

    def preprocess_html(self, soup):
        """Copy each image's ``data-src`` attribute into ``src``.

        Only ``<img>`` tags that carry a ``data-src`` attribute are touched
        (presumably lazy-loaded images - verify against the site markup).
        Returns the same soup object, modified in place.
        """
        deferred_images = soup.findAll('img', attrs={'data-src': True})
        for tag in deferred_images:
            tag['src'] = tag['data-src']
        return soup
English version (no updates since April 2023)
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class Unian(BasicNewsRecipe):
    """Calibre recipe for the English-language UNIAN news feed."""

    title = 'UNIAN'
    __author__ = 'bugmen00t'
    description = 'UNIAN (Ukrainian Independent News Agency of News) is the largest independent news agency, first in Ukraine, founded in 1993, remaining the leader among the country\'s news media, being the most cited source of news from across Ukraine.'
    language = 'en_UK'
    publication_type = 'newspaper'
    cover_url = 'https://www.unian.info/images/unian-512x512.png'

    feeds = [
        (u'News Agency UNIAN', u'https://rss.unian.net/site/news_eng.rss'),
    ]

    # Fetch limits; the look-back window here is 30 days.
    oldest_article = 30
    max_articles_per_feed = 100

    # Manual clean-up: keep the page from the first <h1> through the
    # "article-text" div, then strip the elements listed in remove_tags.
    auto_cleanup = False
    no_stylesheets = True
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'article-text'}}
    remove_tags = [
        {'name': 'span', 'attrs': {'class': 'article__info-item comments'}},
        {'name': 'span', 'attrs': {'class': 'article__info-item views'}},
        {'name': 'div', 'attrs': {'class': 'read-also-slider'}},
        {'name': 'div', 'attrs': {'class': 'nts-video-wrapper'}},
    ]

    def preprocess_html(self, soup):
        """Copy each image's ``data-src`` attribute into ``src``.

        Only ``<img>`` tags that carry a ``data-src`` attribute are touched
        (presumably lazy-loaded images - verify against the site markup).
        Returns the same soup object, modified in place.
        """
        deferred_images = soup.findAll('img', attrs={'data-src': True})
        for tag in deferred_images:
            tag['src'] = tag['data-src']
        return soup
ЭХО: reincarnated news portal by former Echo Moskvy journalists. Favicon. Replacement for the older, defunct recipe.
Fixes needed: - No images in articles (webp)
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe
class EchoMsk(BasicNewsRecipe):
    """Calibre recipe for the Echo news portal (echofm.online).

    Fetches four RSS feeds and keeps only the ``<article>`` element of each
    page, minus the elements listed in ``remove_tags``.
    """

    title = '\u042D\u0425\u041E'
    __author__ = 'bugmen00t'
    description = '\u042D\u0425\u041E - \u043A\u0430\u043A \u043D\u0430 \u0441\u0442\u0430\u0440\u043E\u043C \u0434\u043E\u0431\u0440\u043E\u043C \u0440\u0430\u0434\u0438\u043E'
    publisher = 'Radio Echo GmbH'
    category = 'news'
    cover_url = u'https://echofm.online/logo.png'
    language = 'ru'

    # Manual clean-up: automatic clean-up is off and stylesheets are dropped.
    no_stylesheets = True
    remove_javascript = False
    auto_cleanup = False

    # Fetch limits.
    oldest_article = 7
    max_articles_per_feed = 50

    # Keep only the <article> element of each page.
    remove_tags_before = dict(name='article')
    remove_tags_after = dict(name='article')

    # NOTE(review): these class names look auto-generated (hash-like) and
    # may change when the site is rebuilt - verify if clean-up stops working.
    # The original list repeated the 'sc-f94c4ef5-0 frGiYu' rule twice; the
    # redundant copy is dropped because one rule already matches every such div.
    remove_tags = [
        dict(name='span', attrs={'class': 'sc-7b4cbb79-0 guzUFC'}),
        dict(name='div', attrs={'class': 'sc-f94c4ef5-0 frGiYu'}),
    ]

    feeds = [
        ('\u0413\u043B\u0430\u0432\u043D\u043E\u0435', 'https://echofm.online/feed'),
        ('\u041D\u043E\u0432\u043E\u0441\u0442\u0438', 'https://echofm.online/news/feed'),
        ('\u041C\u043D\u0435\u043D\u0438\u044F', 'https://echofm.online/opinions/feed'),
        ('\u0414\u043E\u043A\u0443\u043C\u0435\u043D\u0442\u044B', 'https://echofm.online/documents/feed'),
    ]
Продолжение следует: digital media founded by Novaya Gazeta journalist Pavel Kanygin. Favicon.
Fixes needed: Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe
class ProSleduet(BasicNewsRecipe):
    """Calibre recipe for the Prodolzhenie Sleduet site (prosleduet.media).

    Pulls the per-category RSS feeds; the site-wide feed is left disabled
    in the feed list below.
    """

    title = '\u041F\u0440\u043E\u0434\u043E\u043B\u0436\u0435\u043D\u0438\u0435 \u0441\u043B\u0435\u0434\u0443\u0435\u0442'
    description = '\u0414\u0438\u0434\u0436\u0438\u0442\u0430\u043B-\u043F\u0440\u043E\u0435\u043A\u0442 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u043E\u0432 \u00AB\u041D\u043E\u0432\u043E\u0439 \u0433\u0430\u0437\u0435\u0442\u044B\u00BB'
    __author__ = 'bugmen00t'
    publisher = 'Pavel Kanygin, Natalia Zhdanova'
    category = 'news'
    language = 'ru'
    cover_url = u'https://prosleduet.media/wp-content/themes/prosle/assets/img/logo.svg'

    # Fetch limits.
    oldest_article = 7
    max_articles_per_feed = 20

    # Manual clean-up: automatic clean-up is off and stylesheets are dropped.
    auto_cleanup = False
    no_stylesheets = True
    remove_javascript = False

    # Both boundaries use the same "container" div selector.
    remove_tags_before = {'name': 'div', 'attrs': {'class': 'container'}}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'container'}}

    # Drop the share widget.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'ya-share2 ya-share2_inited'}},
    ]

    feeds = [
        # Site-wide feed (disabled):
        # ('\u041F\u0440\u043E\u0434\u043E\u043B\u0436\u0435\u043D\u0438\u0435 \u0441\u043B\u0435\u0434\u0443\u0435\u0442', 'https://prosleduet.media/feed/'),
        ('\u041D\u043E\u0432\u043E\u0441\u0442\u0438', 'https://prosleduet.media/category/news/feed/'),
        ('\u041B\u044E\u0434\u0438', 'https://prosleduet.media/category/people/feed/'),
        ('\u0421\u044E\u0436\u0435\u0442\u044B', 'https://prosleduet.media/category/syuzhety/feed/'),
        ('\u041F\u043E\u0434\u043A\u0430\u0441\u0442\u044B', 'https://prosleduet.media/category/podcasts/feed/'),
        ('\u0420\u0430\u0437\u0431\u043E\u0440\u044B', 'https://prosleduet.media/category/details/feed/'),
        ('\u0413\u043B\u0443\u0431\u0438\u043D\u043D\u0430\u044F \u0420\u043E\u0441\u0441\u0438\u044F', 'https://prosleduet.media/category/glubinnaya-rossiya/feed/'),
    ]
|