View Single Post
Old 08-10-2022, 08:15 AM   #22
bugmen00t
Connoisseur
bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!bugmen00t rocks like Gibraltar!
 
bugmen00t's Avatar
 
Posts: 82
Karma: 100000
Join Date: Aug 2015
Device: Kindle Keyboard 3G + Kindle Voyage WiFi + Kindle PW11 Kids WiFi
New recipes (part 10 of ??)

Another bunch of Russian recipes, and a few unrelated ones.

Wonderzine: style, beauty, fashion and lifestyle blog. Favicon.
Fixes needed:
  • Partial text in paywalled articles
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe

class Wonderzine(BasicNewsRecipe):
    """Calibre news recipe for Wonderzine (wonderzine.com), Russian-language.

    Articles are collected from the site's Atom feeds, one feed per topic.
    Known limitation noted by the recipe author: paywalled articles only
    yield partial text.
    """

    # --- Publication metadata -------------------------------------------
    title = 'Wonderzine'
    __author__ = 'bugmen00t'
    description = '\u041E\u043D\u043B\u0430\u0439\u043D-\u0438\u0437\u0434\u0430\u043D\u0438\u0435 \u0434\u043B\u044F \u0441\u043E\u0432\u0440\u0435\u043C\u0435\u043D\u043D\u044B\u0445 \u0436\u0435\u043D\u0449\u0438\u043D \u0438 \u0438\u0445 \u0434\u0440\u0443\u0437\u0435\u0439. \u041C\u044B \u043F\u0438\u0448\u0435\u043C \u043E \u043C\u043E\u0434\u0435 \u0438 \u043F\u043E\u043B\u0438\u0442\u0438\u043A\u0435, \u043E\u0431\u0449\u0435\u0441\u0442\u0432\u0435\u043D\u043D\u044B\u0445 \u0442\u0435\u043D\u0434\u0435\u043D\u0446\u0438\u044F\u0445 \u0438 \u0440\u0430\u0437\u0432\u043B\u0435\u0447\u0435\u043D\u0438\u044F\u0445, \u0437\u0434\u043E\u0440\u043E\u0432\u044C\u0435 \u0438 \u0441\u0435\u043A\u0441\u0435, \u044D\u0442\u0438\u043A\u0435 \u0438 \u0432\u043E\u0437\u043C\u043E\u0436\u043D\u043E\u0441\u0442\u044F\u0445, \u043E\u0441\u043E\u0437\u043D\u0430\u043D\u043D\u044B\u0445 \u0442\u0440\u0430\u0442\u0430\u0445 \u0438 \u0440\u0430\u0437\u043D\u043E\u043E\u0431\u0440\u0430\u0437\u0438\u0438 \u043A\u0440\u0430\u0441\u043E\u0442\u044B.'
    publisher = 'Redefine Media holding'
    category = 'blog'
    cover_url = 'https://lamcdn.net/wonderzine.com/post-og_image/WQRiRGqLlcKyEHsnOS-sUw.png'
    language = 'ru'

    # --- Download / conversion switches ---------------------------------
    no_stylesheets = False
    remove_javascript = False
    auto_cleanup = False
    oldest_article = 14
    max_articles_per_feed = 20

    # --- Page clean-up: keep the span from the <h1> to the end marker ----
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'is_article_end'}}
    remove_tags = [
        {'name': 'ul', 'attrs': {'class': 'post-meta'}},
        {'name': 'a', 'attrs': {'class': 'flow'}},
        {'name': 'a', 'attrs': {'class': 'banner-ad-link'}},
        {'name': 'div', 'attrs': {'class': 'share-buttons-bar'}},
        {'name': 'div', 'attrs': {'class': 'pseudosidebar'}},
    ]

    # --- Feeds: (section title, Atom URL) --------------------------------
    feeds = [
        (
            '\u0412\u0441\u0435 \u043C\u0430\u0442\u0435\u0440\u0438\u0430\u043B\u044B',
            'https://www.wonderzine.com/feeds/posts.atom',
        ),
        (
            '\u041D\u043E\u0432\u043E\u0441\u0442\u0438',
            'https://www.wonderzine.com/feeds/posts.atom?news=only',
        ),
        (
            '\u0416\u0438\u0437\u043D\u044C',
            'https://www.wonderzine.com/feeds/posts.atom?topic=life',
        ),
        (
            '\u0417\u0434\u043E\u0440\u043E\u0432\u044C\u0435',
            'https://www.wonderzine.com/feeds/posts.atom?topic=health',
        ),
        (
            '\u041A\u0440\u0430\u0441\u043E\u0442\u0430',
            'https://www.wonderzine.com/feeds/posts.atom?topic=beauty',
        ),
        (
            '\u0420\u0430\u0437\u0432\u043B\u0435\u0447\u0435\u043D\u0438\u044F',
            'https://www.wonderzine.com/feeds/posts.atom?topic=entertainment',
        ),
        (
            '\u0421\u0442\u0438\u043B\u044C',
            'https://www.wonderzine.com/feeds/posts.atom?topic=style',
        ),
        (
            '\u0412\u043E\u0437\u043C\u043E\u0436\u043D\u043E\u0441\u0442\u0438',
            'https://www.wonderzine.com/feeds/posts.atom?topic=opportunities',
        ),
    ]

Vedomosti: daily newspaper; non-paywalled content only. Favicon.
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe

class Vedomosti(BasicNewsRecipe):
    """Calibre news recipe for the Vedomosti daily newspaper (vedomosti.ru).

    Pulls the site's news RSS feed; per the recipe author only the
    non-paywalled content is covered.
    """

    # --- Publication metadata -------------------------------------------
    title = '\u0412\u0435\u0434\u043E\u043C\u043E\u0441\u0442\u0438'
    __author__ = 'bugmen00t'
    description = '\u0415\u0436\u0435\u0434\u043D\u0435\u0432\u043D\u0430\u044F \u0434\u0435\u043B\u043E\u0432\u0430\u044F \u0433\u0430\u0437\u0435\u0442\u0430.'
    publisher = '\u0410\u041E \u00AB\u0411\u0438\u0437\u043D\u0435\u0441 \u041D\u044C\u044E\u0441 \u041C\u0435\u0434\u0438\u0430\u00BB'
    category = 'newspaper'
    cover_url = 'https://id.vedomosti.ru/assets/chopick-13077eee55066e639f3a037834ecf11279d3f402e9f5cac7162d13183dfd9d20.jpg'
    language = 'ru'

    # --- Download / conversion switches ---------------------------------
    no_stylesheets = True
    remove_javascript = False
    auto_cleanup = False
    oldest_article = 7
    max_articles_per_feed = 200

    # --- Page clean-up: keep article header through the main body --------
    remove_tags_before = {'name': 'header', 'attrs': {'class': 'article__header'}}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'article__main'}}
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'article__tools'}},
        {'name': 'div', 'attrs': {'class': 'box-inset-link box-inset-link--card'}},
        {
            'name': 'div',
            'attrs': {'class': 'box-inset-link box-inset-link--card box-inset-link--link'},
        },
    ]

    # --- Feeds: (section title, RSS URL) ---------------------------------
    feeds = [
        (
            '\u041D\u043E\u0432\u043E\u0441\u0442\u0438',
            'https://www.vedomosti.ru/rss/news',
        ),
    ]

Bellingcat: OSINT investigation and citizen journalism blog. Favicon.

English version:
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe

class Bellingcat(BasicNewsRecipe):
    """Calibre news recipe for the English edition of Bellingcat.

    Reads the main site feed at www.bellingcat.com and strips the sharing
    and related-articles blocks from each page.
    """

    # --- Publication metadata -------------------------------------------
    title = 'Bellingcat'
    __author__ = 'bugmen00t'
    description = 'Bellingcat is an independent international collective of researchers, investigators and citizen journalists using open source and social media investigation to probe a variety of subjects – from Mexican drug lords and crimes against humanity, to tracking the use of chemical weapons and conflicts worldwide. With staff and contributors in more than 20 countries around the world, we operate in a unique field where advanced technology, forensic research, journalism, investigations, transparency and accountability come together.'
    publisher = 'Stichting Bellingcat'
    category = 'blog'
    cover_url = 'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
    language = 'en'

    # --- Download / conversion switches ---------------------------------
    no_stylesheets = False
    remove_javascript = False
    auto_cleanup = False
    oldest_article = 60
    max_articles_per_feed = 10

    # --- Page clean-up: both bounds use the same "container" div ---------
    remove_tags_before = {'name': 'div', 'attrs': {'class': 'container'}}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'container'}}
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'social social--share'}},
        {'name': 'div', 'attrs': {'class': 'singular__related'}},
    ]

    # --- Feeds: (section title, feed URL) --------------------------------
    feeds = [
        ('Bellingcat', 'https://www.bellingcat.com/feed/'),
    ]

Russian version:
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe

class Bellingcat(BasicNewsRecipe):
    """Calibre news recipe for the Russian edition of Bellingcat.

    Reads the feed at ru.bellingcat.com and strips the sharing and
    related-articles blocks from each page.
    """

    # --- Publication metadata -------------------------------------------
    title = '\u0411\u0435\u043B\u043B\u0438\u043D\u0433\u043A\u044D\u0442 (bell\u00BFngcat)'
    __author__ = 'bugmen00t'
    # The description used to be one single-quoted literal broken across two
    # physical lines, which is a SyntaxError. It is now two adjacent literals
    # joined by implicit concatenation; the first part keeps its trailing
    # space so the text reads as one sentence.
    description = (
        'Bellingcat \u2014 \u044D\u0442\u043E \u043D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u0430\u044F \u043C\u0435\u0436\u0434\u0443\u043D\u0430\u0440\u043E\u0434\u043D\u0430\u044F \u043A\u043E\u043C\u0430\u043D\u0434\u0430 \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u0442\u0435\u043B\u0435\u0439 \u0438 \u0433\u0440\u0430\u0436\u0434\u0430\u043D\u0441\u043A\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u043E\u0432, \u043A\u043E\u0442\u043E\u0440\u044B\u0435 \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u0443\u044E\u0442 \u043E\u0442\u043A\u0440\u044B\u0442\u044B\u0435 \u0438\u0441\u0442\u043E\u0447\u043D\u0438\u043A\u0438 \u0438 \u0441\u043E\u0446\u0438\u0430\u043B\u044C\u043D\u044B\u0435 \u0441\u0435\u0442\u0438 \u0434\u043B\u044F \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u0439 \u043D\u0430 \u0440\u0430\u0437\u043B\u0438\u0447\u043D\u044B\u0435 \u0442\u0435\u043C\u044B \u2014 \u043E\u0442 \u043C\u0435\u043A\u0441\u0438\u043A\u0430\u043D\u0441\u043A\u0438\u0445 \u043D\u0430\u0440\u043A\u043E\u043A\u0430\u0440\u0442\u0435\u043B\u0435\u0439 \u0438 \u043F\u0440\u0435\u0441\u0442\u0443\u043F\u043B\u0435\u043D\u0438\u0439 \u043F\u0440\u043E\u0442\u0438\u0432 \u0447\u0435\u043B\u043E\u0432\u0435\u0447\u0435\u0441\u0442\u0432\u0430 \u0434\u043E \u043E\u0442\u0441\u043B\u0435\u0436\u0438\u0432\u0430\u043D\u0438\u044F \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u043E\u0432\u0430\u043D\u0438\u044F \u0445\u0438\u043C\u0438\u0447\u0435\u0441\u043A\u043E\u0433\u043E \u043E\u0440\u0443\u0436\u0438\u044F \u0438 \u043A\u043E\u043D\u0444\u043B\u0438\u043A\u0442\u043E\u0432 \u043F\u043E \u0432\u0441\u0435\u043C\u0443 \u043C\u0438\u0440\u0443 \u2014 \u0441\u043E\u0447\u0435\u0442\u0430\u044F \u0441\u043E\u0432\u0440\u0435\u043C\u0435\u043D\u043D\u044B\u0435 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u0438, '
        '\u043A\u0440\u0438\u043C\u0438\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0435 \u044D\u043A\u0441\u043F\u0435\u0440\u0442\u0438\u0437\u044B, \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u043A\u0443, \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u044F \u0438 \u0431\u043E\u0440\u044C\u0431\u0443 \u0437\u0430 \u043F\u0440\u043E\u0437\u0440\u0430\u0447\u043D\u043E\u0441\u0442\u044C \u0438 \u043E\u0442\u0432\u0435\u0442\u0441\u0442\u0432\u0435\u043D\u043D\u043E\u0441\u0442\u044C.'
    )
    publisher = 'Stichting Bellingcat'
    category = 'blog'
    cover_url = 'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
    language = 'ru'

    # --- Download / conversion switches ---------------------------------
    no_stylesheets = False
    remove_javascript = False
    auto_cleanup = False
    oldest_article = 60
    max_articles_per_feed = 10

    # --- Page clean-up: both bounds use the same "container" div ---------
    remove_tags_before = dict(name='div', attrs={'class': 'container'})
    remove_tags_after = dict(name='div', attrs={'class': 'container'})
    remove_tags = [
        dict(name='div', attrs={'class': 'social social--share'}),
        dict(name='div', attrs={'class': 'singular__related'}),
    ]

    # --- Feeds: (section title, feed URL) --------------------------------
    feeds = [
        ('Bellingcat', 'https://ru.bellingcat.com/feed/'),
    ]

Ukrainian version:
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe

class Bellingcat(BasicNewsRecipe):
    """Calibre news recipe for the Ukrainian edition of Bellingcat.

    Reads the feed at uk.bellingcat.com and strips the sharing and
    related-articles blocks from each page.
    """

    # --- Publication metadata -------------------------------------------
    title = '\u0411\u0435\u043B\u043B\u0456\u043D\u0433\u043A\u0435\u0442 (bell\u00BFngcat)'
    __author__ = 'bugmen00t'
    # The description used to be one single-quoted literal broken across two
    # physical lines, which is a SyntaxError. It is now two adjacent literals
    # joined by implicit concatenation; the first part keeps its trailing
    # space so the two sentences stay separated.
    description = (
        '\u0411\u0435\u043B\u043B\u0456\u043D\u0433\u043A\u0435\u0442 \u2014 \u043D\u0435\u0437\u0430\u043B\u0435\u0436\u043D\u0438\u0439 \u043C\u0456\u0436\u043D\u0430\u0440\u043E\u0434\u043D\u0438\u0439 \u043A\u043E\u043B\u0435\u043A\u0442\u0438\u0432 \u0434\u043E\u0441\u043B\u0456\u0434\u043D\u0438\u043A\u0456\u0432, \u0441\u043B\u0456\u0434\u0447\u0438\u0445 \u0456 \u0433\u0440\u043E\u043C\u0430\u0434\u044F\u043D\u0441\u044C\u043A\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0456\u0432, \u044F\u043A\u0456 \u0432\u0438\u043A\u043E\u0440\u0438\u0441\u0442\u043E\u0432\u0443\u044E\u0442\u044C \u0432\u0456\u0434\u043A\u0440\u0438\u0442\u0456 \u0434\u0436\u0435\u0440\u0435\u043B\u0430 \u0442\u0430 \u0441\u043E\u0446\u0456\u0430\u043B\u044C\u043D\u0456 \u043C\u0435\u0440\u0435\u0436\u0456 \u0434\u043B\u044F \u0440\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F \u0440\u0456\u0437\u043D\u043E\u043C\u0430\u043D\u0456\u0442\u043D\u0438\u0445 \u0442\u0435\u043C \u2014 \u0432\u0456\u0434 \u043C\u0435\u043A\u0441\u0438\u043A\u0430\u043D\u0441\u044C\u043A\u0438\u0445 \u043D\u0430\u0440\u043A\u043E\u0431\u0430\u0440\u043E\u043D\u0456\u0432 \u0456 \u0437\u043B\u043E\u0447\u0438\u043D\u0456\u0432 \u043F\u0440\u043E\u0442\u0438 \u043B\u044E\u0434\u0441\u0442\u0432\u0430, \u0434\u043E \u0432\u0456\u0434\u0441\u0442\u0435\u0436\u0435\u043D\u043D\u044F \u0432\u0438\u043A\u043E\u0440\u0438\u0441\u0442\u0430\u043D\u043D\u044F \u0445\u0456\u043C\u0456\u0447\u043D\u043E\u0457 \u0437\u0431\u0440\u043E\u0457 \u0442\u0430 \u043A\u043E\u043D\u0444\u043B\u0456\u043A\u0442\u0456\u0432 \u0443 \u0432\u0441\u044C\u043E\u043C\u0443 \u0441\u0432\u0456\u0442\u0456. '
        '\u041C\u0438 \u043F\u0440\u0430\u0446\u044E\u0454\u043C\u043E \u0432 \u0443\u043D\u0456\u043A\u0430\u043B\u044C\u043D\u0456\u0439 \u0441\u0444\u0435\u0440\u0456, \u0434\u0435 \u043F\u0435\u0440\u0435\u0434\u043E\u0432\u0456 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0456\u0457, \u0441\u0443\u0434\u043E\u0432\u043E-\u043C\u0435\u0434\u0438\u0447\u043D\u0456 \u0434\u043E\u0441\u043B\u0456\u0434\u0436\u0435\u043D\u043D\u044F, \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0438\u043A\u0430, \u0440\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F, \u043F\u0440\u043E\u0437\u043E\u0440\u0456\u0441\u0442\u044C \u0456 \u0432\u0456\u0434\u043F\u043E\u0432\u0456\u0434\u0430\u043B\u044C\u043D\u0456\u0441\u0442\u044C \u043E\u0431\u2019\u0454\u0434\u043D\u0443\u044E\u0442\u044C\u0441\u044F.'
    )
    publisher = 'Stichting Bellingcat'
    category = 'blog'
    cover_url = 'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
    language = 'uk'

    # --- Download / conversion switches ---------------------------------
    no_stylesheets = False
    remove_javascript = False
    auto_cleanup = False
    oldest_article = 60
    max_articles_per_feed = 10

    # --- Page clean-up: both bounds use the same "container" div ---------
    remove_tags_before = dict(name='div', attrs={'class': 'container'})
    remove_tags_after = dict(name='div', attrs={'class': 'container'})
    remove_tags = [
        dict(name='div', attrs={'class': 'social social--share'}),
        dict(name='div', attrs={'class': 'singular__related'}),
    ]

    # --- Feeds: (section title, feed URL) --------------------------------
    feeds = [
        ('Bellingcat', 'https://uk.bellingcat.com/feed/'),
    ]

Version en français (no updates since 2021):
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe

class Bellingcat(BasicNewsRecipe):
    """Calibre news recipe for the French edition of Bellingcat.

    Reads the feed at fr.bellingcat.com. The recipe author notes the French
    site has had no updates since 2021, hence the large article age window.
    """

    # --- Publication metadata -------------------------------------------
    title = 'Bellingcat'
    __author__ = 'bugmen00t'
    description = 'Bellingcat est un groupe international ind\u00E9pendant de chercheurs, d\u0027enqu\u00EAteurs et de journalistes citoyens utilisant \u00E0 la fois: enqu\u00EAtes \u0027open source\u0027 et r\u00E9seaux sociaux, pour sonder une vari\u00E9t\u00E9 de sujets - trafiquants de drogue mexicains, crimes contre l\u0027humanit\u00E9, suivi de l\u0027utilisation d\u0027armes chimiques et conflits dans le monde entier. Nous op\u00E9rons dans un domaine unique dans lequel technologie de pointe, recherche m\u00E9dico-l\u00E9gale, journalisme, enqu\u00EAtes, transparence et responsabilit\u00E9 se rencontrent.'
    publisher = 'Stichting Bellingcat'
    category = 'blog'
    cover_url = 'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
    language = 'fr'

    # --- Download / conversion switches ---------------------------------
    no_stylesheets = False
    remove_javascript = False
    auto_cleanup = False
    oldest_article = 300
    max_articles_per_feed = 10

    # --- Page clean-up: both bounds use the same "container" div ---------
    remove_tags_before = {'name': 'div', 'attrs': {'class': 'container'}}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'container'}}
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'social social--share'}},
        {'name': 'div', 'attrs': {'class': 'singular__related'}},
    ]

    # --- Feeds: (section title, feed URL) --------------------------------
    feeds = [
        ('Bellingcat', 'https://fr.bellingcat.com/feed/'),
    ]

Versión en Español (no updates since 2021):
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe

class Bellingcat(BasicNewsRecipe):
    """Calibre news recipe for the Spanish edition of Bellingcat.

    Reads the feed at es.bellingcat.com. The recipe author notes the Spanish
    site has had no updates since 2021, hence the large article age window.
    """

    # --- Publication metadata -------------------------------------------
    title = 'Bellingcat'
    __author__ = 'bugmen00t'
    description = 'Bellingcat es un colectivo internacional independiente de investigadores y periodistas ciudadanos que usan informaci\u00F3n de fuente abierta y redes sociales para investigar una gran variedad de temas, desde carteles de droga en M\u00E9xico y cr\u00EDmenes de lesa humanidad hasta el rastreo de armas qu\u00EDmicas en zonas de conflicto alrededor del mundo. Nosotros operamos en un campo \u00FAnico donde la tecnolog\u00EDa avanzada, las investigaciones forenses, el periodismo, y la transparencia y responsabilidad se unen.'
    publisher = 'Stichting Bellingcat'
    category = 'blog'
    cover_url = 'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
    language = 'es'

    # --- Download / conversion switches ---------------------------------
    no_stylesheets = False
    remove_javascript = False
    auto_cleanup = False
    oldest_article = 300
    max_articles_per_feed = 10

    # --- Page clean-up: both bounds use the same "container" div ---------
    remove_tags_before = {'name': 'div', 'attrs': {'class': 'container'}}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'container'}}
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'social social--share'}},
        {'name': 'div', 'attrs': {'class': 'singular__related'}},
    ]

    # --- Feeds: (section title, feed URL) --------------------------------
    feeds = [
        ('Bellingcat', 'https://es.bellingcat.com/feed/'),
    ]

Росбалт: federal news agency with emphasis on Saint Petersburg. Favicon.
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe

class Rosbalt(BasicNewsRecipe):
    """Calibre news recipe for the Rosbalt news agency (rosbalt.ru).

    Reads the single site-wide feed and removes navigation, sidebars and a
    few site-specific blocks from each article page.
    """

    # --- Publication metadata -------------------------------------------
    title = '\u0420\u043E\u0441\u0431\u0430\u043B\u0442'
    __author__ = 'bugmen00t'
    description = '\u0424\u0435\u0434\u0435\u0440\u0430\u043B\u044C\u043D\u043E\u0435 \u0438\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0438\u043E\u043D\u043D\u043E-\u0430\u043D\u0430\u043B\u0438\u0442\u0438\u0447\u0435\u0441\u043A\u043E\u0435 \u0430\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E. \u041A\u0440\u0443\u0433\u043B\u043E\u0441\u0443\u0442\u043E\u0447\u043D\u043E \u0441\u043B\u0435\u0434\u0438\u043C \u0437\u0430 \u0441\u043E\u0431\u044B\u0442\u0438\u044F\u043C\u0438 \u0432 \u0441\u0442\u0440\u0430\u043D\u0435 \u0438 \u043C\u0438\u0440\u0435, \u043F\u0443\u0431\u043B\u0438\u043A\u0443\u0435\u043C \u0441\u0430\u043C\u044B\u0435 \u0441\u0432\u0435\u0436\u0438\u0435 \u043D\u043E\u0432\u043E\u0441\u0442\u0438, \u043A\u043E\u043C\u043C\u0435\u043D\u0442\u0430\u0440\u0438\u0438 \u0432\u0435\u0434\u0443\u0449\u0438\u0445 \u044D\u043A\u0441\u043F\u0435\u0440\u0442\u043E\u0432, \u0430\u043D\u0430\u043B\u0438\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0435 \u0441\u0442\u0430\u0442\u044C\u0438, \u0438\u043D\u0442\u0435\u0440\u0432\u044C\u044E \u0441 \u0432\u0435\u0434\u0443\u0449\u0438\u043C\u0438 \u043F\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u0438\u0442\u0435\u043B\u044F\u043C\u0438 \u0440\u043E\u0441\u0441\u0438\u0439\u0441\u043A\u043E\u0433\u043E \u0431\u0438\u0437\u043D\u0435\u0441\u0430 \u0438 \u043F\u043E\u043B\u0438\u0442\u0438\u043A\u0438.'
    publisher = '\u0410\u041E \u00AB\u0420\u0421-\u0411\u0430\u043B\u0442\u00BB'
    category = 'newspaper'
    cover_url = 'https://i.ytimg.com/vi/eeMqJGbjuY0/maxresdefault.jpg'
    language = 'ru'

    # --- Download / conversion switches ---------------------------------
    no_stylesheets = False
    remove_javascript = False
    auto_cleanup = False
    oldest_article = 7
    max_articles_per_feed = 100

    # --- Page clean-up: keep main content through the news text ----------
    remove_tags_before = {'name': 'div', 'attrs': {'class': 'main-content'}}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'newstext'}}
    remove_tags = [
        {'name': 'nav'},
        {'name': 'aside'},
        {'name': 'div', 'attrs': {'class': 'news-left'}},
        {'name': 'div', 'attrs': {'class': 'rkngov'}},
        {'name': 'div', 'attrs': {'class': 'eye'}},
    ]

    # --- Feeds: (section title, feed URL) --------------------------------
    feeds = [
        (
            '\u0420\u043E\u0441\u0431\u0430\u043B\u0442',
            'https://www.rosbalt.ru/feed/',
        ),
    ]

Debunking Denialism: blog about pseudoscience in everyday life. Favicon.
Spoiler:
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe

class DebunkingDenialism(BasicNewsRecipe):
    """Calibre news recipe for the Debunking Denialism blog.

    Reads the site feed at debunkingdenialism.com, trims each page down to
    its <article> element, and rewrites image sources to the original file
    advertised in the ``data-orig-file`` attribute.
    """

    # --- Publication metadata -------------------------------------------
    title = 'Debunking Denialism'
    __author__ = 'bugmen00t'
    description = 'Debunking Denialism is a website dedicated to the refutation of pseudoscience and denialism by applying scientific skepticism and defending evidence-based science. Fighting pseudoscience and quackery with reason and evidence.'
    publisher = 'Debunking Denialism'
    category = 'blog'
    cover_url = 'https://i0.wp.com/debunkingdenialism.com/wp-content/uploads/2017/06/cropped-newestblavatar.jpg'
    language = 'en'

    # --- Download / conversion switches ---------------------------------
    no_stylesheets = True
    remove_javascript = False
    auto_cleanup = False
    oldest_article = 60
    max_articles_per_feed = 10

    # --- Page clean-up: keep only the <article> element ------------------
    remove_tags_before = dict(name='article')
    remove_tags_after = dict(name='article')
    remove_tags = [
        dict(name='div', attrs={'class': 'above-entry-meta'}),
        dict(name='time', attrs={'class': 'updated'}),
        dict(name='p', attrs={'class': 'ddtag'}),
        dict(name='div', attrs={'class': 'sharedaddy sd-sharing-enabled'}),
        dict(name='div', attrs={'class': 'sharedaddy sd-block sd-like jetpack-likes-widget-wrapper jetpack-likes-widget-unloaded'}),
    ]

    # --- Feeds: (section title, feed URL) --------------------------------
    feeds = [
        ('Debunking Denialism', 'https://debunkingdenialism.com/feed/'),
    ]

    def preprocess_html(self, soup):
        """Point every image at its ``data-orig-file`` URL.

        Only ``<img>`` tags that carry a ``data-orig-file`` attribute are
        touched; their ``src`` is overwritten with that attribute's value.

        :param soup: parsed article page (BeautifulSoup-style object).
        :return: the same ``soup`` object, modified in place.
        """
        for img in soup.findAll('img', attrs={'data-orig-file': True}):
            img['src'] = img['data-orig-file']
        return soup


# The class was originally (mis)named ``Bellingcat`` after a copy-paste from
# another recipe; keep the old name as an alias so existing references work.
Bellingcat = DebunkingDenialism
Attached Images
         
Attached Files
File Type: recipe wonderzine.recipe (3.1 KB, 498 views)
File Type: recipe vedomosti.recipe (1.6 KB, 501 views)
File Type: recipe bellingcat_en.recipe (1.6 KB, 506 views)
File Type: recipe bellingcat_ru.recipe (3.5 KB, 500 views)
File Type: recipe bellingcat_uk.recipe (3.6 KB, 504 views)
File Type: recipe bellingcat_fr.recipe (1.7 KB, 488 views)
File Type: recipe bellingcat_es.recipe (1.6 KB, 488 views)
File Type: recipe rosbalt.recipe (2.6 KB, 496 views)
File Type: recipe debunkingdenialism.recipe (1.7 KB, 504 views)

Last edited by bugmen00t; 08-10-2022 at 08:18 AM.
bugmen00t is offline   Reply With Quote