View Single Post
Old 08-04-2017, 11:27 AM   #2
bugmen00t
Connoisseur
bugmen00t rocks like Gibraltar!
 
bugmen00t's Avatar
 
Posts: 82
Karma: 100000
Join Date: Aug 2015
Device: Kindle Keyboard 3G + Kindle Voyage WiFi + Kindle PW11 Kids WiFi
The Insider

Code:
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe

class AdjectiveSpecies(BasicNewsRecipe):
    """Calibre recipe that builds an e-book from the RSS feeds of The Insider.

    The recipe is purely declarative: it sets metadata, download limits,
    HTML clean-up rules and the list of per-category feeds on theins.ru.
    All Cyrillic text is written as ``\\uXXXX`` escapes; the comments next
    to each escaped string give an English gloss.
    """

    # --- Publication metadata -------------------------------------------
    title = u'The Insider'
    publisher = 'theins.ru'
    category = 'news'
    language = 'ru'
    cover_url = u'http://theins.ru/wp-content/uploads/2013/10/logo_insider.png'
    # Reads "Investigations Reports Analytics" in Russian.
    description = (
        '\u0420\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u044F '
        '\u0420\u0435\u043F\u043E\u0440\u0442\u0430\u0436\u0438 '
        '\u0410\u043D\u0430\u043B\u0438\u0442\u0438\u043A\u0430'
    )

    # --- Download limits ------------------------------------------------
    oldest_article = 300  # days, per the calibre recipe API
    max_articles_per_feed = 100

    # --- HTML clean-up --------------------------------------------------
    no_stylesheets = True
    remove_javascript = True
    # NOTE: auto_cleanup is intentionally left unset here; the explicit
    # remove_tags* rules below are what trims each article page.

    # Article content is bracketed by the #wrapper div and the grey
    # (color: #999999) paragraph; the leading space in the style value
    # is part of the attribute being matched.
    remove_tags_before = dict(name='div', attrs={'id': 'wrapper'})
    remove_tags_after = dict(name='p', attrs={'style': ' color: #999999;'})

    # Share widgets, ads, banners and the sidebar inside the kept region.
    remove_tags = [
        dict(name='div', attrs={'class': 'post-share'}),
        dict(name='div', attrs={'class': 'post-share fixed-likes'}),
        dict(name='div', attrs={'class': 'topads'}),
        dict(name='div', attrs={'class': 'pre-content-line'}),
        dict(name='div', attrs={'class': 'author-opinions'}),
        dict(name='div', attrs={'class': 'content-banner'}),
        dict(name='div', attrs={'id': 'sidebar'}),
    ]

    # --- Feeds: (section title, feed URL), one per site category --------
    feeds = [
        # News
        (u'\u041D\u043E\u0432\u043E\u0441\u0442\u0438',
         u'http://theins.ru/category/news/feed'),
        # Politics
        (u'\u041F\u043E\u043B\u0438\u0442\u0438\u043A\u0430',
         u'http://theins.ru/category/politika/feed'),
        # Antifake
        (u'\u0410\u043D\u0442\u0438\u0444\u0435\u0439\u043A',
         u'http://theins.ru/category/antifake/feed'),
        # Corruption
        (u'\u041A\u043E\u0440\u0440\u0443\u043F\u0446\u0438\u044F',
         u'http://theins.ru/category/korrupciya/feed'),
        # Confession
        (u'\u0418\u0441\u043F\u043E\u0432\u0435\u0434\u044C',
         u'http://theins.ru/category/confession/feed'),
        # Society
        (u'\u041E\u0431\u0449\u0435\u0441\u0442\u0432\u043E',
         u'http://theins.ru/category/obshestvo/feed'),
        # History
        (u'\u0418\u0441\u0442\u043E\u0440\u0438\u044F',
         u'http://theins.ru/category/history/feed'),
        # Economics
        (u'\u042D\u043A\u043E\u043D\u043E\u043C\u0438\u043A\u0430',
         u'http://theins.ru/category/ekonomika/feed'),
        # Opinions
        (u'\u041C\u043D\u0435\u043D\u0438\u044F',
         u'http://theins.ru/category/opinions/feed'),
        # Translations
        (u'\u041F\u0435\u0440\u0435\u0432\u043E\u0434\u044B',
         u'http://theins.ru/category/perevody/feed'),
        # Longread
        (u'\u041B\u043E\u043D\u0433\u0440\u0438\u0434',
         u'http://theins.ru/category/longread/feed'),
    ]
bugmen00t is offline   Reply With Quote