View Single Post
Old 06-26-2016, 03:50 AM   #3
jr17oo
Member
jr17oo began at the beginning.
 
Posts: 14
Karma: 10
Join Date: Jan 2012
Device: Sony PRS-T1
Kurier

Update for kurier.recipe:
  • new feed addresses
  • improved formatting

Code:
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import unicode_literals, division, absolute_import, print_function

__license__   = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'

'''
kurier.at
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class Kurier(BasicNewsRecipe):
    '''
    calibre recipe definition for kurier.at ("News from Austria").

    The class is purely declarative apart from ``preprocess_html``: it lists
    the per-section RSS feeds, the element to keep from each article page,
    the page furniture to discard, and the metadata handed to the conversion
    step.
    '''

    # --- Publication metadata -------------------------------------------
    title = 'Kurier'
    __author__ = 'Darko Miletic'
    description = 'News from Austria'
    publisher = 'KURIER'
    category = 'news, politics, Austria'
    language = 'de_AT'
    publication_type = 'newspaper'

    # --- Download behaviour ---------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    timeout = 30
    use_embedded_content = False
    remove_empty_feeds = True
    no_stylesheets = True

    # Reuse the metadata declared above so the two cannot drift apart.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # (section title, RSS URL) pairs, one per section of the site.
    feeds = [
        ('Politik', 'http://kurier.at/politik/xml/rss'),
        ('Wirtschaft', 'http://kurier.at/wirtschaft/xml/rss'),
        ('Chronik', 'http://kurier.at/chronik/xml/rss'),
        ('Kultur', 'http://kurier.at/kultur/xml/rss'),
        ('Leben', 'http://kurier.at/leben/xml/rss'),
        ('Menschen', 'http://kurier.at/menschen/xml/rss'),
        ('Sport', 'http://kurier.at/sport/xml/rss'),
    ]

    # The class is matched by regex, so an <article> carrying additional
    # classes next to "main-article" is still selected.
    keep_only_tags = [
        dict(name='article', attrs={'class': re.compile('main-article')}),
    ]

    # Sharing widgets, tag lists, comment areas, related-content teasers,
    # sliders and block quotes are dropped.
    remove_tags = [
        dict(name='div', attrs={'class': 'social-media-container'}),
        dict(name='section', attrs={'class': 'tags'}),
        dict(name='section', attrs={'class': re.compile('comment-box')}),
        dict(name='section', attrs={'class': re.compile('related-content')}),
        dict(name='section', attrs={'class': re.compile('article-slider')}),
        dict(name='section', attrs={'class': re.compile('commentcontainer')}),
        dict(name='blockquote'),
    ]

    remove_attributes = ['width', 'height']

    def preprocess_html(self, soup):
        '''
        Remove every inline ``style`` attribute from ``soup`` and return the
        result of ``self.adeify_images(soup)``.
        '''
        styled_tags = soup.findAll(style=True)
        for tag in styled_tags:
            del tag['style']
        return self.adeify_images(soup)
jr17oo is offline   Reply With Quote