View Single Post
Old 05-08-2016, 04:24 AM   #10
Aimylios
Member
Aimylios began at the beginning.
 
Posts: 17
Karma: 10
Join Date: Apr 2016
Device: Tolino Vision 3HD
Hi,

The addresses of the focus.de RSS feeds have changed. Here's an updated version of focus_de.recipe.

Code:
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import unicode_literals, division, absolute_import, print_function

'''
focus.de
'''

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1305567197(BasicNewsRecipe):
    """Calibre news recipe for the RSS feeds published by focus.de.

    The class only configures ``BasicNewsRecipe``: it lists the feeds to
    fetch, selects which parts of each article page are kept, and hooks
    into URL rewriting and HTML preprocessing.
    """

    title = 'Focus (DE)'
    __author__ = 'Anonymous'
    description = 'RSS-Feeds von Focus.de'
    language = 'de'

    # Fetch and clean-up options; their exact semantics are defined by
    # BasicNewsRecipe.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    remove_empty_feeds = True
    ignore_duplicate_articles = {'title', 'url'}

    # (section title, feed URL) pairs.
    feeds = [
        ('Politik', 'http://rss.focus.de/politik/'),
        ('Finanzen', 'http://rss.focus.de/finanzen/'),
        ('Gesundheit', 'http://rss.focus.de/gesundheit/'),
        ('Panorama', 'http://rss.focus.de/panorama/'),
        ('Digital', 'http://rss.focus.de/digital/'),
        ('Reisen', 'http://rss.focus.de/reisen/'),
    ]

    # Only the article container is kept from each downloaded page.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'article'}),
    ]

    # Elements stripped from inside the kept container.
    remove_tags = [
        dict(name='div', attrs={'class': ['inimagebuttons',
                                         'kolumneHead clearfix']}),
    ]

    remove_attributes = ['width', 'height']

    # NOTE: the backslash continuations sit inside the string literal, so the
    # indentation of the following lines becomes part of the CSS text. That
    # extra whitespace is insignificant in CSS.
    extra_css = 'h1 {font-size: 1.6em; text-align: left; margin-top: 0em} \
                 h2 {font-size: 1em; text-align: left} \
                 .overhead {margin-bottom: 0em} \
                 .caption {font-size: 0.6em}'

    def print_version(self, url):
        """Return *url* rewritten to request the print view of the article.

        The ``drucken=1`` parameter is appended with ``?`` when the URL has
        no query string yet and with ``&`` otherwise. A ``#fragment``, if
        present, is kept at the very end so the parameter stays part of the
        query instead of being swallowed by the fragment.
        """
        base, hash_sep, fragment = url.partition('#')
        separator = '&' if '?' in base else '?'
        return base + separator + 'drucken=1' + hash_sep + fragment

    def preprocess_html(self, soup):
        """Remove ``<h2>`` headings that only point to a video.

        A heading is dropped when its text, ignoring case and surrounding
        whitespace, starts with ``IM VIDEO:`` or ``VIDEO:``. The modified
        *soup* is returned.
        """
        video_prefixes = ('IM VIDEO:', 'VIDEO:')
        for heading in soup.findAll('h2'):
            text = heading.string
            # Skip headings for which the parser exposes no text via
            # ``.string`` (it is falsy in that case).
            if not text:
                continue
            # Strip first so that leading whitespace or newlines inside the
            # tag do not hide the prefix.
            if text.strip().upper().startswith(video_prefixes):
                heading.extract()
        return soup
Aimylios is offline   Reply With Quote