View Single Post
Old 11-13-2011, 02:47 AM   #2
Divingduck
Evangelist
Divingduck is generous with chocolateDivingduck is generous with chocolateDivingduck is generous with chocolateDivingduck is generous with chocolateDivingduck is generous with chocolateDivingduck is generous with chocolateDivingduck is generous with chocolateDivingduck is generous with chocolateDivingduck is generous with chocolateDivingduck is generous with chocolateDivingduck is generous with chocolate
 
Posts: 439
Karma: 33884
Join Date: Nov 2010
Location: Germany
Device: Sony PRS-650
There is a small error on the main index in the actual Sueddeutsche.de recipe.

Here is the correction:
Spoiler:
Code:
# -*- coding: utf-8 -*-
__license__   = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

'''
Fetch sueddeutsche.de
'''
from calibre.web.feeds.news import BasicNewsRecipe


class Sueddeutsche(BasicNewsRecipe):

    title = u'sueddeutsche.de'
    description = 'News from Germany'
    __author__ = 'Oliver Niesner and Armin Geller'
    use_embedded_content   = False
    timefmt = ' [%d %b %Y]'
    oldest_article = 7
    max_articles_per_feed = 50
    no_stylesheets = True
    language = 'de'

    encoding = 'utf-8'
    remove_javascript = True


    remove_tags = [ dict(name='link'), dict(name='iframe'),
                    dict(name='div', attrs={'id':["bookmarking","themenbox","artikelfoot","CAD_AD",
                          "SKY_AD","NT1_AD","navbar1","sdesiteheader"]}),

                    dict(name='div', attrs={'class':["similar-article-box","artikelliste","nteaser301bg",
                                 "pages closed","basebox right narrow","headslot galleried"]}),

                    dict(name='div', attrs={'class':["articleDistractor","listHeader","listHeader2","hr2",
                             "item","videoBigButton","articlefooter full-column",
                                                     "bildbanderolle full-column","footerCopy padleft5"]}),

                    dict(name='p', attrs={'class':["ressortartikeln","artikelFliestext","entry-summary"]}),
                    dict(name='div', attrs={'style':["position:relative;"]}),
                    dict(name='span', attrs={'class':["nlinkheaderteaserschwarz","artikelLink","r10000000"]}),
                    dict(name='table', attrs={'class':["stoerBS","kommentare","footer","pageBoxBot","pageAktiv","bgcontent"]}),
                    dict(name='ul', attrs={'class':["breadcrumb","articles","activities","sitenav","actions"]}),
                    dict(name='td', attrs={'class':["artikelDruckenRight"]}),
                    dict(name='p', text = "ANZEIGE")
                     ]
    remove_tags_after = [dict(name='div', attrs={'class':["themenbox full-column"]})]

    extra_css = '''
                    h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #003399;}
                    a{font-family:Arial,Helvetica,sans-serif; font-size: x-small; font-style:italic;}
                    .dachzeile p{font-family:Arial,Helvetica,sans-serif; font-size: x-small; }
                    h1{ font-family:Arial,Helvetica,sans-serif;  font-size:x-large; font-weight:bold;}
                    .artikelTeaser{font-family:Arial,Helvetica,sans-serif; font-size: x-small; font-weight:bold; }
                    body{font-family:Arial,Helvetica,sans-serif; }
                    .photo {font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #666666;}                 '''

    feeds = [
              (u'Politik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPolitik%24?output=rss'),
              (u'Wirtschaft', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWirtschaft%24?output=rss'),
              (u'Geld', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EGeld%24?output=rss'),
              (u'Kultur', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKultur%24?output=rss'),
              (u'Sport', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss'),
              (u'Leben', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ELeben%24?output=rss'),
              (u'Karriere', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKarriere%24?output=rss'),
              (u'München & Region', u'http://www.sueddeutsche.de/app/service/rss/ressort/muenchen/rss.xml'), # AGe 2011-11-13
              (u'Bayern', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EBayern%24?output=rss'),
              (u'Medien', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMedien%24?output=rss'),
              (u'Digital', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EDigital%24?output=rss'),
              (u'Auto', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EAuto%24?output=rss'),
              (u'Wissen', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWissen%24?output=rss'),
              (u'Panorama', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPanorama%24?output=rss'),
              (u'Reise', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EReise%24?output=rss'),
              (u'Technik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ETechnik%24?output=rss'), # sometimes only
              (u'Macht', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMacht%24?output=rss'),     # sometimes only
              (u'Job', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EJob%24?output=rss'),         # sometimes only
              (u'Service', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EService%24?output=rss'), # sometimes only
              (u'Verlag', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EVerlag%24?output=rss'),   # sometimes only
            ]

    def print_version(self, url):
        main, sep, id = url.rpartition('/')
        return main + '/2.220/' + id
Attached Thumbnails
Click image for larger version

Name:	sueddeutsche-recipe.JPG
Views:	96
Size:	18.4 KB
ID:	78909  
Divingduck is offline   Reply With Quote