View Single Post
Old 08-26-2011, 07:57 AM   #1
Divingduck
Fanatic
Divingduck can successfully navigate the Paris bus system.Divingduck can successfully navigate the Paris bus system.Divingduck can successfully navigate the Paris bus system.Divingduck can successfully navigate the Paris bus system.Divingduck can successfully navigate the Paris bus system.Divingduck can successfully navigate the Paris bus system.Divingduck can successfully navigate the Paris bus system.Divingduck can successfully navigate the Paris bus system.Divingduck can successfully navigate the Paris bus system.Divingduck can successfully navigate the Paris bus system.Divingduck can successfully navigate the Paris bus system.
 
Posts: 531
Karma: 36672
Join Date: Nov 2010
Location: Germany
Device: Sony PRS-650
Update request for Sueddeutsche Zeitung News Recipe

Some of the links are broken and need to be change. I put all actual avialable Rss-Feeds for Suedeutsche Zeitung in this update. The old query entries are set inactive but still there (lines starting with #).

Spoiler:

__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

'''
Fetch sueddeutsche.
'''
from calibre.web.feeds.news import BasicNewsRecipe


class Sueddeutsche(BasicNewsRecipe):

title = u'Süddeutsche'
description = 'News from Germany'
__author__ = 'Oliver Niesner and Sujata Raman, update Armin Geller 2011-08-26'
use_embedded_content = False
timefmt = ' [%d %b %Y]'
oldest_article = 7
max_articles_per_feed = 50
no_stylesheets = True
language = 'de'

encoding = 'utf-8'
remove_javascript = True


remove_tags = [ dict(name='link'), dict(name='iframe'),
dict(name='div', attrs={'id':["bookmarking","themenbox","artikelfoot","CAD_A D",
"SKY_AD","NT1_AD","navbar1","sdesiteheader"]}),

dict(name='div', attrs={'class':["similar-article-box","artikelliste","nteaser301bg",
"pages closed","basebox right narrow","headslot galleried"]}),

dict(name='div', attrs={'class':["articleDistractor","listHeader","listHeader2","hr 2",
"item","videoBigButton","articlefooter full-column",
"bildbanderolle full-column","footerCopy padleft5"]}),

dict(name='p', attrs={'class':["ressortartikeln","artikelFliestext","entry-summary"]}),
dict(name='div', attrs={'style':["position:relative;"]}),
dict(name='span', attrs={'class':["nlinkheaderteaserschwarz","artikelLink","r1000000 0"]}),
dict(name='table', attrs={'class':["stoerBS","kommentare","footer","pageBoxBot","page Aktiv","bgcontent"]}),
dict(name='ul', attrs={'class':["breadcrumb","articles","activities","sitenav","ac tions"]}),
dict(name='td', attrs={'class':["artikelDruckenRight"]}),
dict(name='p', text = "ANZEIGE")
]
remove_tags_after = [dict(name='div', attrs={'class':["themenbox full-column"]})]

extra_css = '''
h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #003399;}
a{font-family:Arial,Helvetica,sans-serif; font-size: x-small; font-style:italic;}
.dachzeile p{font-family:Arial,Helvetica,sans-serif; font-size: x-small; }
h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
.artikelTeaser{font-family:Arial,Helvetica,sans-serif; font-size: x-small; font-weight:bold; }
body{font-family:Arial,Helvetica,sans-serif; }
.photo {font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #666666;} '''

# old original set news queryies for Sueddeutsche Rss-Feeds
# feeds = [(u'Topthemen', u'http://suche.sueddeutsche.de/query/politik/-docdatetime/drilldown/%C2%A7documenttype%3AArtikel?output=rss')]
# feeds = [(u'Wissen', u'http://suche.sueddeutsche.de/query/wissen/nav/%C2%A7ressort%3AWissen/sort/-docdatetime?output=rss'),
# (u'Politik', u'http://suche.sueddeutsche.de/query/politik/nav/%C2%A7ressort%3APolitik/sort/-docdatetime?output=rss'),
# (u'Wirtschaft', u'http://suche.sueddeutsche.de/query/wirtschaft/nav/%C2%A7ressort%3AWirtschaft/sort/-docdatetime?output=rss'),
# (u'Finanzen', u'http://suche.sueddeutsche.de/query/finanzen/nav/%C2%A7ressort%3AGeld/sort/-docdatetime?output=rss'),
# (u'Kultur', u'http://suche.sueddeutsche.de/query/kultur/nav/%C2%A7ressort%3AKultur/sort/-docdatetime?output=rss'),
# (u'Sport', u'http://suche.sueddeutsche.de/query/sport/nav/%C2%A7ressort%3ASport/sort/-docdatetime?output=rss'),
# (u'Bayern', u'http://suche.sueddeutsche.de/query/bayern/nav/%C2%A7ressort%3ABayern/sort/-docdatetime?output=rss'),
# (u'Panorama', u'http://suche.sueddeutsche.de/query/panorama/sort/-docdatetime?output=rss'),
# (u'Leben&Stil', u'http://suche.sueddeutsche.de/query/stil/nav/%C2%A7ressort%3A%22Leben%20%26%20Stil%22/sort/-docdatetime?output=rss'),
# (u'Gesundheit', u'http://suche.sueddeutsche.de/query/gesundheit/nav/%C2%A7ressort%3AGesundheit/sort/-docdatetime?output=rss'),
# (u'Auto&Reise', u'http://suche.sueddeutsche.de/query/automobil/nav/%C2%A7ressort%3A%22Auto%20%26%20Mobil%22/sort/-docdatetime?output=rss'),
# (u'Computer', u'http://suche.sueddeutsche.de/query/computer/nav/%C2%A7ressort%3AComputer/sort/-docdatetime?output=rss'),
# (u'Job&Karriere', u'http://suche.sueddeutsche.de/query/job/nav/%C2%A7ressort%3A%22Job%20%26%20Karriere%22/sort/-docdatetime?output=rss'),
# (u'Reise', u'http://suche.sueddeutsche.de/query/reise/nav/%C2%A7ressort%3AReise/sort/-docdatetime?output=rss')
# ]

feeds = [
(u'Politik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPolitik%24?output=rss'),
(u'Wirtschaft', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWirtschaft%24?output=rss'),
(u'Geld', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EGeld%24?output=rss'),
(u'Kultur', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKultur%24?output=rss'),
(u'Sport', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss'),
(u'Leben', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ELeben%24?output=rss'),
(u'Karriere', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKarriere%24?output=rss'),
(u'München&Region', u'http://www.sueddeutsche.de/app/service/rss/ressort/muenchen/rss.xml'),
(u'Bayern', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EBayern%24?output=rss'),
(u'Medien', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMedien%24?output=rss'),
(u'Digital', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EDigital%24?output=rss'),
(u'Auto', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EAuto%24?output=rss'),
(u'Wissen', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWissen%24?output=rss'),
(u'Panorama', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPanorama%24?output=rss'),
(u'Reise', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EReise%24?output=rss'),
(u'Technik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ETechnik%24?output=rss'), # sometimes only
(u'Macht', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMacht%24?output=rss'), # sometimes only
(u'Job', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EJob%24?output=rss'), # sometimes only
(u'Service', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EService%24?output=rss'), # sometimes only
(u'Verlag', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EVerlag%24?output=rss'), # sometimes only
]

def print_version(self, url):
main, sep, id = url.rpartition('/')
return main + '/2.220/' + id

Last edited by Divingduck; 08-26-2011 at 08:02 AM.
Divingduck is offline   Reply With Quote