View Single Post
Old 06-03-2011, 06:56 AM   #2
schuster doesn't litterschuster doesn't litter
Posts: 116
Karma: 100
Join Date: Jan 2011
Location: Germany / NRW /Köln
Device: prs-650 / prs-350 /kindle 3

Here is a new update for the recipe:

1- fetch all regional feeds
2- cleaner layout

import string, re
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    """Calibre news recipe for a German-language site with regional feeds.

    NOTE(review): ``title``, ``cover_url``, ``masthead_url`` and every feed
    URL are empty strings in this copy of the recipe. They presumably have
    to be filled in before the recipe is usable -- confirm against the
    author's original.
    """

    title = u''
    __author__ = 'schuster'
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    remove_javascript = True

    # Get the cover from myspace.
    # NOTE(review): both URLs are blank here -- TODO restore them.
    cover_url = ''
    masthead_url = ''

    # Set what to fetch on the site: the <h2 id="cover"> element and the
    # <div class="back"> element are the markers handed to
    # remove_tags_before / remove_tags_after.
    remove_tags_before = dict(name='h2', attrs={'id': 'cover'})
    remove_tags_after = dict(name='div', attrs={'class': 'back'})

    # Remove things on the site that we don't want.
    remove_tags = [
        dict(name='div', attrs={'class': 'credit'}),
        dict(name='div', attrs={'class': 'index'}),
        dict(name='div', attrs={'id': 'zstart31'}),
        dict(name='div', attrs={'class': 'hentry'}),
        dict(name='div', attrs={'class': 'back'}),
        dict(name='div', attrs={'class': 'pagination'}),
        dict(name='div', attrs={'class': 'header'}),
        dict(name='div', attrs={'class': 'element floatL'}),
        dict(name='div', attrs={'class': 'stWrap'}),
    ]

    # Thanks to kiklop74 for the code (see sticky thread -> Recipes -
    # Re-usable code). This one removes a lot of direct links.
    def preprocess_html(self, soup):
        """Replace links that have a plain ``.string`` with that text.

        Every ``<a>`` tag found in *soup* whose ``.string`` is not ``None``
        is swapped for its text, so the link itself disappears while the
        wording stays. Tags whose ``.string`` is ``None`` are left alone.

        Returns the same *soup* object, modified in place.
        """
        for alink in soup.findAll('a'):
            tstr = alink.string
            if tstr is not None:
                # Without this replacement the loop has no effect at all.
                alink.replaceWith(tstr)
        return soup

    # Remove the ads.
    filter_regexps = [r'.\.smartadserver\.com']

    def skip_ad_pages(self, soup):
        """Return ``None`` unconditionally; *soup* is not inspected."""
        return None

    # Get the real URL behind the feed entry and fetch the articles.
    def get_article_url(self, article):
        """Return the article's ``'id'`` value, else ``'guid'``, else ``None``."""
        return article.get('id', article.get('guid', None))

    # List of the RSS sources as (title, url) pairs.
    # NOTE(review): every URL is blank in this copy -- TODO restore them.
    feeds = [
        (u'Überblick', u''),
        (u'News', u''),
        (u'Politik', u''),
        (u'Unterhaltung', u''),
        (u'Sport', u''),
        (u'Lifestyle', u''),
        (u'Ratgeber', u''),
        (u'Reg. - Berlin', u''),
        (u'Reg. - Bremen', u''),
        (u'Reg. - Dresden', u''),
        (u'Reg. - Düsseldorf', u''),
        (u'Reg. - Frankfurt-Main', u''),
        (u'Reg. - Hamburg', u''),
        (u'Reg. - Hannover', u''),
        (u'Reg. - Köln', u''),
        (u'Reg. - Leipzig', u''),
        (u'Reg. - München', u''),
        (u'Reg. - Ruhrgebiet', u''),
        (u'Reg. - Stuttgart', u''),
    ]
schuster is offline   Reply With Quote