View Single Post
Old 05-14-2011, 12:45 PM   #1
schuster
Zealot
schuster doesn't litter
 
Posts: 119
Karma: 100
Join Date: Jan 2011
Location: Germany / NRW /Köln
Device: prs-650 / prs-350 /kindle 3
Recipe for Heise Newsticker (German)

Code:
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    """Calibre news recipe for the German-language Heise Newsticker feed.

    Articles are taken from the single RDF feed listed in ``feeds`` and are
    fetched through the URL returned by ``print_version``.
    """

    title = u'Heise Newsticker'
    __author__ = 'schuster'
    language = 'de'

    # Feed handling.
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    recursions = 0
    feeds = [
        (u'Alle news', u'http://www.heise.de/newsticker/heise.rdf'),
    ]

    # Page clean-up: keep what lies between these two elements ...
    remove_tags_before = dict(id='mitte_news')
    remove_tags_after = dict(id='ivw_pixel_intern')
    # ... and drop the elements matched below.
    # NOTE(review): many of the ``id`` values look like link labels from a
    # different site rather than element ids used by heise.de -- confirm
    # before pruning the list.
    remove_tags = [
        dict(attrs={'class': [
            'navi_top_logo', 'post-tools', 'side_tool',
            'nextArticleLink clearfix',
        ]}),
        dict(id=[
            'navi_top_container', 'toolsRight', 'articleInline', 'navigation',
            'archive', 'side_search', 'blog_sidebar', 'side_tool',
            'side_index', 'Verlinken', 'vorheriger', 'LESERKOMMENTARE',
            'bei facebook', 'bei twitter',
            'Schreiben Sie jetzt Ihre Meinung:', 'Thema', 'Ihr Beitrag',
            'Ihr Name',
            'Ich möchte über weitere Lesermeinungen zu diesem Artikel per E-Mail informiert werden.',
            'banneroben', 'bannerrechts', 'inserieren', 'stellen', 'auto',
            'immobilien', 'kleinanzeige', 'tiere', 'ferienwohnung',
            'NGZ Card', 'Mediengruppe RP', 'Werben', 'Newsletter', 'Wetter',
            'RSS', 'Abo', 'Anzeigen', 'Redaktion', 'Schulprojekte', 'Gast',
            'Mein NGZ', 'Nachrichten', 'Sport', 'Wirtschaft', 'Stadt-Infos',
            'Bilderserien', 'Bookmarken', 'del.icio.us', 'Mister Wong',
            'YiGG', 'Webnews', 'Shortnews', 'Twitter', 'Newsider',
            'Facebook', 'StudiVZ/MeinVZ', 'Versenden', 'Drucken',
        ]),
        dict(name=['script', 'noscript', 'style']),
    ]
    no_stylesheets = True
    remove_javascript = True
    filter_regexps = [r'ads\.doubleclick\.net']

    def print_version(self, url):
        """Return ``url`` with the ``view=print`` query parameter appended.

        The parameter is joined with ``&`` when ``url`` already carries a
        query string, so the result stays a well-formed URL; otherwise it is
        joined with ``?`` exactly as before.
        """
        separator = '&' if '?' in url else '?'
        return url + separator + 'view=print'
schuster is offline   Reply With Quote