View Single Post
Old 11-18-2011, 06:21 AM   #3
schuster
Zealot
schuster doesn't litter
 
Posts: 116
Karma: 100
Join Date: Jan 2011
Location: Germany / NRW / Köln
Device: prs-650 / prs-350 / kindle 3
Update - cleaner content

Code:
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe(BasicNewsRecipe):
    """Calibre news recipe for heise online (German IT news).

    Pulls the feeds listed in ``feeds``, strips the navigation and
    advertising containers named in ``remove_tags`` and downloads the
    print view of every article (see ``print_version``).
    """

    # --- Publication metadata -------------------------------------------
    title = 'Heise-online'
    description = 'News vom Heise-Verlag'
    __author__ = 'schuster'
    masthead_url = 'http://www.heise.de/icons/ho/heise_online_logo.gif'
    publisher = 'Heise Zeitschriften Verlag GmbH & Co. KG'
    language = 'de'

    # --- Download behaviour ---------------------------------------------
    # Exact semantics of these options are defined by BasicNewsRecipe.
    use_embedded_content = False
    oldest_article = 2
    max_articles_per_feed = 35
    rescale_images = True
    remove_empty_feeds = True
    timeout = 5
    no_stylesheets = True

    # --- Content clean-up -----------------------------------------------
    # Everything following the editor's signature paragraph is dropped.
    remove_tags_after = dict(name='p', attrs={'class': 'editor'})

    # Page chrome (navigation, ads, forum teaser, sitemap) to strip.
    # Entries given only by ``id`` match any tag carrying that id, so no
    # additional tag-specific entry is needed for the same id.
    remove_tags = [
        dict(id='navi_top_container'),
        dict(id='navi_bottom'),
        dict(id='mitte_rechts'),
        dict(id='navigation'),
        dict(id='subnavi'),
        dict(id='social_bookmarks'),
        dict(id='permalink'),
        dict(id='content_foren'),
        dict(id='seiten_navi'),
        dict(id='adbottom'),
        dict(id='sitemap'),
        dict(name='ul', attrs={'class': 'erste_zeile'}),
        dict(name='ul', attrs={'class': 'zweite_zeile'}),
        dict(name='div', attrs={'class': 'navi_top_container'}),
    ]

    # --- Feeds: (section title, feed URL) -------------------------------
    feeds = [
        ('Newsticker', 'http://www.heise.de/newsticker/heise.rdf'),
        ('Auto', 'http://www.heise.de/autos/rss/news.rdf'),
        ('Foto ', 'http://www.heise.de/foto/rss/news-atom.xml'),
        ('Mac&i', 'http://www.heise.de/mac-and-i/news.rdf'),
        ('Mobile ', 'http://www.heise.de/mobil/newsticker/heise-atom.xml'),
        ('Netz ', 'http://www.heise.de/netze/rss/netze-atom.xml'),
        ('Open ', 'http://www.heise.de/open/news/news-atom.xml'),
        ('Resale ', 'http://www.heise.de/resale/rss/resale.rdf'),
        ('Security ', 'http://www.heise.de/security/news/news-atom.xml'),
        ('C`t', 'http://www.heise.de/ct/rss/artikel-atom.xml'),
        ('iX', 'http://www.heise.de/ix/news/news.rdf'),
        ('Mach-flott', 'http://www.heise.de/mach-flott/rss/mach-flott-atom.xml'),
        ('Blog: Babel-Bulletin', 'http://www.heise.de/developer/rss/babel-bulletin/blog.rdf'),
        ('Blog: Der Dotnet-Doktor', 'http://www.heise.de/developer/rss/dotnet-doktor/blog.rdf'),
        ('Blog: Bernds Management-Welt', 'http://www.heise.de/developer/rss/bernds-management-welt/blog.rdf'),
        ('Blog: IT conversation', 'http://www.heise.de/developer/rss/world-of-it/blog.rdf'),
        ('Blog: Kais bewegtes Web', 'http://www.heise.de/developer/rss/kais-bewegtes-web/blog.rdf'),
    ]

    def print_version(self, url):
        """Return the URL of the print view for the article at *url*.

        The ``view=print`` parameter is added to the query part of the
        URL: with ``?`` when there is no query string yet, with ``&``
        otherwise. A ``#fragment``, if present, is kept at the very end so
        the parameter does not end up inside it.
        """
        # Split off the fragment first; the query must precede it.
        base, hash_sep, fragment = url.partition('#')
        joiner = '&' if '?' in base else '?'
        return base + joiner + 'view=print' + hash_sep + fragment
schuster is offline   Reply With Quote