View Single Post
Old 05-09-2011, 07:03 AM   #7
syntaxis
Junior Member
syntaxis began at the beginning.
 
Posts: 5
Karma: 10
Join Date: Nov 2010
Device: Kindle Paperwhite (2014)
Hi,

I've rewritten the recipe (pics are also included now)

Code:
# -*- coding: utf-8 -*-

import re
from calibre.web.feeds.news import BasicNewsRecipe

class TelepolisNews(BasicNewsRecipe):
    """Recipe that builds a periodical from the Telepolis news Atom feed.

    The class only declares configuration (metadata, feed URL and the
    element selectors used to trim each article page) plus one hook,
    ``preprocess_html``, that adds a charset declaration to each page.
    """

    # --- Publication metadata -------------------------------------------
    title = u'Telepolis (News+Artikel)'
    __author__ = 'syntaxis'
    publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
    description = 'News from Telepolis'
    category = 'news'
    language = 'de'

    # --- Download behaviour ---------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 100
    # The attribute is spelled ``recursions`` on BasicNewsRecipe; the
    # previous spelling ``recursion`` was never read by the framework.
    recursions = 0
    no_stylesheets = True
    encoding = "utf-8"
    remove_empty_feeds = True

    # --- Page clean-up selectors ----------------------------------------
    # Elements that make up the article itself.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'head'}),
        dict(name='div', attrs={'class': 'leftbox'}),
        dict(name='td', attrs={'class': 'strict'}),
    ]

    # Page furniture (forum links, social buttons, breadcrumbs, ...) to drop.
    remove_tags = [
        dict(name='td', attrs={'class': 'blogbottom'}),
        dict(name='div', attrs={'class': 'forum'}),
        dict(name='div', attrs={'class': 'social'}),
        dict(name='div', attrs={'class': 'blog-letter p-news'}),
        dict(name='div', attrs={'class': 'blog-sub'}),
        dict(name='div', attrs={'class': 'version-div'}),
        dict(name='div', attrs={'id': 'breadcrumb'}),
        dict(attrs={'class': 'tp-url'}),
        dict(attrs={'class': 'blog-name entry_'}),
    ]

    remove_tags_after = [dict(name='span', attrs={'class': ['breadcrumb']})]

    # --- Feeds ------------------------------------------------------------
    feeds = [(u'News', u'http://www.heise.de/tp/news-atom.xml')]

    # --- Conversion metadata, derived from the fields above --------------
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    html2epub_options = '\n'.join([
        'publisher="%s"' % publisher,
        'comments="%s"' % description,
        'tags="%s"' % category,
    ])

    def preprocess_html(self, soup):
        """Prepend a Content-Type meta declaration to the page's ``<head>``.

        The declared charset is taken from ``self.encoding``.

        :param soup: parsed article page.
        :return: the same ``soup`` object; it is left untouched when the
            page has no ``<head>`` element.
        """
        head = soup.head
        if head is None:
            # Nothing to attach the declaration to; previously this case
            # raised AttributeError on ``None.insert``.
            return soup

        # NOTE(review): the markup is inserted as a plain string, exactly as
        # before. Depending on the soup implementation it may be emitted as
        # text rather than as a real <meta> element - confirm.
        mtag = ('<meta http-equiv="Content-Type" content="text/html; charset='
                + self.encoding + '">')
        head.insert(0, mtag)
        return soup
syntaxis is offline   Reply With Quote