View Single Post
Old 11-21-2010, 03:44 PM   #1
syntaxis
Junior Member
syntaxis began at the beginning.
 
Posts: 5
Karma: 10
Join Date: Nov 2010
Device: Kindle Paperwhite (2014)
Updated Telepolis (News+Artikel) Recipe

Hi There,

I've updated the Telepolis recipe:
Changes:
*Now has correct Pagebreak on Kindle / Mobi Format
*Fetches Articles and News
*Not showing comments below articles anymore

Code:
# -*- coding: utf-8 -*-

__license__   = 'GPL v3'
__copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'


import re
from calibre.web.feeds.news import BasicNewsRecipe

class TelepolisNews(BasicNewsRecipe):
    title          = u'Telepolis (News+Artikel)'
    __author__ = 'Gerhard Aigner'
    publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
    description = 'News from telepolis'
    category = 'news'
    oldest_article = 7
    max_articles_per_feed = 100
    recursion = 0
    no_stylesheets = True
    encoding = "utf-8"
    language = 'de_AT'

    use_embedded_content =False
    remove_empty_feeds = True

    preprocess_regexps = [(re.compile(r'<a[^>]*>', re.DOTALL|re.IGNORECASE), lambda match: ''),
        (re.compile(r'</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),]

    keep_only_tags = [dict(name = 'td',attrs={'class':'bloghead'}),dict(name = 'td',attrs={'class':'blogfliess'})]
    remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'}), dict(name='td',attrs={'class':'forum'})]

    feeds          = [(u'News', u'http://www.heise.de/tp/news-atom.xml')]

    html2lrf_options = [
        '--comment'  , description
        , '--category' , category
        , '--publisher', publisher
    ]

    html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    def get_article_url(self, article):
        '''if the linked article is of kind artikel don't take it'''
        if (article.link.count('artikel') > 1) :
            return None
        return article.link

    def preprocess_html(self, soup):
        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
        soup.head.insert(0,mtag)
        return soup
syntaxis is offline   Reply With Quote