View Single Post
Old 05-15-2011, 06:40 AM   #9
syntaxis
Junior Member
syntaxis began at the beginning.
 
Posts: 5
Karma: 10
Join Date: Nov 2010
Device: Kindle Paperwhite (2014)
Hi Juco,

If you want to see more articles, you have to change this line:
Code:
 oldest_article = 1
1 means it only fetches articles that are not older than 1 day.

Regarding points 2 and 3: I made some changes, so it should work now.

Code:
# -*- coding: utf-8 -*-

import re
from calibre.web.feeds.news import BasicNewsRecipe

class TelepolisNews(BasicNewsRecipe):
    """Calibre news recipe for the Telepolis (heise.de) news Atom feed.

    The recipe keeps only the article header/body containers, strips
    forum, social and navigation elements, and applies a small heading
    stylesheet via ``extra_css``.
    """

    title = u'Telepolis (News+Artikel)'
    __author__ = 'syntaxis'
    publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
    description = 'News from Telepolis'
    category = 'news'
    # Only fetch articles that are at most this many days old.
    oldest_article = 1
    max_articles_per_feed = 100
    # The documented BasicNewsRecipe attribute is ``recursions`` (the
    # original spelled it ``recursion``, which calibre does not read).
    recursions = 0
    no_stylesheets = True
    encoding = "utf-8"
    language = 'de'

    remove_empty_feeds = True

    # Containers that hold the actual article content.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'head'}),
        dict(name='div', attrs={'class': 'leftbox'}),
        dict(name='td', attrs={'class': 'strict'}),
    ]

    # Page furniture that must not end up in the e-book.
    remove_tags = [
        dict(name='td', attrs={'class': 'blogbottom'}),
        dict(name='div', attrs={'class': 'forum'}),
        dict(name='div', attrs={'class': 'social'}),
        dict(name='div', attrs={'class': 'blog-letter p-news'}),
        dict(name='div', attrs={'class': 'blog-sub'}),
        dict(name='div', attrs={'class': 'version-div'}),
        dict(name='div', attrs={'id': 'breadcrumb'}),
        dict(attrs={'class': 'tp-url'}),
        dict(name='div', attrs={'class': [
            'blog-letter e-news',
            'blog-letter m-news',
            'blog-letter w-news',
            'blog-letter t-news',
            'blog-letter k-news',
            'blog-letter s-news',
        ]}),
    ]

    remove_tags_after = [dict(name='span', attrs={'class': ['breadcrumb']})]

    feeds = [(u'News', u'http://www.heise.de/tp/news-atom.xml')]

    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    html2epub_options = (
        'publisher="' + publisher
        + '"\ncomments="' + description
        + '"\ntags="' + category + '"'
    )

    # Must be a class attribute: at module level (as in the original post)
    # the recipe never sees this stylesheet.
    extra_css = '''
                h1 {color:#008852;font-family:Arial,Helvetica,sans-serif; font-size:25px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:22px; }
                h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
                h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
                h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
                h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
                '''

    def preprocess_html(self, soup):
        """Prepend a Content-Type meta declaration to the document head.

        :param soup: parsed article document supplied by calibre.
        :return: the same ``soup`` object, modified in place when it has a
            ``<head>`` element and returned unchanged otherwise.
        """
        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
        # Pages without a <head> would otherwise raise AttributeError here.
        # NOTE(review): the markup is inserted as a plain string; whether it
        # is emitted as a real tag depends on the BeautifulSoup version
        # bundled with calibre -- confirm against the target release.
        if soup.head is not None:
            soup.head.insert(0, mtag)
        return soup

Last edited by syntaxis; 05-16-2011 at 08:16 AM.
syntaxis is offline   Reply With Quote