Hi there
Here is a quick-and-dirty snippet from me
for the German Heise Newsticker.
It's working fine for me.
Code:
import re
from libprs500.ebooks.lrf.web.profiles import DefaultProfile
class heise(DefaultProfile):
    """Feed profile for the German Heise Newsticker.

    Fetches the newsticker RDF feed, rewrites article URLs to their print
    versions and strips navigation and table markup from the downloaded HTML
    using the regular expressions in ``preprocess_regexps``.
    """

    # Settings below are presumably consumed by DefaultProfile -- their exact
    # semantics are defined there, not in this snippet (TODO confirm).
    title = 'Heise Newsticker'
    max_recursions = 2
    use_pubdate = False
    no_stylesheets = True
    max_articles_per_feed = 30

    # (compiled pattern, replacement callable) pairs. Every pattern is
    # compiled case-insensitively and with DOTALL so that ``.*?`` can span
    # line breaks in the page source.
    preprocess_regexps = [
        (re.compile(pattern, re.IGNORECASE | re.DOTALL), replacement)
        for pattern, replacement in [
            # Drop everything between the navigation-bar marker and <title>.
            (r'<!-- Site Navigation Bar -->.*?<title>',
             lambda match: '<title>'),
            # Drop the rest of <head> after the title.
            (r'</title>.*?</head>',
             lambda match: '</title> </head>'),
            # Drop the upper navigation up to the closing </heisetext>.
            (r'<!-- allgemeine obere Navigation -->.*?</heisetext>',
             lambda match: ''),
            # Remove all tables.
            (r'<table.*?</table>',
             lambda match: ''),
            # Cut everything from the first <br clear="all" to the end of
            # the body and close the still-open <div>.
            (r'<br clear="all".*?</body>',
             lambda match: '</div> </body>'),
        ]
    ]

    def get_feeds(self):
        """Return the list of ``(title, url)`` feed tuples for this profile."""
        return [('Heise Newsticker', 'http://www.heise.de/newsticker/heise.rdf')]

    def print_version(self, url):
        """Return the print-view URL for a newsticker article URL.

        URLs that do not contain the ``/newsticker/meldung/`` prefix are
        returned unchanged, because ``str.replace`` finds nothing to replace.
        """
        return url.replace(
            'http://www.heise.de/newsticker/meldung/',
            'http://www.heise.de/newsticker/meldung/print/',
        )
have fun
Stefan