OK, I'm trying to write a profile, but I'm really struggling. I get one of two results: either a blank document, or the script hangs.
from libprs500.ebooks.lrf.web.profiles import DefaultProfile
import re
class SMH(DefaultProfile):
    """Profile for the SMH text feed served at http://smh.com.au/text."""

    title = 'SMH'
    max_recursions = 2
    oldest_article = 1
    no_stylesheets = True

    # Each entry pairs a compiled pattern with a callable that receives the
    # match object and returns the replacement text (here always '').
    # NOTE(review): presumably DefaultProfile applies these to the fetched
    # HTML before conversion -- confirm against the libprs500 docs.
    preprocess_regexps = [
        # Remove links to homepage. The square brackets are literal text in
        # the page and must be escaped; unescaped they form a character
        # class and the pattern never matches the intended markup.
        (re.compile(r'<P>\[ <a href="/">SMH</a> \]</P>',
                    re.IGNORECASE | re.DOTALL),
         lambda match: ''),
        # and business pages. Under DOTALL a greedy '.*' also spans
        # newlines and would swallow everything up to the end of the
        # document, so the match is non-greedy and stops at the first
        # closing </p>.
        (re.compile(r'<p><a href="http://business\.smh\.com\.au.*?</p>',
                    re.IGNORECASE | re.DOTALL),
         lambda match: ''),
    ]

    def get_feeds(self):
        """Return a one-element list holding the ('SMH', url) feed tuple."""
        return [('SMH', 'http://smh.com.au/text')]
|