Here's a recipe i've managed to codge together.
The epub output isn't so good, but i set the default ouput to LRF in Calibre and the result is much better on my PRS300. (takes +20 mins to process on my laptop)
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Metro UK'
oldest_article = 1
max_articles_per_feed = 100
keep_only_tags = [
dict(name='h1', attrs={'':''}),
dict(name='h2', attrs={'class':'h2'}),
dict(name='div', attrs={'class':'art-lft'})
]
remove_tags = [dict(name='div', attrs={'class':[ 'metroCommentFormWrap',
'commentForm', 'metroCommentInnerWrap',
'art-rgt','pluck-app pluck-comm' ]}),
dict(name='h3', attrs={'':''})]
feeds = [
(u'News', u'http://www.metro.co.uk/rss/news/'), (u'Money', u'http://www.metro.co.uk/rss/money/'), (u'Sport', u'http://www.metro.co.uk/rss/sport/'), (u'Film', u'http://www.metro.co.uk/rss/metrolife/film/'), (u'Music', u'http://www.metro.co.uk/rss/metrolife/music/'), (u'TV', u'http://www.metro.co.uk/rss/tv/'), (u'Showbiz', u'http://www.metro.co.uk/rss/showbiz/'), (u'Weird News', u'http://www.metro.co.uk/rss/weird/'), (u'Travel', u'http://www.metro.co.uk/rss/travel/'), (u'Lifestyle', u'http://www.metro.co.uk/rss/lifestyle/'), (u'Books', u'http://www.metro.co.uk/rss/lifestyle/books/'), (u'Food', u'http://www.metro.co.uk/rss/lifestyle/restaurants/')]
Last edited by scissors; 05-24-2011 at 02:36 PM.
|