This grabs all of the feeds from
The Detroit News.
There are a couple of concerns:
1 - "Outdoors" and "Real Estate" should work, but I couldn't test them because they've had no articles in those feeds for the past week.
2 - "Food" returns the same articles as "Metro/State". This is not an error in the recipe, but rather an error in the feed. It actually links to the wrong articles. I left it in because I assume they'll fix that.
3 - "Lions" works fine, but let's face it: there's really nothing you can do about the Lions.
Code:
import re
class AdvancedUserRecipe1297291961(BasicNewsRecipe):
title = u'Detroit News'
oldest_article = 2
max_articles_per_feed = 20
no_stylesheets = True
conversion_options = { 'linearize_tables' : True, }
feeds = [
(u'Headlines', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss&mime=xml'),
(u'Nation/World', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss09&mime=xml'),
(u'Metro/State', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss36&mime=xml'),
(u'Wayne County', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss01&mime=xml'),
(u'Oakland County', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss02&mime=xml'),
(u'Macomb County', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss03&mime=xml'),
(u'Livingston County', u'http://detnews.com/apps/pbcs.dll/section?category=rss04&mime=xml'),
(u'Politics/Government', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss10&mime=xml'),
(u'Editorials', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss07&mime=xml'),
(u'Columnists', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss43&mime=xml'),
(u'Charlie LeDuff', u'http://detnews.com/apps/pbcs.dll/section?category=rss54&mime=xml'),
(u'Religion', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss11&mime=xml'),
(u'Technology', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss12&mime=xml'),
(u'Commuting', u'http://detnews.com/apps/pbcs.dll/section?category=rss05&mime=xml'),
(u'Schools', u'http://detnews.com/apps/pbcs.dll/section?category=rss06&mime=xml'),
(u'Obituaries', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss08&mime=xml'),
(u'Autos Insider', u'http://detnews.com/apps/pbcs.dll/section?category=rss25&mime=xml'),
(u'Drive', u'http://detnews.com/apps/pbcs.dll/section?category=rss26&mime=xml'),
(u'Business', u'http://detnews.com/apps/pbcs.dll/section?category=rss21&mime=xml'),
(u'Personal Finance', u'http://detnews.com/apps/pbcs.dll/section?category=rss23&mime=xml'),
(u'Real Estate', u'http://detnews.com/apps/pbcs.dll/section?category=rss24&mime=xml'),
(u'Movies', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss28&mime=xml'),
(u'TV', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss40&mime=xml'),
(u'Music/Nightlife', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss30&mime=xml'),
(u'Celebrities', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss51&mime=xml'),
(u'The Arts', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss27&mime=xml'),
(u'Food', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss29&mime=xml'),
(u'Homestyle', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss31&mime=xml'),
(u'The Green Life', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss53&mime=xml'),
(u'Lifestyle', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss32&mime=xml'),
(u'Health', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss34&mime=xml'),
(u'Travel', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss52&mime=xml'),
(u'Advice', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss50&mime=xml'),
(u'Pistons', u'http://detnews.com/apps/pbcs.dll/section?category=rss13&mime=xml'),
(u'Lions', u'http://detnews.com/apps/pbcs.dll/section?category=rss14&mime=xml'),
(u'Tigers', u'http://detnews.com/apps/pbcs.dll/section?category=rss15&mime=xml'),
(u'Red Wings', u'http://detnews.com/apps/pbcs.dll/section?category=rss16&mime=xml'),
(u'Michigan State', u'http://detnews.com/apps/pbcs.dll/section?category=rss18&mime=xml'),
(u'University of Michigan', u'http://detnews.com/apps/pbcs.dll/section?category=rss17&mime=xml'),
(u'Motor Sports', u'http://detnews.com/apps/pbcs.dll/section?category=rss20&mime=xml'),
(u'Golf', u'http://detnews.com/apps/pbcs.dll/section?category=rss47&mime=xml'),
(u'Outdoors', u'http://detnews.com/apps/pbcs.dll/section?category=rss19&mime=xml')
]
def print_version(self, url):
p = re.compile('(/\d{4}|/-1)/(rss|ENT|LIFESTYLE|OPINION|METRO)\d*')
m = p.search(url)
return url.replace(m.group(), '&template=printart')