MobileRead Forums - View Single Post - Orlando Sentinel standard recipe not getting most news feeds

kovidgoyal · 04-15-2012, 10:49 PM

Here you go:

Code:

import urllib, re
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1279258912(BasicNewsRecipe):
    """Calibre news recipe for the Orlando Sentinel FeedBurner feeds.

    The class only sets recipe attributes and overrides
    ``get_article_url`` so that the link to the real article is used
    instead of the link published in the feed entry.
    """

    title = u'Orlando Sentinel'
    oldest_article = 3
    max_articles_per_feed = 100

    # (section title, feed URL) pairs.
    feeds = [
        (u'News', u'http://feeds.feedburner.com/orlandosentinel/news'),
        (u'Opinion', u'http://feeds.feedburner.com/orlandosentinel/news/opinion'),
        (u'Business', u'http://feeds.feedburner.com/orlandosentinel/business'),
        (u'Technology', u'http://feeds.feedburner.com/orlandosentinel/technology'),
        (u'Space and Science', u'http://feeds.feedburner.com/orlandosentinel/news/space'),
        (u'Entertainment', u'http://feeds.feedburner.com/orlandosentinel/entertainment'),
        (u'Life and Family', u'http://feeds.feedburner.com/orlandosentinel/features/lifestyle'),
    ]
    __author__ = 'rty'
    publisher = 'OrlandoSentinel.com'
    description = 'Orlando, Florida, Newspaper'
    category = 'News, Orlando, Florida'

    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    language = 'en'
    encoding = 'utf-8'
    conversion_options = {'linearize_tables': True}
    masthead_url = 'http://www.orlandosentinel.com/media/graphic/2009-07/46844851.gif'

    auto_cleanup = True

    # Two-character escape codes used in the obfuscated path segment of
    # ".../<segment>/story01.htm" links, mapped to the text they stand for.
    _LINK_ESCAPES = {
        '0A': '0',
        '0B': '.',
        '0C': '/',
        '0D': '?',
        '0E': '-',
        '0F': '=',
        '0G': '&',
        '0L': 'http:',
        '0N': '.com',
        '0S': '//',
    }
    # Every escape code is a "0" followed by one capital letter.
    _LINK_ESCAPE_RE = re.compile(r'0[A-Z]')

    def get_article_url(self, article):
        """Return the URL of the real article for a feed entry.

        Resolution order:

        1. A ``link=`` parameter of a ``bookmark.cfm`` hyperlink found in
           the entry summary, percent-decoded.
        2. The entry's ``feedburner_origlink``.  When that link ends in
           ``story01.htm``, the path segment just before it is unescaped
           with ``_LINK_ESCAPES``; otherwise the link is used as is.

        A ``?track=rss`` marker is stripped from the result.

        :param article: feed entry; it must offer a ``summary`` attribute
            and a dict-style ``get`` (presumably a feedparser entry --
            confirm against the caller).
        :return: the article URL, or ``None`` when neither source yields one.
        """
        # ``unquote`` lives in different modules on Python 2 and 3.
        try:
            from urllib.parse import unquote
        except ImportError:
            from urllib import unquote

        url = None
        try:
            found = re.search(
                r'href=".+?bookmark.cfm.+?link=(.+?)"', article.summary)
            url = unquote(found.group(1))
        except (AttributeError, KeyError, TypeError):
            # No summary, or no bookmark link in it: use the fallback below.
            url = None

        if url is None:
            link = article.get('feedburner_origlink', None)
            if link:
                parts = link.split('/')
                if len(parts) > 1 and parts[-1] == "story01.htm":
                    escapes = self._LINK_ESCAPES
                    # Decode in a single left-to-right pass.  Chained
                    # str.replace calls are order dependent: the "0" made
                    # from "0A" could pair up with the following letter
                    # and be decoded a second time.
                    url = self._LINK_ESCAPE_RE.sub(
                        lambda m: escapes.get(m.group(0), m.group(0)),
                        parts[-2])
                else:
                    url = link

        if url is None:
            return None
        return url.replace('?track=rss', '')