View Single Post
Old 04-15-2012, 10:49 PM   #2
kovidgoyal
creator of calibre
kovidgoyal ought to be getting tired of karma fortunes by now.
 
kovidgoyal's Avatar
 
Posts: 45,435
Karma: 27757438
Join Date: Oct 2006
Location: Mumbai, India
Device: Various
Here you go:

Code:
import urllib, re
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1279258912(BasicNewsRecipe):
    """Calibre news recipe for the Orlando Sentinel FeedBurner feeds.

    The class is purely declarative apart from :meth:`get_article_url`,
    which recovers the real article address from the FeedBurner entry.
    """

    title          = u'Orlando Sentinel'
    oldest_article = 3
    max_articles_per_feed = 100

    # (section title, feed URL) pairs.
    feeds          = [
        (u'News', u'http://feeds.feedburner.com/orlandosentinel/news'),
        (u'Opinion', u'http://feeds.feedburner.com/orlandosentinel/news/opinion'),
        (u'Business', u'http://feeds.feedburner.com/orlandosentinel/business'),
        (u'Technology', u'http://feeds.feedburner.com/orlandosentinel/technology'),
        (u'Space and Science', u'http://feeds.feedburner.com/orlandosentinel/news/space'),
        (u'Entertainment', u'http://feeds.feedburner.com/orlandosentinel/entertainment'),
        (u'Life and Family', u'http://feeds.feedburner.com/orlandosentinel/features/lifestyle'),
    ]
    __author__ = 'rty'
    # Was misspelled ``pubisher``, so the value never reached the attribute
    # the base class presumably reads (verify against BasicNewsRecipe).
    publisher  = 'OrlandoSentinel.com'
    description           = 'Orlando, Florida, Newspaper'
    category              = 'News, Orlando, Florida'

    remove_javascript = True
    use_embedded_content   = False
    no_stylesheets = True
    language = 'en'
    encoding               = 'utf-8'
    conversion_options = {'linearize_tables': True}
    masthead_url = 'http://www.orlandosentinel.com/media/graphic/2009-07/46844851.gif'

    auto_cleanup = True

    # Two-character escapes found in the path segment that precedes
    # ``story01.htm`` in ``feedburner_origlink`` values, and what they decode to.
    _FEEDBURNER_ESCAPES = {
        '0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
        '0D': '?', '0E': '-', '0N': '.com', '0L': 'http:',
        '0S': '//',
    }
    # One alternation over all escapes so the link is decoded in a single
    # left-to-right pass (see get_article_url for why that matters).
    _FEEDBURNER_ESCAPE_RE = re.compile(
        '|'.join(re.escape(key) for key in sorted(_FEEDBURNER_ESCAPES)))

    def get_article_url(self, article):
        """Return the article URL for a feed entry, or None if none is found.

        Two sources are tried in order:

        1. A ``bookmark.cfm`` link inside ``article.summary`` whose ``link=``
           query parameter holds the percent-encoded article URL.
        2. ``article.get('feedburner_origlink')``. When that link ends in
           ``story01.htm`` the preceding path segment is an escaped form of
           the URL and is decoded; otherwise the link is used unchanged.

        A ``?track=rss`` marker is stripped from whichever URL is returned.
        """
        # ``unquote`` lives in different modules on Python 2 and Python 3;
        # import locally so the recipe works on either.
        try:
            from urllib.parse import unquote
        except ImportError:
            from urllib import unquote

        url = None

        # Best effort: the entry may lack a summary, or the summary may not be
        # a string. Only those expected failures are ignored.
        try:
            match = re.search(
                r'href=".+?bookmark.cfm.+?link=(.+?)"', article.summary)
        except (AttributeError, KeyError, TypeError):
            match = None
        if match is not None:
            url = unquote(match.group(1))

        if url is None:
            link = article.get('feedburner_origlink', None)
            if link:
                segments = link.split('/')
                if segments[-1] == "story01.htm" and len(segments) > 1:
                    escapes = self._FEEDBURNER_ESCAPES
                    # Decode in one pass. Chained str.replace calls could
                    # re-interpret already-decoded text as a new escape
                    # (e.g. '0AB' -> '0B' -> '.'), depending on dict order.
                    url = self._FEEDBURNER_ESCAPE_RE.sub(
                        lambda m: escapes[m.group(0)], segments[-2])
                else:
                    url = link

        if url is None:
            return None
        return url.replace('?track=rss', '')
kovidgoyal is offline   Reply With Quote