View Single Post
Old 05-31-2012, 10:08 AM   #1
timtoo
Junior Member
timtoo began at the beginning.
 
Posts: 1
Karma: 10
Join Date: May 2012
Device: PRS-T1
Rabble.ca recipe

Rabble.ca is an alternative Canadian news/opinion site.

I've been using a custom recipe for rabble.ca for a long time. I can't remember if I made it myself or snagged it from somewhere. Anyhow, today I finally decided what I was using needed improvement. So here it is:

Code:
class RabbleCa(BasicNewsRecipe):
    """Calibre news recipe for rabble.ca, built from the site's print pages.

    Articles are listed by the ``rabble-news`` FeedBurner feed and each
    article URL is rewritten to its ``/print/`` counterpart by
    :meth:`print_version`.

    ``BasicNewsRecipe`` and ``re`` are not imported in this snippet; they are
    assumed to already be in scope when the recipe is loaded
    (NOTE(review): confirm for the calibre version in use).
    """

    title = u'Rabble.ca'
    oldest_article = 7
    max_articles_per_feed = 100

    cover_url = 'https://upload.wikimedia.org/wikipedia/en/4/44/Rabble.png'
    masthead_url = 'http://rabble.ca/sites/rabble/files/dreamyrabble_logo.jpg'

    feeds = [(u'Rabble.ca', u'http://feeds.feedburner.com/rabble-news')]

    # Swap everything from the link to the site's /user page through the words
    # "to post comments" for a plain "Tags:" label.
    # The dot in the host name is escaped so it only matches a literal ".",
    # and either URL scheme is accepted.
    preprocess_regexps = [
        (
            re.compile(
                r'<a href="https?://rabble\.ca/user">.*?to post comments',
                re.DOTALL | re.IGNORECASE,
            ),
            lambda match: 'Tags:',
        ),
    ]

    # Lay the taxonomy list and the date/link/text fields out inline, and
    # italicise field labels.
    extra_css = """
            .print-taxonomy { display: inline }
            .print-taxonomy ul { display: inline; margin: 0px }
            .print-taxonomy ul li { display: inline; list-style: none }
            .field-type-date div { display: inline }
            .field-type-link div { display: inline }
            .field-type-text div { display: inline }
            .field-label { font-style: italic }
            """

    # Site roots that print_version knows how to rewrite.
    _SITE_PREFIXES = ('http://rabble.ca/', 'https://rabble.ca/')

    def print_version(self, url):
        """Return the print-page URL for a rabble.ca article URL.

        Only a leading ``http://rabble.ca/`` or ``https://rabble.ca/`` is
        rewritten, by inserting ``print/`` right after it. Anchoring on the
        prefix keeps the same text appearing later in the URL (for example in
        a query string) from being altered as well. URLs that do not start
        with one of those prefixes are returned unchanged.
        """
        for prefix in self._SITE_PREFIXES:
            if url.startswith(prefix):
                return prefix + 'print/' + url[len(prefix):]
        return url

    remove_tags = [
        # print version of the web page
        dict(name='div', attrs={'class': ['print-logo']}),
        dict(name='div', attrs={'class': ['print-site_name']}),
        dict(name='hr', attrs={'class': ['print-hr']}),
        dict(name='div', attrs={'class': ['print-links']}),

        # regular web page in case you need to download them
        dict(name='div', attrs={'id': ['header']}),
        dict(name='div', attrs={'class': ['container-submenu']}),
        dict(name='div', attrs={'id': ['sidebar']}),
        dict(name='div', attrs={'id': ['footer']}),
        dict(name='div', attrs={'class': ['rabble-nodelinks rabble-nodelinks-top']}),
        dict(name='div', attrs={'class': ['rabble-nodelinks rabble-nodelinks-bottom']}),
        dict(name='div', attrs={'class': ['tags-issues']}),
        dict(name='div', attrs={'class': ['field field-type-text field-field-summary']}),
        dict(name='span', attrs={'class': ['print-footnote']}),
    ]
timtoo is offline   Reply With Quote