View Single Post
Old 04-08-2011, 07:23 AM   #4
ironcat
Junior Member
ironcat began at the beginning.
 
ironcat's Avatar
 
Posts: 5
Karma: 10
Join Date: Mar 2011
Location: Budapest, Hungary
Device: Kindle 3 Wi-Fi
You may start with this:
Spoiler:

Code:
import re
from calibre.web.feeds.recipes import BasicNewsRecipe

class LadysmithChronicle(BasicNewsRecipe):
    """Calibre news recipe for the Ladysmith Chronicle (bclocalnews.com).

    Articles are discovered through the paper's per-section RSS feeds and
    downloaded in their mobile rendering (see ``print_version``), which is
    then trimmed down to the ``mobileStory`` element.
    """

    # --- Publication metadata -------------------------------------------
    title = 'LadysmithChronicle.com'
    __author__ = u''
    description = u'LadysmithChronicle.com'
    oldest_article = 7
    language = 'en'
    publisher = 'BlackPress'
    category = u'news'

    # --- Download / conversion options ----------------------------------
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf8'
    extra_css = 'body{ font-family: Verdana,Helvetica,Arial,sans-serif }'

    # Drop every HTML comment (including multi-line ones, hence DOTALL)
    # before the page is parsed.
    preprocess_regexps = [
        (re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
    ]

    # Both bounds name the same element; per calibre's documented semantics
    # for these options this discards the markup before and after the
    # ``mobileStory`` container.
    remove_tags_before = dict(id='mobileStory')
    remove_tags_after = dict(id='mobileStory')

    # Leftover elements inside the story that are not article content.
    remove_tags = [
        dict(name='div', attrs={'class': 'footer'}),
        dict(name='strong', attrs={'style': 'color: #003366;'}),
        dict(name='a', attrs={'class': 'backLink'}),
    ]
    remove_javascript = True
    remove_empty_feeds = True

    # (section title, RSS URL) pairs, one per section of the paper.
    feeds = [
        (u'News', u'http://www.bclocalnews.com/vancouver_island_central/ladysmithchronicle/news/index.rss'),
        (u'Sports', u'http://www.bclocalnews.com/vancouver_island_central/ladysmithchronicle/sports/index.rss'),
        (u'Business', u'http://www.bclocalnews.com/vancouver_island_central/ladysmithchronicle/business/index.rss'),
        (u'Entertainment', u'http://www.bclocalnews.com/vancouver_island_central/ladysmithchronicle/entertainment/index.rss'),
        (u'Lifestyles', u'http://www.bclocalnews.com/vancouver_island_central/ladysmithchronicle/lifestyles/index.rss'),
        (u'Community', u'http://www.bclocalnews.com/vancouver_island_central/ladysmithchronicle/community/index.rss'),
        (u'Opinion', u'http://www.bclocalnews.com/vancouver_island_central/ladysmithchronicle/opinion/index.rss'),
        (u'Letters', u'http://www.bclocalnews.com/vancouver_island_central/ladysmithchronicle/opinion/letters/index.rss'),
    ]

    def print_version(self, url):
        """Return the URL of the mobile rendering of an article.

        :param url: article URL as taken from the feed.
        :return: ``url`` with a ``mobile=true`` query parameter appended.
        """
        # A URL that already carries a query string needs '&'; a second '?'
        # would fold the flag into the previous parameter's value.
        separator = '&' if '?' in url else '?'
        return url + separator + 'mobile=true'
ironcat is offline   Reply With Quote