View Single Post
Old 12-28-2010, 12:30 AM   #2
jimolo
Junior Member
jimolo began at the beginning.
 
Posts: 7
Karma: 10
Join Date: Dec 2010
Device: Kindle
Try this and let me know how it works (this is my first recipe).

Code:
__license__   = 'GPL v3'
__copyright__ = '2010, JOlo'
'''
www.theweek.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

import string, re
class TheWeek(BasicNewsRecipe):
    """Calibre news recipe for The Week magazine (www.theweek.com).

    The recipe is purely declarative: it only sets class attributes that
    are read by ``BasicNewsRecipe``.  Articles come from the RSS feeds
    listed in ``feeds``; the remaining attributes describe the publication
    and control how each downloaded page is cleaned up.
    """

    # --- Publication metadata -------------------------------------------
    title = 'The Week Magazine'
    __author__ = 'Jim Olo'
    description = "The best of the US and international media.  Daily coverage of commentary and analysis of the day's events, as well as arts, entertainment, people and gossip, and political cartoons."
    publisher = 'The Week Publications, Inc.'
    masthead_url = 'http://test.theweek.com/images/logo_theweek.gif'
    # The masthead logo doubles as the cover image.
    cover_url = masthead_url
    category = 'news, politics, USA'
    language = 'en'

    # --- Download settings ----------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False

    # --- Page clean-up --------------------------------------------------
    # Replace everything from the first '<h3><a href=' up to and including
    # the last '</body>' with a bare '</body>'.  re.DOTALL lets '.' span
    # newlines, so the match covers the whole trailing part of the page.
    preprocess_regexps = [
        (re.compile(r'<h3><a href=.*</body>', re.DOTALL), lambda match: '</body>'),
    ]

    remove_tags_before = dict(name='h1')
    remove_tags_after = dict(name='div', attrs={'class': 'articleSubscribe4free'})

    # Every matcher uses a list of candidate attribute values, so all three
    # entries have the same shape and a fixed order.
    remove_tags = [
        dict(
            name='div',
            attrs={
                'class': [
                    'floatLeft',
                    'imageCaption',
                    'slideshowImageAttribution',
                    'postDate',
                    'utilities',
                    'cartoonInfo',
                    'left',
                    'middle',
                    'col300',
                    'articleSubscribe4free',
                    # NOTE(review): the leading space is kept exactly as in
                    # the original recipe - presumably it mirrors the site's
                    # markup; confirm before normalising.
                    ' articleFlyout',
                    'articleFlyout floatRight',
                    'fourFreeBar',
                ],
            },
        ),
        dict(
            name='div',
            attrs={'id': ['cartoonThumbs', 'rightColumn', 'header', 'partners']},
        ),
        dict(
            name='ul',
            attrs={'class': ['slideshowNav', 'hotTopicsList topicList']},
        ),
    ]

    remove_attributes = ['width', 'height', 'style', 'font', 'color']

    extra_css = '''
                h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
                h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
                h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
                p {font-family:Arial,Helvetica,sans-serif;}
                '''

    # URL pattern for the palmcoastdata.com host (not part of theweek.com).
    filter_regexps = [r'www\.palmcoastdata\.com']

    # --- Feeds: (section title, RSS URL) --------------------------------
    feeds = [
        (u'News-Opinion', u'http://theweek.com/section/index/news_opinion.rss'),
        (u'Business', u'http://theweek.com/section/index/business.rss'),
        (u'Arts-Life', u'http://theweek.com/section/index/arts_life.rss'),
        (u'Cartoons', u'http://theweek.com/section/index/cartoon_wit/0/all-cartoons.rss'),
    ]
jimolo is offline   Reply With Quote