View Single Post
Old 11-15-2010, 02:00 PM   #1
Junior Member
Szing began at the beginning.
Posts: 1
Karma: 10
Join Date: Nov 2010
Device: Kobo
Globe and Mail Recipe Rewrite..

Here is a rewrite of the Globe & Mail recipe.
  • It has no ads
  • It has pictures related to the article
  • News download size is around 1.5-2 MB (epub)
  • It may have a problem with some multi-page articles
  • It does not have author pictures

class AdvancedUserRecipe1287083651(BasicNewsRecipe):
    """Calibre news recipe for the Globe & Mail.

    Articles are fetched through the site's mobile service (see
    ``print_version``) and trimmed down to the tags listed in
    ``keep_only_tags``; the blocks listed in ``remove_tags`` are dropped.
    """

    title = u'Globe & Mail'
    __license__ = 'GPL v3'
    __copyright__ = '2010, Szing'
    oldest_article = 2
    no_stylesheets = True
    max_articles_per_feed = 100
    encoding = 'utf8'
    publisher = 'Globe & Mail'
    category = 'news, Canada, world'
    language = 'en_CA'
    extra_css = 'p.meta {font-size:75%}\n .redtext {color: red;}\n .byline {font-size: 70%}'

    # (section title, feed URL) pairs.
    # NOTE(review): every feed URL is an empty string here -- presumably the
    # links were stripped when the recipe was pasted; they must be filled in
    # before the recipe can download anything. TODO confirm the real URLs.
    # NOTE(review): 'Top Polical Stories' looks like a typo for 'Political';
    # left unchanged because the title is user-visible output.
    feeds = [
        (u'Top National Stories', u''),
        (u'Business', u''),
        (u'Commentary', u''),
        (u'Blogs', u''),
        (u'Facts & Arguments', u''),
        (u'Technology', u''),
        (u'Investing', u''),
        (u'Top Polical Stories', u''),
        (u'Arts', u''),
        (u'Life', u''),
        (u'Real Estate', u''),
        (u'Auto', u''),
        (u'Sports', u''),
    ]

    # Only these elements of each downloaded page are kept.
    # NOTE(review): the entry with name='id' selects a tag literally called
    # <id>, which is not an HTML element -- possibly meant to match an
    # element whose id is 'article'. Left as-is; verify against the site.
    keep_only_tags = [
        dict(name='h2', attrs={'id': 'articletitle'}),
        dict(name='p', attrs={'class': ['leadText', 'meta', 'leadImage', 'redtext byline', 'bodyText']}),
        dict(name='div', attrs={'class': ['news', 'articlemeta', 'articlecopy']}),
        dict(name='id', attrs={'class': 'article'}),
        dict(name='table', attrs={'class': 'todays-market'}),
        dict(name='header', attrs={'id': 'leadheader'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'id': ['tabInside', 'ShareArticles', 'topStories']}),
    ]

    # This has to be here or the text in the article appears twice.
    remove_tags_after = [dict(id='article')]

    def print_version(self, url):
        """Return the URL of the mobile version of the article at *url*.

        The mobile version is used rather than the web version. The
        ``service=mobile`` parameter is appended with ``&`` when *url*
        already carries a query string and with ``?`` otherwise, so the
        result is a well-formed URL in both cases.
        """
        separator = '&' if '?' in url else '?'
        return url + separator + 'service=mobile'
Szing is offline   Reply With Quote