MobileRead Forums - View Single Post - Globe and Mail Recipe Rewrite..

Thread: Globe and Mail Recipe Rewrite..

View Single Post

01-21-2011, 09:06 PM	#10
# Forum post by: mufc (Connoisseur) -- Posts: 99, Karma: 170, Join Date: Nov 2010,
# Location: Airdrie Alberta, Device: Sony 650
# "This is made for my Sony 650. Gets rid of images and hyperlinks etc."
# Spoiler (recipe source follows):
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2011, Szing, guterm'
__docformat__ = 'restructuredtext en'

'''
globeandmail.com
'''

from calibre.web.feeds.news import BasicNewsRecipe


class TheGlobeAndMailAdvancedRecipe(BasicNewsRecipe):
    """Calibre news recipe for The Globe and Mail, tuned for a Sony 650.

    Fetches the RSS sections listed in ``feeds``, rewrites every article URL
    to the mobile site (see ``print_version``), strips images and page
    furniture, and drops articles that appear in more than one feed.
    """

    title = u'The Globe And Mail'
    __license__ = 'GPL v3'
    __author__ = 'Szing, guterm'
    oldest_article = 2
    no_stylesheets = True
    max_articles_per_feed = 100
    encoding = 'utf8'
    publisher = 'Globe & Mail'
    language = 'en_CA'

    # Built by concatenation so the stylesheet text stays exactly what the
    # original one-line literal contained.
    extra_css = (
        ' h1{font-family:Georgia,serif; font-weight:bold;font-size:large;}'
        ' h2{font-family:Georgia,serif; font-weight:bold;font-size:large;}'
        ' p{font-family:Georgia,serif;font-size:small;}'
        ' body{font-family:Georgia,serif;font-size:small;} '
    )

    # (section title, RSS URL) pairs.
    feeds = [
        (u'National', u'http://www.theglobeandmail.com/news/national/?service=rss'),
        (u'World', u'http://www.theglobeandmail.com/news/world/?service=rss'),
        (u'Commentary', u'http://www.theglobeandmail.com/news/opinions/?service=rss'),
        (u'Sports', u'http://www.theglobeandmail.com/sports/?service=rss'),
        (u'Technology', u'http://www.theglobeandmail.com/news/technology/?service=rss'),
        (u'Personal Tech', u'http://www.theglobeandmail.com/news/technology/personal-tech/?service=rss'),
        (u'Arts', u'http://www.theglobeandmail.com/news/arts/?service=rss'),
        (u'Life', u'http://www.theglobeandmail.com/life/?service=rss'),
        (u'Opinion', u'http://www.theglobeandmail.com/news/opinions/opinion/?service=rss'),
    ]

    keep_only_tags = [
        dict(name='h1'),
        dict(name='h2', attrs={'id':'articletitle'}),
        dict(name='p', attrs={'class':['leadText', 'bodyText']}),
        dict(name='div', attrs={'class':['news','articlecopy','columnist', 'blog']}),
        # NOTE(review): 'id' is not an HTML tag name; this may have been meant
        # as name='div', attrs={'id':'article'} (postprocess_html looks up
        # id='article'). Left unchanged -- confirm against the live markup.
        dict(name='id', attrs={'class':'article'}),
        dict(name='table', attrs={'class':'todays-market'}),
        dict(name='header', attrs={'id':'leadheader'})
    ]

    remove_tags = [
        dict(name='ul', attrs={'class':['pillboxcontainer arttoolsbpbx']}),
        dict(name='div', attrs={'class':['relcont', 'articleTools', 'ypad fontsmall', 'pagination','meta mb10']}),
        dict(name='a', attrs={'href':['javascript:void(0);', 'http://m.yp.ca?tracking=globeandmail']}),
        dict(name='div', attrs={'id':['ShareArticles', 'topStories', 'seealsobottom']}),
        dict(name='p', attrs={'class':['leadCaption fontxsmall','lastMod fontxsmall mt10']}),
        # Drop every image (the post's stated goal for the Sony 650).
        dict(name='img')
    ]

    def postprocess_html(self, soup, first_fetch):
        """Keep only one of two ``id="article"`` elements on a page.

        When the parsed page holds exactly two elements with id ``article``,
        the one with fewer child nodes is removed from the tree; on a tie the
        first one is removed. Any other count leaves the page untouched.

        :param soup: parsed page (BeautifulSoup tree).
        :param first_fetch: unused here; part of the calibre hook signature.
        :return: the same ``soup`` object, possibly modified in place.
        """
        # Find and preserve single page article layout, can be first or last
        candidates = soup.findAll(True, {'id':'article'})
        if len(candidates) == 2:
            first, second = candidates
            if len(first.contents) > len(second.contents):
                second.extract()
            else:
                first.extract()
        return soup

    def parse_feeds(self, *args, **kwargs):
        """Parse the feeds, then drop articles already seen in an earlier feed.

        Arguments are forwarded unchanged to ``BasicNewsRecipe.parse_feeds``.
        The first occurrence of each article URL (in feed order) is kept;
        later occurrences are discarded.

        :return: the list of parsed feeds, each with ``articles`` replaced by
            its de-duplicated list.
        """
        parsed_feeds = BasicNewsRecipe.parse_feeds(self, *args, **kwargs)

        # Eliminate the duplicates
        seen_urls = set()
        for feed in parsed_feeds:
            unique_articles = []
            # Build a fresh list rather than removing from feed.articles while
            # iterating it, which would skip the article after each duplicate.
            for article in feed:
                if article.url in seen_urls:
                    continue
                seen_urls.add(article.url)
                unique_articles.append(article)
            feed.articles = unique_articles
        return parsed_feeds

    # cover_url = 'http://www.freewarepocketpc.net/wp7/img/the-globe-and-mail.png'

    # Use the mobile version rather than the web version
    def print_version(self, url):
        """Return the mobile-site URL for an article URL taken from the feed.

        Replaces the ``cmpid=rss1`` query fragment with ``service=mobile`` and
        the ``http://www.`` prefix with ``http://m.``.
        """
        mobile_query = url.replace('cmpid=rss1', 'service=mobile')
        return mobile_query.replace('http://www.', 'http://m.')