Hi folks!
I created a recipe for The Times of India.
Please review and/or use it.
Code:
from contextlib import closing
from calibre.web.feeds.news import BasicNewsRecipe
from calibre import as_unicode
from calibre.web.feeds import feed_from_xml, Feed
import time

class TimesOfIndia(BasicNewsRecipe):
    title = u'Times of India'
    description = u'News provided by The Times of India website'
    language = 'en_IN'
    __author__ = 'Ashish Madeti'
    masthead_url = 'http://news.lib.uchicago.edu/wp-content/uploads/2013/10/times-of-india-logo.jpg.jpg'
    # Cover: today's front page of the Delhi e-paper edition
    cover_url = ('http://epaperbeta.timesofindia.com/NasData/Publications/TheTimesOfIndia/Delhi/'
                 + time.strftime('%Y/%m/%d/Page/%d_%m_%Y_001.jpg'))
    publication_type = 'newspaper'
    oldest_article = 1.5
    compress_news_images = True
    compress_news_images_max_size = 30
    no_stylesheets = True
    auto_cleanup = True
    remove_empty_feeds = True
    ignore_duplicate_articles = {'title'}
    # Each feed is (title, url, max_articles); the per-feed article limit
    # is applied by the parse_feeds() override below.
    feeds = [
        (u'India', u'http://timesofindia.feedsportal.com/c/33039/f/533916/index.rss', 20),
        (u'World', u'http://timesofindia.feedsportal.com/c/33039/f/533917/index.rss', 10),
        (u'Business', u'http://timesofindia.feedsportal.com/c/33039/f/533919/index.rss', 6),
        (u'Tech', u'http://timesofindia.feedsportal.com/c/33039/f/533923/index.rss', 3),
        (u'Opinion', u'http://timesofindia.feedsportal.com/c/33039/f/533927/index.rss', 5),
        (u'Sports', u'http://timesofindia.feedsportal.com/c/33039/f/533921/index.rss', 4),
        (u'Science', u'http://timesofindia.feedsportal.com/c/33039/f/533922/index.rss', 2),
    ]

    def parse_feeds(self):
        '''
        Create a list of articles from the feeds in :attr:`feeds`. Overridden
        from :class:`BasicNewsRecipe` so that the third element of each feed
        tuple acts as a per-feed article limit.
        Return a list of :class:`Feed` objects.
        '''
        feeds = self.get_feeds()
        parsed_feeds = []
        for obj in feeds:
            if isinstance(obj, basestring):
                # Bare URL: no title, fall back to the recipe-wide article limit
                title, url, max_articles = None, obj, self.max_articles_per_feed
            else:
                title, url, max_articles = obj
            if url.startswith('feed://'):
                url = 'http' + url[4:]
            self.report_progress(0, _('Fetching feed') + ' %s...' % (title if title else url))
            try:
                with closing(self.browser.open(url)) as f:
                    parsed_feeds.append(feed_from_xml(
                        f.read(),
                        title=title,
                        log=self.log,
                        oldest_article=self.oldest_article,
                        max_articles_per_feed=max_articles,
                        get_article_url=self.get_article_url))
                if self.delay > 0:
                    time.sleep(self.delay)
            except Exception as err:
                # Log the failure and keep going with an empty placeholder feed
                feed = Feed()
                msg = 'Failed feed: %s' % (title if title else url)
                feed.populate_from_preparsed_feed(msg, [])
                feed.description = as_unicode(err)
                parsed_feeds.append(feed)
                self.log.exception(msg)

        if self.remove_empty_feeds:
            parsed_feeds = [f for f in parsed_feeds if len(f) > 0]

        return parsed_feeds
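
If anyone wants to give it a quick spin before adding it to calibre, saving the code above as a .recipe file (the filename below is just an example) and running it through ebook-convert's test mode should work; --test only fetches a couple of articles from a couple of feeds, so it finishes quickly:
Code:
ebook-convert "Times of India.recipe" .epub --test -vv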