Here's the recipe for the Indian English-language daily, the Indian Express.
Quote:
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.chardet import xml_to_unicode
class AdvancedUserRecipe1308039551(BasicNewsRecipe):
    """calibre news recipe that builds an e-book from Indian Express RSS feeds.

    The class only sets recipe options and overrides ``print_version``;
    fetching and conversion are done by the ``BasicNewsRecipe`` base class.
    See the calibre recipe API documentation for the exact meaning of each
    option below.
    """

    title = u'Indian Express'
    language = 'en_IN'

    # Feed selection limits.
    oldest_article = 10
    max_articles_per_feed = 20
    remove_empty_feeds = True
    use_embedded_content = False

    # Page clean-up options.
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = ['width', 'height']
    remove_tags_before = dict(name='h2')

    # Page furniture to strip from every article: matched by element id,
    # by CSS class, or by tag name.
    # NOTE: 'comment_cnt_book' and 'single_page' previously contained a stray
    # space ('comment_cnt_bo ok', ' single_page'), apparently introduced when
    # the recipe was pasted; the space has been removed so the selectors can
    # match the real class names.
    remove_tags = [
        dict(id=['google_new', 'express_special', 'footer', 'header']),
        {'class': 'express_special'},
        {'class': 'clr_1'},
        {'class': 'addthis_toolbox addthis_default_style'},
        {'class': 'box'},
        {'class': 'nobdr'},
        {'class': 'box_facebook'},
        {'class': 'currentc2'},
        {'class': 'innercontenthc2'},
        {'class': 'bookmarks_div'},
        {'class': 'comment_cnt_book'},
        {'class': 'addthis_separator'},
        {'class': 'icons'},
        {'class': 'bookmarks'},
        {'class': 'single_page'},
        dict(name='iframe'),
        {'class': 'picture_gal'},
    ]

    # (section title, RSS feed URL) pairs, in the order they were listed.
    feeds = [
        (u'Latest News', u'http://syndication.indianexpress.com/rss/latest-news.xml'),
        (u'Front Page', u'http://syndication.indianexpress.com/rss/33/front-page.xml'),
        (u'Editorial', u'http://syndication.indianexpress.com/rss/35/editorials.xml'),
        (u'Op-Ed', u'http://syndication.indianexpress.com/rss/36/oped.xml'),
        (u'Economy', u'http://syndication.indianexpress.com/rss/794/economy.xml'),
        (u'Politics', u'http://syndication.indianexpress.com/rss/799/politics.xml'),
        (u'Regional', u'http://syndication.indianexpress.com/rss/800/regional.xml'),
        (u'Sunday Stories', u'http://syndication.indianexpress.com/rss/723/sunday-stories.xml'),
        (u'Climate Change', u'http://syndication.indianexpress.com/rss/912/climate-change.xml'),
        (u'Letters to Editor', u'http://syndication.indianexpress.com/rss/40/letters-to-editor.xml'),
    ]

    def print_version(self, url):
        """Return the ``/story-print/`` URL corresponding to an article URL.

        The article URL is split on ``/``; element 2 (the host) and element 5
        are combined as ``http://<host>/story-print/<element 5>``. Element 5
        is presumably the numeric story id -- confirm against live URLs.

        If the URL has fewer than six ``/``-separated parts, or the sixth
        part is empty, no print URL can be built and the original ``url`` is
        returned unchanged instead of raising ``IndexError``.
        """
        parts = url.split('/')
        # Guard against short or trailing-slash URLs that lack the segment
        # needed to build the print URL.
        if len(parts) < 6 or not parts[5]:
            return url
        return 'http://' + parts[2] + '/story-print/' + parts[5]
|