View Single Post
Old 10-23-2010, 10:03 PM   #1
esurfer
Junior Member
esurfer began at the beginning.
 
Posts: 5
Karma: 12
Join Date: Sep 2010
Device: kindle 3
Singtao STNN news recipe 星岛环球网

below is a script for www.stnn.cc 星岛环球网.


-----------------------------------------------------------------


__license__ = 'GPL v3'
__copyright__ = '2010, Larry Chan <larry1chan at gmail.com>'
'''
Singtao STNN
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe

class SingtaoSTNN(BasicNewsRecipe):
title = 'Singtao STNN'
__author__ = 'Larry Chan, larry1chan'
description = 'Chinese News'
oldest_article = 2
max_articles_per_feed = 100
simultaneous_downloads = 5
no_stylesheets = True
#delay = 1
use_embedded_content = False
encoding = 'gb2312'
publisher = 'Singtao STNN'
category = 'news, China, world'
language = 'zh'
publication_type = 'newsportal'
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
masthead_url = 'http://www.stnn.cc/images/0806/logo_080728.gif'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
,'linearize_tables': True
}


remove_tags_before = dict(name='div', attrs={'class':['page_box']})
remove_tags_after = dict(name='div', attrs={'class':['pagelist']})

keep_only_tags = [
dict(name='div', attrs={'class':['font_title clearfix']}),
dict(name='div', attrs={'id':['content_zoom']})

]

remove_attributes = ['width','height','href']

# for a full list of rss check out http://www.stnn.cc/rss/

feeds = [ (u'Headline News', u'http://www.stnn.cc/rss/news/index.xml'),
(u'Breaking News', u'http://www.stnn.cc/rss/tufa/index.xml'),
(u'Finance', u'http://www.stnn.cc/rss/fin/index.xml'),
(u'Entertainment', u'http://www.stnn.cc/rss/ent/index.xml'),
(u'International', u'http://www.stnn.cc/rss/guoji/index.xml'),
(u'China', u'http://www.stnn.cc/rss/china/index.xml'),
(u'Opnion', u'http://www.stnn.cc/rss/fin_op/index.xml'),
(u'Blog', u'http://blog.stnn.cc/uploadfile/rssblogtypehotlog.xml'),
(u'Hong Kong', u'http://www.stnn.cc/rss/hongkong/index.xml')

]
esurfer is offline   Reply With Quote