# Code:
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
class AdvancedUserRecipe1277443634(BasicNewsRecipe):
    """Calibre news recipe for the BBC Chinese (Simplified) site.

    Downloads the RSS feeds published under
    ``http://www.bbc.co.uk/zhongwen/simp/`` and styles the result with CSS
    that looks for Chinese fonts (Song, FangSong, Kai, Hei) by local name
    and then in a list of font paths under various device-specific
    directories.

    Everything except :meth:`preprocess_html` is declarative configuration
    consumed by ``BasicNewsRecipe``.
    """

    title = u'BBC中文网'
    # Maximum article age, in days, and per-feed article cap.
    oldest_article = 1.5
    max_articles_per_feed = 1000

    # (section title, feed URL) pairs; titles are Chinese, written as escapes.
    feeds = [
        # Home page
        (u'\u4e3b\u9875', u'http://www.bbc.co.uk/zhongwen/simp/index.xml'),
        # International news
        (u'\u56fd\u9645\u65b0\u95fb', u'http://www.bbc.co.uk/zhongwen/simp/world/index.xml'),
        # China / Greater China region
        (u'\u4e24\u5cb8\u4e09\u5730', u'http://www.bbc.co.uk/zhongwen/simp/china/index.xml'),
        # Finance and business
        (u'\u91d1\u878d\u8d22\u7ecf', u'http://www.bbc.co.uk/zhongwen/simp/business/index.xml'),
        # Online interaction
        (u'\u7f51\u4e0a\u4e92\u52a8', u'http://www.bbc.co.uk/zhongwen/simp/interactive/index.xml'),
        # Audio, video and pictures
        (u'\u97f3\u89c6\u56fe\u7247', u'http://www.bbc.co.uk/zhongwen/simp/multimedia/index.xml'),
        # Analysis and commentary
        (u'\u5206\u6790\u8bc4\u8bba', u'http://www.bbc.co.uk/zhongwen/simp/indepth/index.xml'),
        # English teaching
        (u'\u82f1\u8bed\u6559\u5b66', u'http://www.bbc.co.uk/zhongwen/simp/elt/index.xml')
    ]

    # CSS for the generated navigation/index templates (feed and article
    # lists, navbar). The "zw"/"fs"/"ht" families it names are only declared
    # in extra_css below.
    template_css = u'''
.article_date {color: gray;font-family:"仿宋","fs",serif;}
.article_description {font-family:"微软雅黑","黑体","ht",sans-serif; text-indent: 0pt;font-size: 0.8em;}
a.article {font-weight: bold; text-align:left;font-family:"宋体","zw",serif;}
a.feed {font-weight: bold;}
.calibre_navbar {font-family:"微软雅黑","黑体","ht",sans-serif;}'''

    # CSS added to the downloaded article pages. Declares four font aliases
    # ("zw" Song, "fs" FangSong, "kt" Kai, "ht" Hei), each with a long list of
    # candidate sources, followed by the element rules that use them.
    # Fix: a stray "A" after the first local() entry of the "zw" src list has
    # been removed; it made that @font-face declaration invalid.
    extra_css = u'''
@font-face {
font-family:"zw";
src:local("宋体"),
local("DK-SONGTI"),
url(../fonts/zw.ttf),
url(res:///opt/sony/ebook/FONT/zw.ttf),
url(res:///Data/FONT/zw.ttf),
url(res:///opt/sony/ebook/FONT/tt0011m_.ttf),
url(res:///ebook/fonts/../../mnt/sdcard/fonts/zw.ttf),
url(res:///ebook/fonts/../../mnt/extsd/fonts/zw.ttf),
url(res:///ebook/fonts/zw.ttf),
url(res:///ebook/fonts/DroidSansFallback.ttf),
url(res:///fonts/ttf/zw.ttf),
url(res:///../../media/mmcblk0p1/fonts/zw.ttf),
url(res:///DK_System/system/font/zw.ttf),
url(res:///abook/fonts/zw.ttf),
url(res:///system/fonts/zw.ttf),
url(res:///system/media/sdcard/fonts/zw.ttf),
url(res:///media/fonts/zw.ttf),
url(res:///sdcard/fonts/zw.ttf),
url(res:///system/fonts/DroidSansFallback.ttf),
url(res:///mnt/MOVIFAT/font/zw.ttf),
url(res:///media/flash/fonts/zw.ttf),
url(res:///media/sd/fonts/zw.ttf),
url(res:///opt/onyx/arm/lib/fonts/AdobeHeitiStd-Regular.otf),
url(res:///../../fonts/zw.ttf),
url(res:///../fonts/zw.ttf);}
@font-face {
font-family:"fs";
src:local("仿宋"),
local("DK-FANGSONG"),
url(../fonts/fs.ttf),
url(res:///opt/sony/ebook/FONT/fs.ttf),
url(res:///Data/FONT/fs.ttf),
url(res:///opt/sony/ebook/FONT/tt0011m_.ttf),
url(res:///ebook/fonts/../../mnt/sdcard/fonts/fs.ttf),
url(res:///ebook/fonts/../../mnt/extsd/fonts/fs.ttf),
url(res:///ebook/fonts/fs.ttf),
url(res:///ebook/fonts/DroidSansFallback.ttf),
url(res:///fonts/ttf/fs.ttf),
url(res:///../../media/mmcblk0p1/fonts/fs.ttf),
url(res:///DK_System/system/font/fs.ttf),
url(res:///abook/fonts/fs.ttf),
url(res:///system/fonts/fs.ttf),
url(res:///system/media/sdcard/fonts/fs.ttf),
url(res:///media/fonts/fs.ttf),
url(res:///sdcard/fonts/fs.ttf),
url(res:///system/fonts/DroidSansFallback.ttf),
url(res:///mnt/MOVIFAT/font/fs.ttf),
url(res:///media/flash/fonts/fs.ttf),
url(res:///media/sd/fonts/fs.ttf),
url(res:///opt/onyx/arm/lib/fonts/AdobeHeitiStd-Regular.otf),
url(res:///../../fonts/fs.ttf),
url(res:///../fonts/fs.ttf);}
@font-face {
font-family:"kt";
src:local("楷体"),
local("DK-KAITI"),
url(../fonts/kt.ttf),
url(res:///opt/sony/ebook/FONT/kt.ttf),
url(res:///Data/FONT/kt.ttf),
url(res:///opt/sony/ebook/FONT/tt0011m_.ttf),
url(res:///ebook/fonts/../../mnt/sdcard/fonts/kt.ttf),
url(res:///ebook/fonts/../../mnt/extsd/fonts/kt.ttf),
url(res:///ebook/fonts/kt.ttf),
url(res:///ebook/fonts/DroidSansFallback.ttf),
url(res:///fonts/ttf/kt.ttf),
url(res:///../../media/mmcblk0p1/fonts/kt.ttf),
url(res:///DK_System/system/font/kt.ttf),
url(res:///abook/fonts/kt.ttf),
url(res:///system/fonts/kt.ttf),
url(res:///system/media/sdcard/fonts/kt.ttf),
url(res:///media/fonts/kt.ttf),
url(res:///sdcard/fonts/kt.ttf),
url(res:///system/fonts/DroidSansFallback.ttf),
url(res:///mnt/MOVIFAT/font/kt.ttf),
url(res:///media/flash/fonts/kt.ttf),
url(res:///media/sd/fonts/kt.ttf),
url(res:///opt/onyx/arm/lib/fonts/AdobeHeitiStd-Regular.otf),
url(res:///../../fonts/kt.ttf),
url(res:///../fonts/kt.ttf);}
@font-face {
font-family:"ht";
src:local("微软雅黑"),
local("DK-HEITI"),
url(../fonts/ht.ttf),
url(res:///opt/sony/ebook/FONT/ht.ttf),
url(res:///Data/FONT/ht.ttf),
url(res:///opt/sony/ebook/FONT/tt0011m_.ttf),
url(res:///ebook/fonts/../../mnt/sdcard/fonts/ht.ttf),
url(res:///ebook/fonts/../../mnt/extsd/fonts/ht.ttf),
url(res:///ebook/fonts/ht.ttf),
url(res:///ebook/fonts/DroidSansFallback.ttf),
url(res:///fonts/ttf/ht.ttf),
url(res:///../../media/mmcblk0p1/fonts/ht.ttf),
url(res:///DK_System/system/font/ht.ttf),
url(res:///abook/fonts/ht.ttf),
url(res:///system/fonts/ht.ttf),
url(res:///system/media/sdcard/fonts/ht.ttf),
url(res:///media/fonts/ht.ttf),
url(res:///sdcard/fonts/ht.ttf),
url(res:///system/fonts/DroidSansFallback.ttf),
url(res:///mnt/MOVIFAT/font/ht.ttf),
url(res:///media/flash/fonts/ht.ttf),
url(res:///media/sd/fonts/ht.ttf),
url(res:///opt/onyx/arm/lib/fonts/AdobeHeitiStd-Regular.otf),
url(res:///../../fonts/ht.ttf),
url(res:///../fonts/ht.ttf);}
body {
padding: 0%;
margin-top: 0%;
margin-bottom: 0%;
margin-left: 1%;
margin-right: 1%;
line-height:130%;
font-family:"宋体","zw",serif;
text-align: justify;
text-indent: 0em;
color: black;}
p {
margin-top: 5pt;
margin-bottom: 5pt;
line-height: 130%;
font-family:"宋体","zw",serif;
text-align: justify;
text-indent: 2em;}
div {
margin:0px;
padding:0px;
line-height:130%;
text-align: justify;
font-family:"宋体","zw",serif;}
h1 {
margin-top: 1em;
margin-bottom: 0.5em;
font-family:"微软雅黑","黑体","ht",sans-serif;
font-size: xx-large;
line-height: 130%;
text-align: center;
text-indent: 0em;}
h2 {
margin-top: 1em;
margin-bottom: 0.5em;
font-family:"微软雅黑","黑体","ht",sans-serif;
font-size: x-large;
line-height: 130%;
text-align: center;
text-indent: 0em;}
h3 {
margin-top: 1em;
margin-bottom: 0.5em;
font-family:"微软雅黑","黑体","ht",sans-serif;
font-size: large;
line-height: 130%;
text-align: center;
text-indent: 0em;}
h4 {
margin-top: 1em;
margin-bottom: 0.5em;
font-family:"微软雅黑","黑体","ht",sans-serif;
font-size: medium;
text-align: center;
text-indent: 0em;
line-height: 130%;}
div.datestamp{font-family:"楷体","kt",serif;text-align: justify;text-indent: 0em;text-align: center;}
.articledescription {font-family: "微软雅黑", 'ht', sans-serif;}
span {font-family:"微软雅黑","黑体","ht",sans-serif;}
span.lastupdated {font-family:"楷体","kt",serif;}
a {font-family:"微软雅黑","黑体","ht",sans-serif;}
ul {font-family:"宋体","zw",serif;}
li {font-family:"宋体","zw",serif;}
ol {font-family:"宋体","zw",serif;}
div.module {text-indent: 0em;text-align: center;}
img {text-align: center;}
p.caption {font-family:"仿宋","fs",serif;text-align: center;text-indent: 0em;}
hr {height: 1px; border: 0px; color: black; background-color: black}
'''

    # Recipe metadata.
    __author__ = 'k4user'
    __version__ = '1.0'
    language = 'zh'
    # Fix: was "pubisher = 'BBC" -- a misspelled attribute name with an
    # unterminated string literal, which made the whole file a SyntaxError.
    publisher = 'BBC'
    description = 'BBC news in Chinese'
    category = 'News, Chinese'

    # Download / clean-up behaviour.
    remove_javascript = True
    use_embedded_content = False  # follow feed links rather than using feed bodies
    no_stylesheets = True  # drop the site's own stylesheets; extra_css is used instead
    encoding = 'UTF-8'
    conversion_options = {'linearize_tables':True}
    masthead_url = 'http://wscdn.bbc.co.uk/zhongwen/simp/images/1024/brand.jpg'

    # Only these elements of each article page are kept.
    keep_only_tags = [
        dict(name='h1'),
        dict(name='p', attrs={'class':['primary-topic','summary']}),
        dict(name='div', attrs={'class':['bodytext','datestamp','module']}),
    ]
    # <br> elements carrying these classes are removed.
    remove_tags = [dict(name='br', attrs={'class':['calibre12','calibre11']})]

    def preprocess_html(self, soup):
        """Rewrite ``module bodytext`` divs to plain ``module`` divs.

        :param soup: parsed article page (BeautifulSoup tree); modified in
            place.
        :return: the same ``soup`` object.
        """
        # NOTE(review): presumably done so these containers are styled only
        # by the ``div.module`` rule in extra_css -- confirm.
        for div in soup.findAll('div', 'module bodytext'):
            div['class'] = 'module'
        return soup