View Single Post
Old 08-11-2012, 10:04 PM   #3
lrui
Enthusiast
lrui ought to be getting tired of karma fortunes by now.lrui ought to be getting tired of karma fortunes by now.lrui ought to be getting tired of karma fortunes by now.lrui ought to be getting tired of karma fortunes by now.lrui ought to be getting tired of karma fortunes by now.lrui ought to be getting tired of karma fortunes by now.lrui ought to be getting tired of karma fortunes by now.lrui ought to be getting tired of karma fortunes by now.lrui ought to be getting tired of karma fortunes by now.lrui ought to be getting tired of karma fortunes by now.lrui ought to be getting tired of karma fortunes by now.
 
lrui's Avatar
 
Posts: 49
Karma: 475062
Join Date: Aug 2012
Device: nook simple touch
Quote:
Originally Posted by nickredding View Post
Code:
# Give every <div class="module bodytext"> the plain class "module".
for module_div in soup.findAll('div', 'module bodytext'):
    module_div['class'] = 'module'
Thanks for the reply, but it doesn't work.


Code:
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag

class AdvancedUserRecipe1277443634(BasicNewsRecipe):
    """calibre news recipe for the BBC Chinese (simplified) RSS feeds.

    Downloads the feeds listed in ``feeds``, keeps only the headline,
    summary and body containers of each article page, and styles the
    result with CSS that maps Chinese font families onto font files found
    at a range of e-reader specific paths.
    """

    title = u'BBC中文网'
    # Age limit for articles (in days, per the calibre recipe API docs).
    oldest_article = 1.5
    max_articles_per_feed = 1000

    # (feed title, feed URL) pairs; the English gloss of each escaped
    # title is given in the trailing comment.
    feeds = [
        (u'\u4e3b\u9875', u'http://www.bbc.co.uk/zhongwen/simp/index.xml'),  # home page
        (u'\u56fd\u9645\u65b0\u95fb', u'http://www.bbc.co.uk/zhongwen/simp/world/index.xml'),  # world news
        (u'\u4e24\u5cb8\u4e09\u5730', u'http://www.bbc.co.uk/zhongwen/simp/china/index.xml'),  # China
        (u'\u91d1\u878d\u8d22\u7ecf', u'http://www.bbc.co.uk/zhongwen/simp/business/index.xml'),  # business
        (u'\u7f51\u4e0a\u4e92\u52a8', u'http://www.bbc.co.uk/zhongwen/simp/interactive/index.xml'),  # interactive
        (u'\u97f3\u89c6\u56fe\u7247', u'http://www.bbc.co.uk/zhongwen/simp/multimedia/index.xml'),  # multimedia
        (u'\u5206\u6790\u8bc4\u8bba', u'http://www.bbc.co.uk/zhongwen/simp/indepth/index.xml'),  # in-depth analysis
        (u'\u82f1\u8bed\u6559\u5b66', u'http://www.bbc.co.uk/zhongwen/simp/elt/index.xml'),  # English teaching
    ]

    # CSS for the generated navigation/index pages (see calibre's
    # BasicNewsRecipe.template_css documentation).
    template_css = u'''
        .article_date {color: gray;font-family:"仿宋","fs",serif;}
        .article_description {font-family:"微软雅黑","黑体","ht",sans-serif; text-indent: 0pt;font-size: 0.8em;}
        a.article {font-weight: bold; text-align:left;font-family:"宋体","zw",serif;}
        a.feed {font-weight: bold;}
        .calibre_navbar {font-family:"微软雅黑","黑体","ht",sans-serif;}'''

    # CSS applied to the article pages. The four @font-face rules declare
    # the aliases "zw", "fs", "kt" and "ht", each with a long list of
    # candidate font locations, followed by the element styling rules.
    # NOTE: the original first src list had a stray "A" after the first
    # comma, which made that declaration invalid; it is removed here.
    extra_css = u'''
        @font-face {
	font-family:"zw";
	src:local("宋体"),
	local("DK-SONGTI"),
	url(../fonts/zw.ttf),
	url(res:///opt/sony/ebook/FONT/zw.ttf),
	url(res:///Data/FONT/zw.ttf),
	url(res:///opt/sony/ebook/FONT/tt0011m_.ttf),
	url(res:///ebook/fonts/../../mnt/sdcard/fonts/zw.ttf),
	url(res:///ebook/fonts/../../mnt/extsd/fonts/zw.ttf),
	url(res:///ebook/fonts/zw.ttf),
	url(res:///ebook/fonts/DroidSansFallback.ttf),
	url(res:///fonts/ttf/zw.ttf),
	url(res:///../../media/mmcblk0p1/fonts/zw.ttf),
	url(res:///DK_System/system/font/zw.ttf),
	url(res:///abook/fonts/zw.ttf),
	url(res:///system/fonts/zw.ttf),
	url(res:///system/media/sdcard/fonts/zw.ttf),
	url(res:///media/fonts/zw.ttf),
	url(res:///sdcard/fonts/zw.ttf),
	url(res:///system/fonts/DroidSansFallback.ttf),
	url(res:///mnt/MOVIFAT/font/zw.ttf),
	url(res:///media/flash/fonts/zw.ttf),
	url(res:///media/sd/fonts/zw.ttf),
	url(res:///opt/onyx/arm/lib/fonts/AdobeHeitiStd-Regular.otf),
	url(res:///../../fonts/zw.ttf),
	url(res:///../fonts/zw.ttf);}
        @font-face {
	font-family:"fs";
	src:local("仿宋"),
	local("DK-FANGSONG"),
	url(../fonts/fs.ttf),
	url(res:///opt/sony/ebook/FONT/fs.ttf),
	url(res:///Data/FONT/fs.ttf),
	url(res:///opt/sony/ebook/FONT/tt0011m_.ttf),
	url(res:///ebook/fonts/../../mnt/sdcard/fonts/fs.ttf),
	url(res:///ebook/fonts/../../mnt/extsd/fonts/fs.ttf),
	url(res:///ebook/fonts/fs.ttf),
	url(res:///ebook/fonts/DroidSansFallback.ttf),
	url(res:///fonts/ttf/fs.ttf),
	url(res:///../../media/mmcblk0p1/fonts/fs.ttf),
	url(res:///DK_System/system/font/fs.ttf),
	url(res:///abook/fonts/fs.ttf),
	url(res:///system/fonts/fs.ttf),
	url(res:///system/media/sdcard/fonts/fs.ttf),
	url(res:///media/fonts/fs.ttf),
	url(res:///sdcard/fonts/fs.ttf),
	url(res:///system/fonts/DroidSansFallback.ttf),
	url(res:///mnt/MOVIFAT/font/fs.ttf),
	url(res:///media/flash/fonts/fs.ttf),
	url(res:///media/sd/fonts/fs.ttf),
	url(res:///opt/onyx/arm/lib/fonts/AdobeHeitiStd-Regular.otf),
	url(res:///../../fonts/fs.ttf),
	url(res:///../fonts/fs.ttf);}
        @font-face {
	font-family:"kt";
	src:local("楷体"),
	local("DK-KAITI"),
	url(../fonts/kt.ttf),
	url(res:///opt/sony/ebook/FONT/kt.ttf),
	url(res:///Data/FONT/kt.ttf),
	url(res:///opt/sony/ebook/FONT/tt0011m_.ttf),
	url(res:///ebook/fonts/../../mnt/sdcard/fonts/kt.ttf),
	url(res:///ebook/fonts/../../mnt/extsd/fonts/kt.ttf),
	url(res:///ebook/fonts/kt.ttf),
	url(res:///ebook/fonts/DroidSansFallback.ttf),
	url(res:///fonts/ttf/kt.ttf),
	url(res:///../../media/mmcblk0p1/fonts/kt.ttf),
	url(res:///DK_System/system/font/kt.ttf),
	url(res:///abook/fonts/kt.ttf),
	url(res:///system/fonts/kt.ttf),
	url(res:///system/media/sdcard/fonts/kt.ttf),
	url(res:///media/fonts/kt.ttf),
	url(res:///sdcard/fonts/kt.ttf),
	url(res:///system/fonts/DroidSansFallback.ttf),
	url(res:///mnt/MOVIFAT/font/kt.ttf),
	url(res:///media/flash/fonts/kt.ttf),
	url(res:///media/sd/fonts/kt.ttf),
	url(res:///opt/onyx/arm/lib/fonts/AdobeHeitiStd-Regular.otf),
	url(res:///../../fonts/kt.ttf),
	url(res:///../fonts/kt.ttf);}
        @font-face {
	font-family:"ht";
	src:local("微软雅黑"),
	local("DK-HEITI"),
	url(../fonts/ht.ttf),
	url(res:///opt/sony/ebook/FONT/ht.ttf),
	url(res:///Data/FONT/ht.ttf),
	url(res:///opt/sony/ebook/FONT/tt0011m_.ttf),
	url(res:///ebook/fonts/../../mnt/sdcard/fonts/ht.ttf),
	url(res:///ebook/fonts/../../mnt/extsd/fonts/ht.ttf),
	url(res:///ebook/fonts/ht.ttf),
	url(res:///ebook/fonts/DroidSansFallback.ttf),
	url(res:///fonts/ttf/ht.ttf),
	url(res:///../../media/mmcblk0p1/fonts/ht.ttf),
	url(res:///DK_System/system/font/ht.ttf),
	url(res:///abook/fonts/ht.ttf),
	url(res:///system/fonts/ht.ttf),
	url(res:///system/media/sdcard/fonts/ht.ttf),
	url(res:///media/fonts/ht.ttf),
	url(res:///sdcard/fonts/ht.ttf),
	url(res:///system/fonts/DroidSansFallback.ttf),
	url(res:///mnt/MOVIFAT/font/ht.ttf),
	url(res:///media/flash/fonts/ht.ttf),
	url(res:///media/sd/fonts/ht.ttf),
	url(res:///opt/onyx/arm/lib/fonts/AdobeHeitiStd-Regular.otf),
	url(res:///../../fonts/ht.ttf),
	url(res:///../fonts/ht.ttf);}
        body {
	padding: 0%;
	margin-top: 0%;
	margin-bottom: 0%;
	margin-left: 1%;
	margin-right: 1%;
	line-height:130%;
	font-family:"宋体","zw",serif;
	text-align: justify;
	text-indent: 0em;
	color: black;}
        p {
	margin-top: 5pt;
	margin-bottom: 5pt;
	line-height: 130%;
	font-family:"宋体","zw",serif;
	text-align: justify;
	text-indent: 2em;}
        div {
	margin:0px;
	padding:0px;
	line-height:130%;
	text-align: justify;
	font-family:"宋体","zw",serif;}
        h1 {
	margin-top: 1em;
	margin-bottom: 0.5em;
	font-family:"微软雅黑","黑体","ht",sans-serif;
	font-size: xx-large;
	line-height: 130%;
	text-align: center;
	text-indent: 0em;}
        h2 {
	margin-top: 1em;
	margin-bottom: 0.5em;
	font-family:"微软雅黑","黑体","ht",sans-serif;
	font-size: x-large;
	line-height: 130%;
	text-align: center;
	text-indent: 0em;}
        h3 {
	margin-top: 1em;
	margin-bottom: 0.5em;
	font-family:"微软雅黑","黑体","ht",sans-serif;
	font-size: large;
	line-height: 130%;
	text-align: center;
	text-indent: 0em;}
        h4 {
	margin-top: 1em;
	margin-bottom: 0.5em;
	font-family:"微软雅黑","黑体","ht",sans-serif;
	font-size: medium;
	text-align: center;
	text-indent: 0em;
	line-height: 130%;}
        div.datestamp{font-family:"楷体","kt",serif;text-align: justify;text-indent: 0em;text-align: center;}
        .articledescription {font-family: "微软雅黑", 'ht', sans-serif;}
        span {font-family:"微软雅黑","黑体","ht",sans-serif;}
        span.lastupdated {font-family:"楷体","kt",serif;}
        a {font-family:"微软雅黑","黑体","ht",sans-serif;}
        ul {font-family:"宋体","zw",serif;}
        li {font-family:"宋体","zw",serif;}
        ol {font-family:"宋体","zw",serif;}
        div.module {text-indent: 0em;text-align: center;}
        img {text-align: center;}
        p.caption {font-family:"仿宋","fs",serif;text-align: center;text-indent: 0em;}
        hr {height: 1px; border: 0px; color: black; background-color: black}
            '''

    # Recipe metadata.
    __author__ = 'k4user'
    __version__ = '1.0'
    language = 'zh'
    # Was "pubisher  = 'BBC" (misspelled name and unterminated string),
    # which made the whole recipe a SyntaxError.
    publisher = 'BBC'
    description = 'BBC news in Chinese'
    category = 'News, Chinese'

    # Download / conversion settings.
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'UTF-8'
    conversion_options = {'linearize_tables': True}
    masthead_url = 'http://wscdn.bbc.co.uk/zhongwen/simp/images/1024/brand.jpg'

    # Only these elements of each article page are kept.
    keep_only_tags = [
        dict(name='h1'),
        dict(name='p', attrs={'class': ['primary-topic', 'summary']}),
        dict(name='div', attrs={'class': ['bodytext', 'datestamp', 'module']}),
    ]
    remove_tags = [dict(name='br', attrs={'class': ['calibre12', 'calibre11']})]

    def preprocess_html(self, soup):
        """Rewrite ``<div class="module bodytext">`` to ``class="module"``.

        :param soup: parsed article page (a BeautifulSoup tree).
        :return: the same ``soup`` object, modified in place.
        """
        # With the compound class collapsed, the ``div.module`` rule in
        # extra_css can match these divs.
        for div in soup.findAll('div', 'module bodytext'):
            div['class'] = 'module'
        return soup
lrui is offline   Reply With Quote