![]() |
#1 |
Junior Member
![]() Posts: 7
Karma: 10
Join Date: Mar 2011
Device: kindle k3
|
Jhsb (no RSS feed)
Website: epaper.jinghua.cn/html
write download .py , during the fetch the rss,problems appears News from jhsb Resolved conversion options calibre version: 0.8.14 {'asciiize': False, 'author_sort': None, 'authors': None, 'base_font_size': 0, 'book_producer': None, 'change_justification': 'original', 'chapter': None, 'chapter_mark': 'pagebreak', 'comments': None, 'cover': None, 'debug_pipeline': None, 'dehyphenate': True, 'delete_blank_paragraphs': True, 'disable_font_rescaling': False, 'dont_compress': False, 'dont_download_recipe': False, 'duplicate_links_in_toc': False, 'enable_heuristics': False, 'extra_css': None, 'extract_to': None, 'fix_indents': True, 'font_size_mapping': None, 'format_scene_breaks': True, 'html_unwrap_factor': 0.4, 'input_encoding': None, 'input_profile': <calibre.customize.profiles.InputProfile object at 0x040A2270>, 'insert_blank_line': False, 'insert_blank_line_size': 0.5, 'insert_metadata': False, 'isbn': None, 'italicize_common_cases': True, 'keep_ligatures': False, 'kindlegen': False, 'language': None, 'level1_toc': None, 'level2_toc': None, 'level3_toc': None, 'line_height': 0, 'linearize_tables': False, 'lrf': False, 'margin_bottom': 5.0, 'margin_left': 5.0, 'margin_right': 5.0, 'margin_top': 5.0, 'markup_chapter_headings': True, 'max_toc_links': 50, 'minimum_line_height': 120.0, 'mobi_ignore_margins': False, 'mobi_toc_at_start': False, 'no_chapters_in_toc': False, 'no_inline_navbars': True, 'no_inline_toc': False, 'output_profile': <calibre.customize.profiles.KindleOutput object at 0x040A2590>, 'page_breaks_before': None, 'password': None, 'personal_doc': '[PDOC]', 'prefer_author_sort': False, 'prefer_metadata_cover': False, 'pretty_print': False, 'pubdate': None, 'publisher': None, 'rating': None, 'read_metadata_from_opf': None, 'remove_fake_margins': True, 'remove_first_image': False, 'remove_paragraph_spacing': False, 'remove_paragraph_spacing_indent_size': 1.5, 'renumber_headings': True, 'replace_scene_breaks': '', 'rescale_images': False, 'series': None, 
'series_index': None, 'smarten_punctuation': False, 'sr1_replace': '', 'sr1_search': '', 'sr2_replace': '', 'sr2_search': '', 'sr3_replace': '', 'sr3_search': '', 'tags': None, 'test': False, 'timestamp': None, 'title': None, 'title_sort': None, 'toc_filter': None, 'toc_threshold': 6, 'toc_title': None, 'unwrap_lines': True, 'use_auto_toc': False, 'username': None, 'verbose': 2} InputFormatPlugin: Recipe Input running Python function terminated unexpectedly HTTP Error 403: Forbidden (Error Code: 1) Traceback (most recent call last): File "site.py", line 132, in main File "site.py", line 109, in run_entry_point File "site-packages\calibre\utils\ipc\worker.py", line 181, in main File "site-packages\calibre\gui2\convert\gui_conversion.py", line 25, in gui_convert File "site-packages\calibre\ebooks\conversion\plumber.py", line 937, in run File "site-packages\calibre\customize\conversion.py", line 204, in __call__ File "site-packages\calibre\web\feeds\input.py", line 105, in convert File "site-packages\calibre\web\feeds\news.py", line 737, in download File "site-packages\calibre\web\feeds\news.py", line 874, in build_index File "c:\docume~1\admini~1\locals~1\temp\calibre_0.8.14 _tmp_syu1qu\siuyys_recipes\recipe0.py", line 29, in parse_index soup = self.index_to_soup(cat) File "site-packages\calibre\web\feeds\news.py", line 498, in index_to_soup File "site-packages\mechanize-0.2.4-py2.7.egg\mechanize\_mechanize.py", line 199, in open_novisit File "site-packages\mechanize-0.2.4-py2.7.egg\mechanize\_mechanize.py", line 255, in _mech_open mechanize._response.httperror_seek_wrapper: HTTP Error 403: Forbidden script is: from calibre.web.feeds.news import BasicNewsRecipe import re class jhsb(BasicNewsRecipe): title = u'jhsb' __author__ = 'zyl' language = 'zh' oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True cover_url = 'http://epaper.jinghua.cn/tplimg/logo_080715.gif' language = 'zh' keep_only_tags = [] keep_only_tags.append(dict(name = 'div', attrs = 
{'class': 'new_b_b_b'})) def parse_index(self): catnames = {} catnames["http://epaper.jinghua.cn/html/"] = "" feeds = [] for cat in catnames.keys(): articles = [] soup = self.index_to_soup(cat) for a in soup.findAll('a',attrs={'href' : re.compile(cat+"201[0-9]-[0-1][0-9]/[0-1][0-9]/[0-9][0-9][0-9][0-9][0-9][0-9].html")}): url = a['href'].strip() myarticle=({'title':self.tag_to_string(a), 'url':url, 'description':'', 'date':''}) self.log("found %s" % url) articles.append(myarticle) self.log("Adding URL %s\n" %url) if articles: feeds.append((catnames[cat], articles)) return feeds How can I avoid the "HTTP Error 403: Forbidden" error? Please help me. ![]() Last edited by society2008; 08-15-2011 at 05:43 AM. Reason: edit py format |
![]() |
![]() |
![]() |
#2 | |
Wizard
![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() Posts: 4,004
Karma: 177841
Join Date: Dec 2009
Device: WinMo: IPAQ; Android: HTC HD2, Archos 7o; Java:Gravity T
|
Quote:
![]() |
|
![]() |
![]() |
Advert | |
|
![]() |
#3 |
Junior Member
![]() Posts: 7
Karma: 10
Join Date: Mar 2011
Device: kindle k3
|
original jhsb.recipe:
from calibre.web.feeds.news import BasicNewsRecipe import re class jhsb(BasicNewsRecipe): title = u'jhsb' __author__ = 'zyl' language = 'zh' oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True cover_url = 'http://epaper.jinghua.cn/tplimg/logo_080715.gif' language = 'zh' keep_only_tags = [] keep_only_tags.append(dict(name = 'div', attrs = {'class': 'new_b_b_b'})) def parse_index(self): catnames = {} catnames["http://epaper.jinghua.cn/html/"] = "" feeds = [] for cat in catnames.keys(): articles = [] soup = self.index_to_soup(cat) for a in soup.findAll('a',attrs={'href' : re.compile(cat+"201[0-9]-[0-1][0-9]/[0-1][0-9]/[0-9][0-9][0-9][0-9][0-9][0-9].html")}): url = a['href'].strip() myarticle=({'title':self.tag_to_string(a), 'url':url, 'description':'', 'date':''}) self.log("found %s" % url) articles.append(myarticle) self.log("Adding URL %s\n" %url) if articles: feeds.append((catnames[cat], articles)) return feeds The website can be accessed without registration, and it can display any news. |
![]() |
![]() |
![]() |
|
![]() |
||||
Thread | Thread Starter | Forum | Replies | Last Post |
Classic G:RSS: Optimized Google Reader (RSS) for the Nook [BETA Testers needed] | Fmstrat | Barnes & Noble NOOK | 24 | 12-28-2010 12:22 PM |
G:RSS: Optimized Google Reader (RSS) for the Kindle 3 (and Nook) | Fmstrat | Amazon Kindle | 47 | 12-13-2010 12:20 PM |
Is there a good way to convert partial rss to full rss feeds. | Zorz | Other formats | 5 | 05-29-2010 12:17 PM |
RSS? | lordofazeroth | Cybook | 5 | 03-13-2009 06:42 AM |
RSS- best out there? | sirdouglas | Kindle Formats | 0 | 12-21-2008 02:38 AM |