|
|
#1 |
|
Junior Member
![]() Posts: 7
Karma: 10
Join Date: Mar 2011
Device: kindle k3
|
Jhsb no rss
web: epaper.jinghua.cn/html
write download .py , during the fetch the rss,problems appears News from jhsb Resolved conversion options calibre version: 0.8.14 {'asciiize': False, 'author_sort': None, 'authors': None, 'base_font_size': 0, 'book_producer': None, 'change_justification': 'original', 'chapter': None, 'chapter_mark': 'pagebreak', 'comments': None, 'cover': None, 'debug_pipeline': None, 'dehyphenate': True, 'delete_blank_paragraphs': True, 'disable_font_rescaling': False, 'dont_compress': False, 'dont_download_recipe': False, 'duplicate_links_in_toc': False, 'enable_heuristics': False, 'extra_css': None, 'extract_to': None, 'fix_indents': True, 'font_size_mapping': None, 'format_scene_breaks': True, 'html_unwrap_factor': 0.4, 'input_encoding': None, 'input_profile': <calibre.customize.profiles.InputProfile object at 0x040A2270>, 'insert_blank_line': False, 'insert_blank_line_size': 0.5, 'insert_metadata': False, 'isbn': None, 'italicize_common_cases': True, 'keep_ligatures': False, 'kindlegen': False, 'language': None, 'level1_toc': None, 'level2_toc': None, 'level3_toc': None, 'line_height': 0, 'linearize_tables': False, 'lrf': False, 'margin_bottom': 5.0, 'margin_left': 5.0, 'margin_right': 5.0, 'margin_top': 5.0, 'markup_chapter_headings': True, 'max_toc_links': 50, 'minimum_line_height': 120.0, 'mobi_ignore_margins': False, 'mobi_toc_at_start': False, 'no_chapters_in_toc': False, 'no_inline_navbars': True, 'no_inline_toc': False, 'output_profile': <calibre.customize.profiles.KindleOutput object at 0x040A2590>, 'page_breaks_before': None, 'password': None, 'personal_doc': '[PDOC]', 'prefer_author_sort': False, 'prefer_metadata_cover': False, 'pretty_print': False, 'pubdate': None, 'publisher': None, 'rating': None, 'read_metadata_from_opf': None, 'remove_fake_margins': True, 'remove_first_image': False, 'remove_paragraph_spacing': False, 'remove_paragraph_spacing_indent_size': 1.5, 'renumber_headings': True, 'replace_scene_breaks': '', 'rescale_images': False, 'series': None, 
'series_index': None, 'smarten_punctuation': False, 'sr1_replace': '', 'sr1_search': '', 'sr2_replace': '', 'sr2_search': '', 'sr3_replace': '', 'sr3_search': '', 'tags': None, 'test': False, 'timestamp': None, 'title': None, 'title_sort': None, 'toc_filter': None, 'toc_threshold': 6, 'toc_title': None, 'unwrap_lines': True, 'use_auto_toc': False, 'username': None, 'verbose': 2} InputFormatPlugin: Recipe Input running Python function terminated unexpectedly HTTP Error 403: Forbidden (Error Code: 1) Traceback (most recent call last): File "site.py", line 132, in main File "site.py", line 109, in run_entry_point File "site-packages\calibre\utils\ipc\worker.py", line 181, in main File "site-packages\calibre\gui2\convert\gui_conversion.py", line 25, in gui_convert File "site-packages\calibre\ebooks\conversion\plumber.py", line 937, in run File "site-packages\calibre\customize\conversion.py", line 204, in __call__ File "site-packages\calibre\web\feeds\input.py", line 105, in convert File "site-packages\calibre\web\feeds\news.py", line 737, in download File "site-packages\calibre\web\feeds\news.py", line 874, in build_index File "c:\docume~1\admini~1\locals~1\temp\calibre_0.8.14 _tmp_syu1qu\siuyys_recipes\recipe0.py", line 29, in parse_index soup = self.index_to_soup(cat) File "site-packages\calibre\web\feeds\news.py", line 498, in index_to_soup File "site-packages\mechanize-0.2.4-py2.7.egg\mechanize\_mechanize.py", line 199, in open_novisit File "site-packages\mechanize-0.2.4-py2.7.egg\mechanize\_mechanize.py", line 255, in _mech_open mechanize._response.httperror_seek_wrapper: HTTP Error 403: Forbidden script is: from calibre.web.feeds.news import BasicNewsRecipe import re class jhsb(BasicNewsRecipe): title = u'jhsb' __author__ = 'zyl' language = 'zh' oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True cover_url = 'http://epaper.jinghua.cn/tplimg/logo_080715.gif' language = 'zh' keep_only_tags = [] keep_only_tags.append(dict(name = 'div', attrs = 
{'class': 'new_b_b_b'})) def parse_index(self): catnames = {} catnames["http://epaper.jinghua.cn/html/"] = "" feeds = [] for cat in catnames.keys(): articles = [] soup = self.index_to_soup(cat) for a in soup.findAll('a',attrs={'href' : re.compile(cat+"201[0-9]-[0-1][0-9]/[0-1][0-9]/[0-9][0-9][0-9][0-9][0-9][0-9].html")}): url = a['href'].strip() myarticle=({'title':self.tag_to_string(a), 'url':url, 'description':'', 'date':''}) self.log("found %s" % url) articles.append(myarticle) self.log("Adding URL %s\n" %url) if articles: feeds.append((catnames[cat], articles)) return feeds How can I avoid the "HTTP Error 403: Forbidden"? Please help me.
Last edited by society2008; 08-15-2011 at 06:43 AM. Reason: edit py format |
|
|
|
|
|
#2 | |
|
Wizard
![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() Posts: 4,004
Karma: 177841
Join Date: Dec 2009
Device: WinMo: IPAQ; Android: HTC HD2, Archos 7o; Java:Gravity T
|
Quote:
A 403 error can have many causes. Does the site require login? Does it use cookies? Are there referer limits? These can be handled with Mechanize once you figure out the problem. Use Tamper Data with Firefox to track them down.
|
|
|
|
|
| Advert | |
|
|
|
|
#3 |
|
Junior Member
![]() Posts: 7
Karma: 10
Join Date: Mar 2011
Device: kindle k3
|
original jhsb.recipe:
from calibre.web.feeds.news import BasicNewsRecipe import re class jhsb(BasicNewsRecipe): title = u'jhsb' __author__ = 'zyl' language = 'zh' oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True cover_url = 'http://epaper.jinghua.cn/tplimg/logo_080715.gif' language = 'zh' keep_only_tags = [] keep_only_tags.append(dict(name = 'div', attrs = {'class': 'new_b_b_b'})) def parse_index(self): catnames = {} catnames["http://epaper.jinghua.cn/html/"] = "" feeds = [] for cat in catnames.keys(): articles = [] soup = self.index_to_soup(cat) for a in soup.findAll('a',attrs={'href' : re.compile(cat+"201[0-9]-[0-1][0-9]/[0-1][0-9]/[0-9][0-9][0-9][0-9][0-9][0-9].html")}): url = a['href'].strip() myarticle=({'title':self.tag_to_string(a), 'url':url, 'description':'', 'date':''}) self.log("found %s" % url) articles.append(myarticle) self.log("Adding URL %s\n" %url) if articles: feeds.append((catnames[cat], articles)) return feeds The website can be accessed without registration and displays all of its news. |
|
|
|
![]() |
|
Similar Threads
|
||||
| Thread | Thread Starter | Forum | Replies | Last Post |
| Classic G:RSS: Optimized Google Reader (RSS) for the Nook [BETA Testers needed] | Fmstrat | Barnes & Noble NOOK | 24 | 12-28-2010 01:22 PM |
| G:RSS: Optimized Google Reader (RSS) for the Kindle 3 (and Nook) | Fmstrat | Amazon Kindle | 47 | 12-13-2010 01:20 PM |
| Is there a good way to convert partial rss to full rss feeds. | Zorz | Other formats | 5 | 05-29-2010 01:17 PM |
| RSS? | lordofazeroth | Cybook | 5 | 03-13-2009 07:42 AM |
| RSS- best out there? | sirdouglas | Kindle Formats | 0 | 12-21-2008 03:38 AM |