Jhsb no rss

society2008 · 08-15-2011, 06:31 AM

web: epaper.jinghua.cn/html
I wrote a download recipe (.py) for it,
but a problem appears while fetching the index page:

News from jhsb
Resolved conversion options
calibre version: 0.8.14
{'asciiize': False,
'author_sort': None,
'authors': None,
'base_font_size': 0,
'book_producer': None,
'change_justification': 'original',
'chapter': None,
'chapter_mark': 'pagebreak',
'comments': None,
'cover': None,
'debug_pipeline': None,
'dehyphenate': True,
'delete_blank_paragraphs': True,
'disable_font_rescaling': False,
'dont_compress': False,
'dont_download_recipe': False,
'duplicate_links_in_toc': False,
'enable_heuristics': False,
'extra_css': None,
'extract_to': None,
'fix_indents': True,
'font_size_mapping': None,
'format_scene_breaks': True,
'html_unwrap_factor': 0.4,
'input_encoding': None,
'input_profile': <calibre.customize.profiles.InputProfile object at 0x040A2270>,
'insert_blank_line': False,
'insert_blank_line_size': 0.5,
'insert_metadata': False,
'isbn': None,
'italicize_common_cases': True,
'keep_ligatures': False,
'kindlegen': False,
'language': None,
'level1_toc': None,
'level2_toc': None,
'level3_toc': None,
'line_height': 0,
'linearize_tables': False,
'lrf': False,
'margin_bottom': 5.0,
'margin_left': 5.0,
'margin_right': 5.0,
'margin_top': 5.0,
'markup_chapter_headings': True,
'max_toc_links': 50,
'minimum_line_height': 120.0,
'mobi_ignore_margins': False,
'mobi_toc_at_start': False,
'no_chapters_in_toc': False,
'no_inline_navbars': True,
'no_inline_toc': False,
'output_profile': <calibre.customize.profiles.KindleOutput object at 0x040A2590>,
'page_breaks_before': None,
'password': None,
'personal_doc': '[PDOC]',
'prefer_author_sort': False,
'prefer_metadata_cover': False,
'pretty_print': False,
'pubdate': None,
'publisher': None,
'rating': None,
'read_metadata_from_opf': None,
'remove_fake_margins': True,
'remove_first_image': False,
'remove_paragraph_spacing': False,
'remove_paragraph_spacing_indent_size': 1.5,
'renumber_headings': True,
'replace_scene_breaks': '',
'rescale_images': False,
'series': None,
'series_index': None,
'smarten_punctuation': False,
'sr1_replace': '',
'sr1_search': '',
'sr2_replace': '',
'sr2_search': '',
'sr3_replace': '',
'sr3_search': '',
'tags': None,
'test': False,
'timestamp': None,
'title': None,
'title_sort': None,
'toc_filter': None,
'toc_threshold': 6,
'toc_title': None,
'unwrap_lines': True,
'use_auto_toc': False,
'username': None,
'verbose': 2}
InputFormatPlugin: Recipe Input running
Python function terminated unexpectedly
HTTP Error 403: Forbidden (Error Code: 1)
Traceback (most recent call last):
File "site.py", line 132, in main
File "site.py", line 109, in run_entry_point
File "site-packages\calibre\utils\ipc\worker.py", line 181, in main
File "site-packages\calibre\gui2\convert\gui_conversion.py", line 25, in gui_convert
File "site-packages\calibre\ebooks\conversion\plumber.py", line 937, in run
File "site-packages\calibre\customize\conversion.py", line 204, in __call__
File "site-packages\calibre\web\feeds\input.py", line 105, in convert
File "site-packages\calibre\web\feeds\news.py", line 737, in download
File "site-packages\calibre\web\feeds\news.py", line 874, in build_index
File "c:\docume~1\admini~1\locals~1\temp\calibre_0.8.14 _tmp_syu1qu\siuyys_recipes\recipe0.py", line 29, in parse_index
soup = self.index_to_soup(cat)
File "site-packages\calibre\web\feeds\news.py", line 498, in index_to_soup
File "site-packages\mechanize-0.2.4-py2.7.egg\mechanize\_mechanize.py", line 199, in open_novisit
File "site-packages\mechanize-0.2.4-py2.7.egg\mechanize\_mechanize.py", line 255, in _mech_open
mechanize._response.httperror_seek_wrapper: HTTP Error 403: Forbidden

script is:
from calibre.web.feeds.news import BasicNewsRecipe
import re

class jhsb(BasicNewsRecipe):
    """Calibre recipe that scrapes article links from the jinghua.cn e-paper.

    No feeds are declared; ``parse_index`` builds the article list by
    reading the index page(s) listed in its ``catnames`` mapping.
    """

    title = u'jhsb'
    __author__ = 'zyl'
    language = 'zh'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://epaper.jinghua.cn/tplimg/logo_080715.gif'
    # Keep only <div class="new_b_b_b"> from each downloaded page
    # (presumably the article body -- confirm against the site's markup).
    keep_only_tags = [dict(name='div', attrs={'class': 'new_b_b_b'})]

    def parse_index(self):
        """Return the feeds as a list of ``(feed_title, articles)`` tuples.

        Each index URL in ``catnames`` is fetched and every ``<a>`` whose
        ``href`` looks like ``<index>YYYY-MM/DD/NNNNNN.html`` becomes an
        article dict with ``title``, ``url``, ``description`` and ``date``
        keys.  Index pages that yield no articles are left out.
        """
        # Maps index-page URL -> feed title shown in the generated book.
        catnames = {"http://epaper.jinghua.cn/html/": ""}

        feeds = []
        for cat, feed_title in catnames.items():
            # re.escape() stops the dots in the URL from acting as regex
            # wildcards.  The day component accepts 00-39 so that links
            # published on days 20-31 are matched too, and the year is not
            # pinned to a single decade.
            link_pattern = re.compile(
                re.escape(cat)
                + r"20[0-9][0-9]-[0-1][0-9]/[0-3][0-9]/[0-9]{6}\.html"
            )

            articles = []
            soup = self.index_to_soup(cat)
            for a in soup.findAll('a', attrs={'href': link_pattern}):
                url = a['href'].strip()
                self.log("found %s" % url)
                articles.append({
                    'title': self.tag_to_string(a),
                    'url': url,
                    'description': '',
                    'date': '',
                })
                self.log("Adding URL %s\n" % url)

            if articles:
                feeds.append((feed_title, articles))
        return feeds

how to avoid the "HTTP Error 403: Forbidden"?
please help me.

.

Starson17 · 08-15-2011, 10:07 AM

Quote:

Originally Posted by society2008

how to avoid the "HTTP Error 403: Forbidden"?
please help me.

.

It's preferred if you post your error messages in the spoiler tags (eye with red "X").

The 403 error can be many things. Does the site require login? Does it use cookies? Are there referer limits? These can be handled with Mechanize once you figure out the problem. Use TamperData and FireFox to track them down.

society2008 · 08-15-2011, 11:58 PM

original jhsb.recipe:

from calibre.web.feeds.news import BasicNewsRecipe
import re

class jhsb(BasicNewsRecipe):
    """Calibre recipe that scrapes article links from the jinghua.cn e-paper.

    No feeds are declared; ``parse_index`` builds the article list by
    reading the index page(s) listed in its ``catnames`` mapping.
    """

    title = u'jhsb'
    __author__ = 'zyl'
    language = 'zh'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://epaper.jinghua.cn/tplimg/logo_080715.gif'
    # Keep only <div class="new_b_b_b"> from each downloaded page
    # (presumably the article body -- confirm against the site's markup).
    keep_only_tags = [dict(name='div', attrs={'class': 'new_b_b_b'})]

    def parse_index(self):
        """Return the feeds as a list of ``(feed_title, articles)`` tuples.

        Each index URL in ``catnames`` is fetched and every ``<a>`` whose
        ``href`` looks like ``<index>YYYY-MM/DD/NNNNNN.html`` becomes an
        article dict with ``title``, ``url``, ``description`` and ``date``
        keys.  Index pages that yield no articles are left out.
        """
        # Maps index-page URL -> feed title shown in the generated book.
        catnames = {"http://epaper.jinghua.cn/html/": ""}

        feeds = []
        for cat, feed_title in catnames.items():
            # re.escape() stops the dots in the URL from acting as regex
            # wildcards.  The day component accepts 00-39 so that links
            # published on days 20-31 are matched too, and the year is not
            # pinned to a single decade.
            link_pattern = re.compile(
                re.escape(cat)
                + r"20[0-9][0-9]-[0-1][0-9]/[0-3][0-9]/[0-9]{6}\.html"
            )

            articles = []
            soup = self.index_to_soup(cat)
            for a in soup.findAll('a', attrs={'href': link_pattern}):
                url = a['href'].strip()
                self.log("found %s" % url)
                articles.append({
                    'title': self.tag_to_string(a),
                    'url': url,
                    'description': '',
                    'date': '',
                })
                self.log("Adding URL %s\n" % url)

            if articles:
                feeds.append((feed_title, articles))
        return feeds

The website can be accessed without registration and displays all of the news.

08-15-2011, 06:31 AM	#1
society2008 Junior Member Posts: 7 Karma: 10 Join Date: Mar 2011 Device: kindle k3	Jhsb no rss web: epaper.jinghua.cn/html write download .py , during the fetch the rss,problems appears News from jhsb Resolved conversion options calibre version: 0.8.14 {'asciiize': False, 'author_sort': None, 'authors': None, 'base_font_size': 0, 'book_producer': None, 'change_justification': 'original', 'chapter': None, 'chapter_mark': 'pagebreak', 'comments': None, 'cover': None, 'debug_pipeline': None, 'dehyphenate': True, 'delete_blank_paragraphs': True, 'disable_font_rescaling': False, 'dont_compress': False, 'dont_download_recipe': False, 'duplicate_links_in_toc': False, 'enable_heuristics': False, 'extra_css': None, 'extract_to': None, 'fix_indents': True, 'font_size_mapping': None, 'format_scene_breaks': True, 'html_unwrap_factor': 0.4, 'input_encoding': None, 'input_profile': <calibre.customize.profiles.InputProfile object at 0x040A2270>, 'insert_blank_line': False, 'insert_blank_line_size': 0.5, 'insert_metadata': False, 'isbn': None, 'italicize_common_cases': True, 'keep_ligatures': False, 'kindlegen': False, 'language': None, 'level1_toc': None, 'level2_toc': None, 'level3_toc': None, 'line_height': 0, 'linearize_tables': False, 'lrf': False, 'margin_bottom': 5.0, 'margin_left': 5.0, 'margin_right': 5.0, 'margin_top': 5.0, 'markup_chapter_headings': True, 'max_toc_links': 50, 'minimum_line_height': 120.0, 'mobi_ignore_margins': False, 'mobi_toc_at_start': False, 'no_chapters_in_toc': False, 'no_inline_navbars': True, 'no_inline_toc': False, 'output_profile': <calibre.customize.profiles.KindleOutput object at 0x040A2590>, 'page_breaks_before': None, 'password': None, 'personal_doc': '[PDOC]', 'prefer_author_sort': False, 'prefer_metadata_cover': False, 'pretty_print': False, 'pubdate': None, 'publisher': None, 'rating': None, 'read_metadata_from_opf': None, 'remove_fake_margins': True, 'remove_first_image': False, 'remove_paragraph_spacing': False, 
'remove_paragraph_spacing_indent_size': 1.5, 'renumber_headings': True, 'replace_scene_breaks': '', 'rescale_images': False, 'series': None, 'series_index': None, 'smarten_punctuation': False, 'sr1_replace': '', 'sr1_search': '', 'sr2_replace': '', 'sr2_search': '', 'sr3_replace': '', 'sr3_search': '', 'tags': None, 'test': False, 'timestamp': None, 'title': None, 'title_sort': None, 'toc_filter': None, 'toc_threshold': 6, 'toc_title': None, 'unwrap_lines': True, 'use_auto_toc': False, 'username': None, 'verbose': 2} InputFormatPlugin: Recipe Input running Python function terminated unexpectedly HTTP Error 403: Forbidden (Error Code: 1) Traceback (most recent call last): File "site.py", line 132, in main File "site.py", line 109, in run_entry_point File "site-packages\calibre\utils\ipc\worker.py", line 181, in main File "site-packages\calibre\gui2\convert\gui_conversion.py", line 25, in gui_convert File "site-packages\calibre\ebooks\conversion\plumber.py", line 937, in run File "site-packages\calibre\customize\conversion.py", line 204, in __call__ File "site-packages\calibre\web\feeds\input.py", line 105, in convert File "site-packages\calibre\web\feeds\news.py", line 737, in download File "site-packages\calibre\web\feeds\news.py", line 874, in build_index File "c:\docume~1\admini~1\locals~1\temp\calibre_0.8.14 _tmp_syu1qu\siuyys_recipes\recipe0.py", line 29, in parse_index soup = self.index_to_soup(cat) File "site-packages\calibre\web\feeds\news.py", line 498, in index_to_soup File "site-packages\mechanize-0.2.4-py2.7.egg\mechanize\_mechanize.py", line 199, in open_novisit File "site-packages\mechanize-0.2.4-py2.7.egg\mechanize\_mechanize.py", line 255, in _mech_open mechanize._response.httperror_seek_wrapper: HTTP Error 403: Forbidden script is: from calibre.web.feeds.news import BasicNewsRecipe import re class jhsb(BasicNewsRecipe): title = u'jhsb' __author__ = 'zyl' language = 'zh' oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True cover_url = 
'http://epaper.jinghua.cn/tplimg/logo_080715.gif' language = 'zh' keep_only_tags = [] keep_only_tags.append(dict(name = 'div', attrs = {'class': 'new_b_b_b'})) def parse_index(self): catnames = {} catnames["http://epaper.jinghua.cn/html/"] = "" feeds = [] for cat in catnames.keys(): articles = [] soup = self.index_to_soup(cat) for a in soup.findAll('a',attrs={'href' : re.compile(cat+"201[0-9]-[0-1][0-9]/[0-1][0-9]/[0-9][0-9][0-9][0-9][0-9][0-9].html")}): url = a['href'].strip() myarticle=({'title':self.tag_to_string(a), 'url':url, 'description':'', 'date':''}) self.log("found %s" % url) articles.append(myarticle) self.log("Adding URL %s\n" %url) if articles: feeds.append((catnames[cat], articles)) return feeds how to avoid the "HTTP Error 403: Forbidden"? please help me. . Last edited by society2008; 08-15-2011 at 06:43 AM. Reason: edit py format

Similar Threads
Thread	Thread Starter	Forum	Replies	Last Post
Classic G:RSS: Optimized Google Reader (RSS) for the Nook [BETA Testers needed]	Fmstrat	Barnes & Noble NOOK	24	12-28-2010 01:22 PM
G:RSS: Optimized Google Reader (RSS) for the Kindle 3 (and Nook)	Fmstrat	Amazon Kindle	47	12-13-2010 01:20 PM
Is there a good way to convert partial rss to full rss feeds.	Zorz	Other formats	5	05-29-2010 01:17 PM
RSS?	lordofazeroth	Cybook	5	03-13-2009 07:42 AM
RSS- best out there?	sirdouglas	Kindle Formats	0	12-21-2008 03:38 AM

08-15-2011, 11:58 PM	#3
society2008 Junior Member Posts: 7 Karma: 10 Join Date: Mar 2011 Device: kindle k3	original jhsb.recipe: from calibre.web.feeds.news import BasicNewsRecipe import re class jhsb(BasicNewsRecipe): title = u'jhsb' __author__ = 'zyl' language = 'zh' oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True cover_url = 'http://epaper.jinghua.cn/tplimg/logo_080715.gif' language = 'zh' keep_only_tags = [] keep_only_tags.append(dict(name = 'div', attrs = {'class': 'new_b_b_b'})) def parse_index(self): catnames = {} catnames["http://epaper.jinghua.cn/html/"] = "" feeds = [] for cat in catnames.keys(): articles = [] soup = self.index_to_soup(cat) for a in soup.findAll('a',attrs={'href' : re.compile(cat+"201[0-9]-[0-1][0-9]/[0-1][0-9]/[0-9][0-9][0-9][0-9][0-9][0-9].html")}): url = a['href'].strip() myarticle=({'title':self.tag_to_string(a), 'url':url, 'description':'', 'date':''}) self.log("found %s" % url) articles.append(myarticle) self.log("Adding URL %s\n" %url) if articles: feeds.append((catnames[cat], articles)) return feeds The web may log on without registration,can rdisplay any news.

Advert