Enthusiast
Posts: 30
Karma: 16
Join Date: Sep 2009
Device: sony prs-505/600
|
GoComics.com - error returns
Hi again,
I tried as you suggested and removed the reverse command. I set the size to 900 and the number of days to 1.
When I ran it with a few comics (5) it worked fine. When I added a few it returns an error.
Thanks again for your help!
-M
The recipe ==========================================
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = 'Copyright 2010 Starson17'
'''
www.gocomics.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString
import urllib, re, mechanize
class GoComics(BasicNewsRecipe):
title = 'GoComics Reversed'
__author__ = 'Starson17'
__version__ = '1.01'
__date__ = '13 March 2010'
description = '200+ Comics - Customize for more days/comics: Defaults to 7 days, 15 comics - 10 general, 5 editorial.'
language = 'en'
use_embedded_content= False
no_stylesheets = True
remove_javascript = True
cover_url = 'http://paulbuckley14059.files.wordpress.com/2008/06/calvin-and-hobbes.jpg'
####### USER PREFERENCES - COMICS, IMAGE SIZE AND NUMBER OF COMICS TO RETRIEVE ########
# num_comics_to_get - I've tried up to 99 on Calvin&Hobbes
num_comics_to_get = 1
# comic_size 300 is small, 600 is medium, 900 is large, 1500 is extra-large
comic_size = 900
# CHOOSE COMIC STRIPS BELOW - REMOVE COMMENT '# ' FROM IN FRONT OF DESIRED STRIPS
# Please do not overload their servers by selecting all comics and 1000 strips from each!
keep_only_tags = [dict(name='div', attrs={'class':['feature','banner']}),
]
remove_tags = [dict(name='a', attrs={'class':['beginning','prev','cal','next','newest']}),
dict(name='div', attrs={'class':['tag-wrapper']}),
dict(name='ul', attrs={'class':['share-nav','feature-nav']}),
]
def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
orig_open_novisit = br.open_novisit
def my_open_no_visit(url, **kwargs):
req = mechanize.Request(
url,
headers = {
'Referer':'http://www.gocomics.com/',
})
return orig_open_novisit(req)
br.open_novisit = my_open_no_visit
return br
def parse_index(self):
feeds = []
for title, url in [
(u"Andy Capp", u"http://www.gocomics.com/andycapp"),
(u"B.C.", u"http://www.gocomics.com/bc"),
(u"Baldo", u"http://www.gocomics.com/baldo"),
(u"Ballard Street", u"http://www.gocomics.com/ballardstreet"),
(u"Bloom County", u"http://www.gocomics.com/bloomcounty"),
(u"Broom Hilda", u"http://www.gocomics.com/broomhilda"),
(u"Calvin and Hobbes", u"http://www.gocomics.com/calvinandhobbes"),
(u"Cathy", u"http://www.gocomics.com/cathy"),
(u"Citizen Dog", u"http://www.gocomics.com/citizendog"),
(u"Close to Home", u"http://www.gocomics.com/closetohome"),
(u"Doonesbury", u"http://www.gocomics.com/doonesbury"),
(u"The Duplex", u"http://www.gocomics.com/duplex"),
(u"Flight Deck", u"http://www.gocomics.com/flightdeck"),
(u"For Better or For Worse", u"http://www.gocomics.com/forbetterorforworse"),
(u"FoxTrot", u"http://www.gocomics.com/foxtrot"),
(u"FoxTrot Classics", u"http://www.gocomics.com/foxtrotclassics"),
(u"Frank & Ernest", u"http://www.gocomics.com/frankandernest"),
(u"Garfield", u"http://www.gocomics.com/garfield"),
(u"Housebroken", u"http://www.gocomics.com/housebroken"),
(u"Momma", u"http://www.gocomics.com/momma"),
(u"Non Sequitur", u"http://www.gocomics.com/nonsequitur"),
(u"Pickles", u"http://www.gocomics.com/pickles"),
(u"Pooch Cafe", u"http://www.gocomics.com/poochcafe"),
(u"Shoe", u"http://www.gocomics.com/shoe"),
(u"Wizard of Id", u"http://www.gocomics.com/wizardofid"),
(u"Ziggy", u"http://www.gocomics.com/ziggy"),
(u"Chuck Asay", u"http://www.gocomics.com/chuckasay"),
(u"Tony Auth", u"http://www.gocomics.com/tonyauth"),
(u"Jeff Danziger", u"http://www.gocomics.com/jeffdanziger"),
(u"Walt Handelsman", u"http://www.gocomics.com/walthandelsman"),
(u"Mike Luckovich", u"http://www.gocomics.com/mikeluckovich"),
(u"Pat Oliphant", u"http://www.gocomics.com/patoliphant"),
(u"Ted Rall", u"http://www.gocomics.com/tedrall"),
(u"Small World", u"http://www.gocomics.com/smallworld"),
(u"Tom Toles", u"http://www.gocomics.com/tomtoles"),
(u"Signe Wilkinson", u"http://www.gocomics.com/signewilkinson"),
]:
articles = self.make_links(url)
if articles:
feeds.append((title, articles))
return feeds
def make_links(self, url):
title = 'Temp'
description = ''
date = ''
current_articles = []
pages = range(1, self.num_comics_to_get+1)
for page in pages:
page_soup = self.index_to_soup(url)
if page_soup:
strip_title = page_soup.h1.a.string
date_title = page_soup.find('ul', attrs={'class': 'feature-nav'}).li.string
title = strip_title + ' - ' + date_title
strip_url_date = page_soup.h1.a['href']
prev_strip_url_date = page_soup.find('a', attrs={'class': 'prev'})['href']
page_url = 'http://www.gocomics.com' + strip_url_date
prev_page_url = 'http://www.gocomics.com' + prev_strip_url_date
current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':''})
url = prev_page_url
#current_articles.reverse()
return current_articles
def preprocess_html(self, soup):
if soup.title:
title_string = soup.title.string.strip()
_cd = title_string.split(',',1)[1]
comic_date = ' '.join(_cd.split(' ', 4)[0:-1])
if soup.h1.span:
artist = soup.h1.span.string
soup.h1.span.string.replaceWith(comic_date + artist)
feature_item = soup.find('p',attrs={'class':'feature_item'})
if feature_item.a:
a_tag = feature_item.a
a_href = a_tag["href"]
img_tag = a_tag.img
img_tag["src"] = a_href
img_tag["width"] = self.comic_size
img_tag["height"] = None
return soup
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
==================================================
Error returned (with trace):
ERROR: Conversion Error: <b>Failed</b>: Fetch news from GoComics_test
Fetch news from GoComics_test
Resolved conversion options
{'asciiize': False,
'author_sort': None,
'authors': None,
'base_font_size': 0,
'book_producer': None,
'chapter': None,
'chapter_mark': 'pagebreak',
'comments': None,
'cover': None,
'debug_pipeline': None,
'disable_font_rescaling': False,
'dont_download_recipe': False,
'dont_justify': False,
'dont_split_on_page_breaks': True,
'extra_css': None,
'extract_to': None,
'flow_size': 260,
'font_size_mapping': None,
'footer_regex': '(?i)(?<=<hr>)((\\s*<a name=\\d+></a>((<img.+?>)*<br>\\s*)?\\d+<br>\\s*.*?\\s*)|(\\s* <a name=\\d+></a>((<img.+?>)*<br>\\s*)?.*?<br>\\s*\\d+))(?=<br>)' ,
'header_regex': '(?i)(?<=<hr>)((\\s*<a name=\\d+></a>((<img.+?>)*<br>\\s*)?\\d+<br>\\s*.*?\\s*)|(\\s* <a name=\\d+></a>((<img.+?>)*<br>\\s*)?.*?<br>\\s*\\d+))(?=<br>)' ,
'input_encoding': None,
'input_profile': <calibre.customize.profiles.InputProfile object at 0x02C84170>,
'insert_blank_line': False,
'insert_metadata': False,
'isbn': None,
'language': None,
'level1_toc': None,
'level2_toc': None,
'level3_toc': None,
'line_height': 0,
'linearize_tables': False,
'lrf': False,
'margin_bottom': 5.0,
'margin_left': 5.0,
'margin_right': 5.0,
'margin_top': 5.0,
'max_toc_links': 50,
'no_chapters_in_toc': False,
'no_default_epub_cover': False,
'no_inline_navbars': False,
'output_profile': <calibre.customize.profiles.SonyReaderOutput object at 0x02C84370>,
'page_breaks_before': None,
'password': None,
'prefer_metadata_cover': False,
'preprocess_html': False,
'pretty_print': True,
'pubdate': None,
'publisher': None,
'rating': None,
'read_metadata_from_opf': None,
'remove_first_image': False,
'remove_footer': False,
'remove_header': False,
'remove_paragraph_spacing': False,
'remove_paragraph_spacing_indent_size': 1.5,
'series': None,
'series_index': None,
'tags': None,
'test': False,
'timestamp': None,
'title': None,
'title_sort': None,
'toc_filter': None,
'toc_threshold': 6,
'use_auto_toc': False,
'username': None,
'verbose': 2}
InputFormatPlugin: Recipe Input running
Python function terminated unexpectedly
'NoneType' object has no attribute 'a' (Error Code: 1)
Traceback (most recent call last):
File "site.py", line 103, in main
File "site.py", line 85, in run_entry_point
File "site-packages\calibre\utils\ipc\worker.py", line 99, in main
File "site-packages\calibre\gui2\convert\gui_conversion.py", line 24, in gui_convert
File "site-packages\calibre\ebooks\conversion\plumber.py", line 787, in run
File "site-packages\calibre\customize\conversion.py", line 211, in __call__
File "site-packages\calibre\web\feeds\input.py", line 104, in convert
File "site-packages\calibre\web\feeds\news.py", line 632, in download
File "site-packages\calibre\web\feeds\news.py", line 749, in build_index
File "c:\docume~1\username\locals~1\temp\calibre_0.6.45 _wc3fjl_recipes\recipe0.py", line 96, in parse_index
articles = self.make_links(url)
File "c:\docume~1\username\locals~1\temp\calibre_0.6.45 _wc3fjl_recipes\recipe0.py", line 110, in make_links
strip_title = page_soup.h1.a.string
AttributeError: 'NoneType' object has no attribute 'a'
|