|
|
#1 |
|
Junior Member
![]() Posts: 2
Karma: 10
Join Date: Oct 2011
Device: Kindle
|
Issue with Recipe
This recipe was working a few weeks ago, but suddenly stopped. I thought I'd figured out the issue, but it still gives the same error every time I run it.
I know just enough Python to get myself in trouble, but I'm having trouble debugging this deeply enough to figure out what the problem is. Code:
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds import Feed
import re
class PaizoWebFictionRecipe(BasicNewsRecipe):
    """Calibre recipe for the Pathfinder Tales web-fiction serials.

    The feed list is built at download time: ``get_feeds`` scrapes the
    serial index page and returns one Atom feed per story linked from it.
    """

    title = u'Pathfinder Web Fiction v2.0.1'
    oldest_article = 1000000
    max_articles_per_feed = 10
    reverse_article_order = True
    cover_url = 'http://paizo.com/image/content/Logos/PathfinderTales_500.jpeg'
    remove_tags_after = [dict(name='a', text='Tags')]
    # Prefixes every src="..." attribute with the site host.
    # NOTE(review): presumably all src values on the article pages are
    # root-relative; an already absolute URL would be mangled - confirm.
    preprocess_regexps = [
        (re.compile(r'src="', re.DOTALL | re.IGNORECASE),
         lambda match: 'src="http://www.paizo.com'),
    ]

    def get_feeds(self):
        """Return ``(title, url)`` pairs for every serial on the index page.

        Each ``<span class="productCategory">`` that contains a link with
        plain text contributes one feed.

        Raises:
            NotImplementedError: if no feed could be found on the page.
        """
        feeds = []
        soup = self.index_to_soup('http://paizo.com/store/byCompany/p/paizoPublishingLLC/pathfinder/tales/serial')
        for span in soup.findAll("span", {"class": "productCategory"}):
            link = span.a
            # Skip spans without a link, or whose link has no single text node.
            if link is None or link.string is None:
                continue
            # Keep the title as unicode.  '{}'.format(link.string) produced a
            # UTF-8 byte string, and lxml refuses non-ASCII byte strings with
            # "All strings must be XML compatible: Unicode or ASCII".
            name = unicode(link.string)
            feeds.append((name, 'http://paizo.com' + link['href'] + '&xml=atom'))
        if not feeds:
            raise NotImplementedError
        return feeds
Here's the error code: Code:
calibre, version 0.8.23
ERROR: Conversion Error: <b>Failed</b>: Fetch news from Pathfinder Web Fiction v2.0.1
Fetch news from Pathfinder Web Fiction v2.0.1
Resolved conversion options
calibre version: 0.8.23
{'asciiize': False,
'author_sort': None,
'authors': None,
'base_font_size': 0,
'book_producer': None,
'change_justification': 'original',
'chapter': None,
'chapter_mark': 'pagebreak',
'comments': None,
'cover': None,
'debug_pipeline': None,
'dehyphenate': True,
'delete_blank_paragraphs': True,
'disable_font_rescaling': False,
'dont_compress': False,
'dont_download_recipe': False,
'duplicate_links_in_toc': False,
'enable_heuristics': False,
'extra_css': None,
'extract_to': None,
'fix_indents': True,
'font_size_mapping': None,
'format_scene_breaks': True,
'html_unwrap_factor': 0.4,
'input_encoding': None,
'input_profile': <calibre.customize.profiles.InputProfile object at 0x0573C9F0>,
'insert_blank_line': False,
'insert_blank_line_size': 0.5,
'insert_metadata': False,
'isbn': None,
'italicize_common_cases': True,
'keep_ligatures': False,
'language': None,
'level1_toc': None,
'level2_toc': None,
'level3_toc': None,
'line_height': 0,
'linearize_tables': False,
'lrf': False,
'margin_bottom': 5.0,
'margin_left': 5.0,
'margin_right': 5.0,
'margin_top': 5.0,
'markup_chapter_headings': True,
'max_toc_links': 50,
'minimum_line_height': 120.0,
'mobi_ignore_margins': False,
'mobi_toc_at_start': False,
'no_chapters_in_toc': False,
'no_inline_navbars': True,
'no_inline_toc': False,
'output_profile': <calibre.customize.profiles.KindleOutput object at 0x0483B0D0>,
'page_breaks_before': None,
'password': None,
'personal_doc': '[PDOC]',
'prefer_author_sort': False,
'prefer_metadata_cover': False,
'pretty_print': False,
'pubdate': None,
'publisher': None,
'rating': None,
'read_metadata_from_opf': None,
'remove_fake_margins': True,
'remove_first_image': False,
'remove_paragraph_spacing': False,
'remove_paragraph_spacing_indent_size': 1.5,
'renumber_headings': True,
'replace_scene_breaks': '',
'rescale_images': False,
'series': None,
'series_index': None,
'share_not_sync': False,
'smarten_punctuation': False,
'sr1_replace': '',
'sr1_search': '',
'sr2_replace': '',
'sr2_search': '',
'sr3_replace': '',
'sr3_search': '',
'tags': None,
'test': False,
'timestamp': None,
'title': None,
'title_sort': None,
'toc_filter': None,
'toc_threshold': 6,
'toc_title': None,
'unsmarten_punctuation': False,
'unwrap_lines': True,
'use_auto_toc': False,
'username': None,
'verbose': 2}
InputFormatPlugin: Recipe Input running
Synthesizing mastheadImage
Python function terminated unexpectedly
All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters (Error Code: 1)
Traceback (most recent call last):
File "site.py", line 132, in main
File "site.py", line 109, in run_entry_point
File "site-packages\calibre\utils\ipc\worker.py", line 187, in main
File "site-packages\calibre\gui2\convert\gui_conversion.py", line 25, in gui_convert
File "site-packages\calibre\ebooks\conversion\plumber.py", line 949, in run
File "site-packages\calibre\customize\conversion.py", line 204, in __call__
File "site-packages\calibre\web\feeds\input.py", line 105, in convert
File "site-packages\calibre\web\feeds\news.py", line 824, in download
File "site-packages\calibre\web\feeds\news.py", line 1003, in build_index
File "site-packages\calibre\web\feeds\news.py", line 852, in feeds2index
File "site-packages\calibre\web\feeds\templates.py", line 40, in generate
File "site-packages\calibre\web\feeds\templates.py", line 95, in _generate
File "site-packages\lxml\builder.py", line 222, in __call__
File "site-packages\lxml\builder.py", line 185, in add_text
File "lxml.etree.pyx", line 916, in lxml.etree._Element.text.__set__ (src/lxml/lxml.etree.c:36134)
File "apihelpers.pxi", line 721, in lxml.etree._setNodeText (src/lxml/lxml.etree.c:17141)
File "apihelpers.pxi", line 1366, in lxml.etree._utf8 (src/lxml/lxml.etree.c:22211)
ValueError: All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters
Last edited by raijinfolly; 10-26-2011 at 11:21 PM. Reason: typo |
|
|
|
|
|
#2 |
|
Junior Member
![]() Posts: 2
Karma: 10
Join Date: Oct 2011
Device: Kindle
|
Figured out the issue. It was reading the title attribute of the link instead of the text within it. This caused the issue of processing things like registered trademark symbols as control codes instead of the symbols they were in the feed names.
I split out those parts and ended up with the following code: Code:
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds import Feed
import re
class PaizoWebFictionRecipe(BasicNewsRecipe):
def get_feeds(self):
feeds = []
soup = self.index_to_soup('http://paizo.com/pathfinder/tales/serial')
for alink in soup.findAll("span", {"class" : "productCategory"}):
url = alink.a
name = '{}'.format(url.string)
name = name.split('>').pop()
print 'Story name is : ', name
if url.string != None:
feeds.append((name, 'http://paizo.com' + url['href'] + '&xml=atom'))
if not feeds:
raise NotImplementedError
print 'Feeds are: ', feeds
return feeds
title = 'Pathfinder Web Fiction v2.0.1'
oldest_article = 1000000
max_articles_per_feed = 10
reverse_article_order = True
cover_url = 'http://paizo.com/image/content/Logos/PathfinderTales_500.jpeg'
remove_tags_after = [dict(name='a', text='Tags')]
preprocess_regexps = [ (re.compile(r'src="', re.DOTALL|re.IGNORECASE), lambda match: 'src="http://www.paizo.com'), ]
|
|
|
|
| Advert | |
|
|
|
|
#3 |
|
Junior Member
![]() Posts: 2
Karma: 10
Join Date: Apr 2012
Device: Android tablet
|
Sorry to resurrect this old thread, but I would really like to use this recipe, even though I don't know much about coding...
I managed to fix one of the problems in the recipe by changing how the feed URL is built, but now I'm running into another "All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters (Error Code: 1)" error. Here's the recipe that I'm using: Code:
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds import Feed
import re
class PaizoWebFictionRecipe(BasicNewsRecipe):
def get_feeds(self):
feeds = []
soup = self.index_to_soup('http://paizo.com/pathfinder/tales/serial')
for alink in soup.findAll("span", {"class" : "productCategory"}):
url = alink.a
name = '{}'.format(url.string)
name = name.split('>').pop()
print 'Story name is : ', name
if url.string != None:
feeds.append((name, url['href'] + '&xml=atom'))
if not feeds:
raise NotImplementedError
print 'Feeds are: ', feeds
return feeds
title = 'Pathfinder Web Fiction v2.0.1'
oldest_article = 1000000
max_articles_per_feed = 10
reverse_article_order = True
cover_url = 'http://paizo.com/image/content/Logos/PathfinderTales_500.jpeg'
remove_tags_after = [dict(name='a', text='Tags')]
preprocess_regexps = [ (re.compile(r'src="', re.DOTALL|re.IGNORECASE), lambda match: 'src="http://www.paizo.com'), ]
Code:
Fetch news from Pathfinder Web Fiction v2.0.1
Resolved conversion options
calibre version: 0.8.47
{'asciiize': False,
'author_sort': None,
'authors': None,
'base_font_size': 0,
'book_producer': None,
'change_justification': 'original',
'chapter': None,
'chapter_mark': 'pagebreak',
'comments': None,
'cover': None,
'debug_pipeline': None,
'dehyphenate': True,
'delete_blank_paragraphs': True,
'disable_font_rescaling': False,
'dont_download_recipe': False,
'dont_split_on_page_breaks': True,
'duplicate_links_in_toc': False,
'enable_heuristics': False,
'epub_flatten': False,
'extra_css': None,
'extract_to': None,
'filter_css': None,
'fix_indents': True,
'flow_size': 260,
'font_size_mapping': None,
'format_scene_breaks': True,
'html_unwrap_factor': 0.4,
'input_encoding': None,
'input_profile': <calibre.customize.profiles.InputProfile object at 0x03B973B0>,
'insert_blank_line': False,
'insert_blank_line_size': 0.5,
'insert_metadata': False,
'isbn': None,
'italicize_common_cases': True,
'keep_ligatures': False,
'language': None,
'level1_toc': None,
'level2_toc': None,
'level3_toc': None,
'line_height': 0,
'linearize_tables': False,
'lrf': False,
'margin_bottom': 5.0,
'margin_left': 5.0,
'margin_right': 5.0,
'margin_top': 5.0,
'markup_chapter_headings': True,
'max_toc_links': 50,
'minimum_line_height': 120.0,
'no_chapters_in_toc': False,
'no_default_epub_cover': False,
'no_inline_navbars': False,
'no_svg_cover': False,
'output_profile': <calibre.customize.profiles.TabletOutput object at 0x03B978D0>,
'page_breaks_before': None,
'password': None,
'prefer_metadata_cover': False,
'preserve_cover_aspect_ratio': False,
'pretty_print': True,
'pubdate': None,
'publisher': None,
'rating': None,
'read_metadata_from_opf': None,
'remove_fake_margins': True,
'remove_first_image': False,
'remove_paragraph_spacing': False,
'remove_paragraph_spacing_indent_size': 1.5,
'renumber_headings': True,
'replace_scene_breaks': '',
'series': None,
'series_index': None,
'smarten_punctuation': False,
'sr1_replace': '',
'sr1_search': '',
'sr2_replace': '',
'sr2_search': '',
'sr3_replace': '',
'sr3_search': '',
'tags': None,
'test': False,
'timestamp': None,
'title': None,
'title_sort': None,
'toc_filter': None,
'toc_threshold': 6,
'unsmarten_punctuation': False,
'unwrap_lines': True,
'use_auto_toc': False,
'username': None,
'verbose': 2}
InputFormatPlugin: Recipe Input running
Story name is : A Lesson in Taxonomy
Story name is : Krunzle the Quick
Story name is : The Ironroot Deception
Story name is : A Passage to Absalom
Story name is : Lord of Penance
Story name is : The Lost Pathfinder
Story name is : Blood and Money
Story name is : Mother Bears
Story name is : The Perfumer's Apprentice
Story name is : Blood Crimes
Story name is : Noble Sacrifice
Story name is : The Secret of the Rose and Glove
Story name is : Certainty
Story name is : Plow and Sword
Story name is : The Seventh Execution
Story name is : Faithful Servants
Story name is : The Box
Story name is : The Swamp Warden
Story name is : Fingers of Death—No, Doom!
Story name is : The Ghosts of Broken Blades
Story name is : The Walkers from the Crypt
Story name is : Guns of Alkenstar
Story name is : The Illusionist
Story name is : Two Pieces of Tarnished Silver
Feeds are: [('A Lesson in Taxonomy', u'http://paizo.com/pathfinder/tales/serial/aLessonInTaxonomy&xml=atom'), ('Krunzle the Quick', u'http://paizo.com/pathfinder/tales/serial/krunzleTheQuick&xml=atom'), ('The Ironroot Deception', u'http://paizo.com/pathfinder/tales/serial/theIronrootDeception&xml=atom'), ('A Passage to Absalom', u'http://paizo.com/pathfinder/tales/serial/aPassageToAbsalom&xml=atom'), ('Lord of Penance', u'http://paizo.com/pathfinder/tales/serial/lordOfPenance&xml=atom'), ('The Lost Pathfinder', u'http://paizo.com/pathfinder/tales/serial/theLostPathfinder&xml=atom'), ('Blood and Money', u'http://paizo.com/pathfinder/tales/serial/bloodAndMoney&xml=atom'), ('Mother Bears', u'http://paizo.com/pathfinder/tales/serial/motherBears&xml=atom'), ("The Perfumer's Apprentice", u'http://paizo.com/pathfinder/tales/serial/thePerfumersApprentice&xml=atom'), ('Blood Crimes', u'http://paizo.com/pathfinder/tales/serial/bloodCrimes&xml=atom'), ('Noble Sacrifice', u'http://paizo.com/pathfinder/tales/serial/nobleSacrifice&xml=atom'), ('The Secret of the Rose and Glove', u'http://paizo.com/pathfinder/tales/serial/theSecretOfTheRoseAndGlove&xml=atom'), ('Certainty', u'http://paizo.com/pathfinder/tales/serial/certainty&xml=atom'), ('Plow and Sword', u'http://paizo.com/pathfinder/tales/serial/plowAndSword&xml=atom'), ('The Seventh Execution', u'http://paizo.com/pathfinder/tales/serial/theSeventhExecution&xml=atom'), ('Faithful Servants', u'http://paizo.com/pathfinder/tales/serial/faithfulServants&xml=atom'), ('The Box', u'http://paizo.com/pathfinder/tales/serial/theBox&xml=atom'), ('The Swamp Warden', u'http://paizo.com/pathfinder/tales/serial/theSwampWarden&xml=atom'), ('Fingers of Death\xe2\x80\x94No, Doom!', u'http://paizo.com/pathfinder/tales/serial/fingersOfDeathNoDoom&xml=atom'), ('The Ghosts of Broken Blades', u'http://paizo.com/pathfinder/tales/serial/theGhostsOfBrokenBlades&xml=atom'), ('The Walkers from the Crypt', 
u'http://paizo.com/pathfinder/tales/serial/theWalkersFromTheCrypt&xml=atom'), ('Guns of Alkenstar', u'http://paizo.com/pathfinder/tales/serial/gunsOfAlkenstar&xml=atom'), ('The Illusionist', u'http://paizo.com/pathfinder/tales/serial/theIllusionist&xml=atom'), ('Two Pieces of Tarnished Silver', u'http://paizo.com/pathfinder/tales/serial/twoPiecesOfTarnishedSilver&xml=atom')]
Synthesizing mastheadImage
Python function terminated unexpectedly
All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters (Error Code: 1)
Traceback (most recent call last):
File "site.py", line 132, in main
File "site.py", line 109, in run_entry_point
File "site-packages\calibre\utils\ipc\worker.py", line 191, in main
File "site-packages\calibre\gui2\convert\gui_conversion.py", line 25, in gui_convert
File "site-packages\calibre\ebooks\conversion\plumber.py", line 963, in run
File "site-packages\calibre\customize\conversion.py", line 208, in __call__
File "site-packages\calibre\ebooks\conversion\plugins\recipe_input.py", line 105, in convert
File "site-packages\calibre\web\feeds\news.py", line 861, in download
File "site-packages\calibre\web\feeds\news.py", line 1047, in build_index
File "site-packages\calibre\web\feeds\news.py", line 899, in feeds2index
File "site-packages\calibre\web\feeds\templates.py", line 43, in generate
File "site-packages\calibre\web\feeds\templates.py", line 264, in _generate
File "site-packages\lxml\builder.py", line 222, in __call__
File "site-packages\lxml\builder.py", line 185, in add_text
File "lxml.etree.pyx", line 916, in lxml.etree._Element.text.__set__ (src/lxml/lxml.etree.c:36134)
File "apihelpers.pxi", line 721, in lxml.etree._setNodeText (src/lxml/lxml.etree.c:17141)
File "apihelpers.pxi", line 1366, in lxml.etree._utf8 (src/lxml/lxml.etree.c:22211)
ValueError: All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters
|
|
|
|
|
|
#4 |
|
Junior Member
![]() Posts: 2
Karma: 10
Join Date: Apr 2012
Device: Android tablet
|
Any thoughts? I don't really know how to read the python expression that split out the unreadable character, so if someone could point me towards another template I could use, that would be helpful too, thanks!
|
|
|
|
![]() |
|
Similar Threads
|
||||
| Thread | Thread Starter | Forum | Replies | Last Post |
| Minor issue with padding of Sueddeutsche recipe | aerodynamik | Recipes | 7 | 02-03-2015 04:39 PM |
| Recipe works when mocked up as Python file, fails when converted to Recipe | ode | Recipes | 7 | 09-04-2011 05:57 AM |
| Hyperlinks Issue in NLT Bible (ebook or software issue)? | myet01 | Kobo Reader | 5 | 07-29-2011 09:47 AM |
| 0.5.2 Issue | drmathprog | Calibre | 3 | 03-25-2009 11:33 PM |
| Next issue..... | DeathtoToasters | iRex | 5 | 12-05-2007 03:10 PM |