Junior Member
Posts: 2
Karma: 10
Join Date: Apr 2012
Device: Android tablet
|
Sorry to resurrect this old thread, but I would really like to use this recipe and even I don't know much about coding...
I managed to fix one of the problems in the recipe, by changing the URL function, but now I'm running to another "All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters (Error Code: 1)"
Here's the recipe that I'm using:
Code:
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds import Feed
import re
class PaizoWebFictionRecipe(BasicNewsRecipe):
def get_feeds(self):
feeds = []
soup = self.index_to_soup('http://paizo.com/pathfinder/tales/serial')
for alink in soup.findAll("span", {"class" : "productCategory"}):
url = alink.a
name = '{}'.format(url.string)
name = name.split('>').pop()
print 'Story name is : ', name
if url.string != None:
feeds.append((name, url['href'] + '&xml=atom'))
if not feeds:
raise NotImplementedError
print 'Feeds are: ', feeds
return feeds
title = 'Pathfinder Web Fiction v2.0.1'
oldest_article = 1000000
max_articles_per_feed = 10
reverse_article_order = True
cover_url = 'http://paizo.com/image/content/Logos/PathfinderTales_500.jpeg'
remove_tags_after = [dict(name='a', text='Tags')]
preprocess_regexps = [ (re.compile(r'src="', re.DOTALL|re.IGNORECASE), lambda match: 'src="http://www.paizo.com'), ]
Here's the error code:
Code:
Fetch news from Pathfinder Web Fiction v2.0.1
Resolved conversion options
calibre version: 0.8.47
{'asciiize': False,
'author_sort': None,
'authors': None,
'base_font_size': 0,
'book_producer': None,
'change_justification': 'original',
'chapter': None,
'chapter_mark': 'pagebreak',
'comments': None,
'cover': None,
'debug_pipeline': None,
'dehyphenate': True,
'delete_blank_paragraphs': True,
'disable_font_rescaling': False,
'dont_download_recipe': False,
'dont_split_on_page_breaks': True,
'duplicate_links_in_toc': False,
'enable_heuristics': False,
'epub_flatten': False,
'extra_css': None,
'extract_to': None,
'filter_css': None,
'fix_indents': True,
'flow_size': 260,
'font_size_mapping': None,
'format_scene_breaks': True,
'html_unwrap_factor': 0.4,
'input_encoding': None,
'input_profile': <calibre.customize.profiles.InputProfile object at 0x03B973B0>,
'insert_blank_line': False,
'insert_blank_line_size': 0.5,
'insert_metadata': False,
'isbn': None,
'italicize_common_cases': True,
'keep_ligatures': False,
'language': None,
'level1_toc': None,
'level2_toc': None,
'level3_toc': None,
'line_height': 0,
'linearize_tables': False,
'lrf': False,
'margin_bottom': 5.0,
'margin_left': 5.0,
'margin_right': 5.0,
'margin_top': 5.0,
'markup_chapter_headings': True,
'max_toc_links': 50,
'minimum_line_height': 120.0,
'no_chapters_in_toc': False,
'no_default_epub_cover': False,
'no_inline_navbars': False,
'no_svg_cover': False,
'output_profile': <calibre.customize.profiles.TabletOutput object at 0x03B978D0>,
'page_breaks_before': None,
'password': None,
'prefer_metadata_cover': False,
'preserve_cover_aspect_ratio': False,
'pretty_print': True,
'pubdate': None,
'publisher': None,
'rating': None,
'read_metadata_from_opf': None,
'remove_fake_margins': True,
'remove_first_image': False,
'remove_paragraph_spacing': False,
'remove_paragraph_spacing_indent_size': 1.5,
'renumber_headings': True,
'replace_scene_breaks': '',
'series': None,
'series_index': None,
'smarten_punctuation': False,
'sr1_replace': '',
'sr1_search': '',
'sr2_replace': '',
'sr2_search': '',
'sr3_replace': '',
'sr3_search': '',
'tags': None,
'test': False,
'timestamp': None,
'title': None,
'title_sort': None,
'toc_filter': None,
'toc_threshold': 6,
'unsmarten_punctuation': False,
'unwrap_lines': True,
'use_auto_toc': False,
'username': None,
'verbose': 2}
InputFormatPlugin: Recipe Input running
Story name is : A Lesson in Taxonomy
Story name is : Krunzle the Quick
Story name is : The Ironroot Deception
Story name is : A Passage to Absalom
Story name is : Lord of Penance
Story name is : The Lost Pathfinder
Story name is : Blood and Money
Story name is : Mother Bears
Story name is : The Perfumer's Apprentice
Story name is : Blood Crimes
Story name is : Noble Sacrifice
Story name is : The Secret of the Rose and Glove
Story name is : Certainty
Story name is : Plow and Sword
Story name is : The Seventh Execution
Story name is : Faithful Servants
Story name is : The Box
Story name is : The Swamp Warden
Story name is : Fingers of Death—No, Doom!
Story name is : The Ghosts of Broken Blades
Story name is : The Walkers from the Crypt
Story name is : Guns of Alkenstar
Story name is : The Illusionist
Story name is : Two Pieces of Tarnished Silver
Feeds are: [('A Lesson in Taxonomy', u'http://paizo.com/pathfinder/tales/serial/aLessonInTaxonomy&xml=atom'), ('Krunzle the Quick', u'http://paizo.com/pathfinder/tales/serial/krunzleTheQuick&xml=atom'), ('The Ironroot Deception', u'http://paizo.com/pathfinder/tales/serial/theIronrootDeception&xml=atom'), ('A Passage to Absalom', u'http://paizo.com/pathfinder/tales/serial/aPassageToAbsalom&xml=atom'), ('Lord of Penance', u'http://paizo.com/pathfinder/tales/serial/lordOfPenance&xml=atom'), ('The Lost Pathfinder', u'http://paizo.com/pathfinder/tales/serial/theLostPathfinder&xml=atom'), ('Blood and Money', u'http://paizo.com/pathfinder/tales/serial/bloodAndMoney&xml=atom'), ('Mother Bears', u'http://paizo.com/pathfinder/tales/serial/motherBears&xml=atom'), ("The Perfumer's Apprentice", u'http://paizo.com/pathfinder/tales/serial/thePerfumersApprentice&xml=atom'), ('Blood Crimes', u'http://paizo.com/pathfinder/tales/serial/bloodCrimes&xml=atom'), ('Noble Sacrifice', u'http://paizo.com/pathfinder/tales/serial/nobleSacrifice&xml=atom'), ('The Secret of the Rose and Glove', u'http://paizo.com/pathfinder/tales/serial/theSecretOfTheRoseAndGlove&xml=atom'), ('Certainty', u'http://paizo.com/pathfinder/tales/serial/certainty&xml=atom'), ('Plow and Sword', u'http://paizo.com/pathfinder/tales/serial/plowAndSword&xml=atom'), ('The Seventh Execution', u'http://paizo.com/pathfinder/tales/serial/theSeventhExecution&xml=atom'), ('Faithful Servants', u'http://paizo.com/pathfinder/tales/serial/faithfulServants&xml=atom'), ('The Box', u'http://paizo.com/pathfinder/tales/serial/theBox&xml=atom'), ('The Swamp Warden', u'http://paizo.com/pathfinder/tales/serial/theSwampWarden&xml=atom'), ('Fingers of Death\xe2\x80\x94No, Doom!', u'http://paizo.com/pathfinder/tales/serial/fingersOfDeathNoDoom&xml=atom'), ('The Ghosts of Broken Blades', u'http://paizo.com/pathfinder/tales/serial/theGhostsOfBrokenBlades&xml=atom'), ('The Walkers from the Crypt', u'http://paizo.com/pathfinder/tales/serial/theWalkersFromTheCrypt&xml=atom'), ('Guns of Alkenstar', u'http://paizo.com/pathfinder/tales/serial/gunsOfAlkenstar&xml=atom'), ('The Illusionist', u'http://paizo.com/pathfinder/tales/serial/theIllusionist&xml=atom'), ('Two Pieces of Tarnished Silver', u'http://paizo.com/pathfinder/tales/serial/twoPiecesOfTarnishedSilver&xml=atom')]
Synthesizing mastheadImage
Python function terminated unexpectedly
All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters (Error Code: 1)
Traceback (most recent call last):
File "site.py", line 132, in main
File "site.py", line 109, in run_entry_point
File "site-packages\calibre\utils\ipc\worker.py", line 191, in main
File "site-packages\calibre\gui2\convert\gui_conversion.py", line 25, in gui_convert
File "site-packages\calibre\ebooks\conversion\plumber.py", line 963, in run
File "site-packages\calibre\customize\conversion.py", line 208, in __call__
File "site-packages\calibre\ebooks\conversion\plugins\recipe_input.py", line 105, in convert
File "site-packages\calibre\web\feeds\news.py", line 861, in download
File "site-packages\calibre\web\feeds\news.py", line 1047, in build_index
File "site-packages\calibre\web\feeds\news.py", line 899, in feeds2index
File "site-packages\calibre\web\feeds\templates.py", line 43, in generate
File "site-packages\calibre\web\feeds\templates.py", line 264, in _generate
File "site-packages\lxml\builder.py", line 222, in __call__
File "site-packages\lxml\builder.py", line 185, in add_text
File "lxml.etree.pyx", line 916, in lxml.etree._Element.text.__set__ (src/lxml/lxml.etree.c:36134)
File "apihelpers.pxi", line 721, in lxml.etree._setNodeText (src/lxml/lxml.etree.c:17141)
File "apihelpers.pxi", line 1366, in lxml.etree._utf8 (src/lxml/lxml.etree.c:22211)
ValueError: All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters
Any thoughts/advice would be much appreciated - I'm trying to learn some python through this process so I can help myself more in the future, so any explanatory notes will be appreciated too! (I started reading http://oldbugs.calibre-ebook.com/wik...s:atasteofsoup but wasn't quite sure how to apply it to my specific problem...
|