View Single Post
Old 04-18-2012, 01:06 PM   #3
psytrooper
Junior Member
psytrooper began at the beginning.
 
Posts: 2
Karma: 10
Join Date: Apr 2012
Device: Android tablet
Sorry to resurrect this old thread, but I would really like to use this recipe and even I don't know much about coding...

I managed to fix one of the problems in the recipe, by changing the URL function, but now I'm running to another "All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters (Error Code: 1)"

Here's the recipe that I'm using:

Code:
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds import Feed

import re

class PaizoWebFictionRecipe(BasicNewsRecipe):
  def get_feeds(self):
    feeds = []
    soup = self.index_to_soup('http://paizo.com/pathfinder/tales/serial')
    for alink in soup.findAll("span", {"class" : "productCategory"}):
      url = alink.a
      name = '{}'.format(url.string)
      name = name.split('>').pop()
      print 'Story name is : ', name
      if url.string != None:
        feeds.append((name, url['href'] + '&xml=atom'))
    if not feeds:
      raise NotImplementedError
    print 'Feeds are: ', feeds
    return feeds
  
  title = 'Pathfinder Web Fiction v2.0.1'
  oldest_article = 1000000
  max_articles_per_feed = 10
  reverse_article_order = True
  cover_url = 'http://paizo.com/image/content/Logos/PathfinderTales_500.jpeg'
  remove_tags_after = [dict(name='a', text='Tags')]
  preprocess_regexps = [ (re.compile(r'src="', re.DOTALL|re.IGNORECASE), lambda match: 'src="http://www.paizo.com'), ]
Here's the error code:

Code:
Fetch news from Pathfinder Web Fiction v2.0.1
Resolved conversion options
calibre version: 0.8.47
{'asciiize': False,
 'author_sort': None,
 'authors': None,
 'base_font_size': 0,
 'book_producer': None,
 'change_justification': 'original',
 'chapter': None,
 'chapter_mark': 'pagebreak',
 'comments': None,
 'cover': None,
 'debug_pipeline': None,
 'dehyphenate': True,
 'delete_blank_paragraphs': True,
 'disable_font_rescaling': False,
 'dont_download_recipe': False,
 'dont_split_on_page_breaks': True,
 'duplicate_links_in_toc': False,
 'enable_heuristics': False,
 'epub_flatten': False,
 'extra_css': None,
 'extract_to': None,
 'filter_css': None,
 'fix_indents': True,
 'flow_size': 260,
 'font_size_mapping': None,
 'format_scene_breaks': True,
 'html_unwrap_factor': 0.4,
 'input_encoding': None,
 'input_profile': <calibre.customize.profiles.InputProfile object at 0x03B973B0>,
 'insert_blank_line': False,
 'insert_blank_line_size': 0.5,
 'insert_metadata': False,
 'isbn': None,
 'italicize_common_cases': True,
 'keep_ligatures': False,
 'language': None,
 'level1_toc': None,
 'level2_toc': None,
 'level3_toc': None,
 'line_height': 0,
 'linearize_tables': False,
 'lrf': False,
 'margin_bottom': 5.0,
 'margin_left': 5.0,
 'margin_right': 5.0,
 'margin_top': 5.0,
 'markup_chapter_headings': True,
 'max_toc_links': 50,
 'minimum_line_height': 120.0,
 'no_chapters_in_toc': False,
 'no_default_epub_cover': False,
 'no_inline_navbars': False,
 'no_svg_cover': False,
 'output_profile': <calibre.customize.profiles.TabletOutput object at 0x03B978D0>,
 'page_breaks_before': None,
 'password': None,
 'prefer_metadata_cover': False,
 'preserve_cover_aspect_ratio': False,
 'pretty_print': True,
 'pubdate': None,
 'publisher': None,
 'rating': None,
 'read_metadata_from_opf': None,
 'remove_fake_margins': True,
 'remove_first_image': False,
 'remove_paragraph_spacing': False,
 'remove_paragraph_spacing_indent_size': 1.5,
 'renumber_headings': True,
 'replace_scene_breaks': '',
 'series': None,
 'series_index': None,
 'smarten_punctuation': False,
 'sr1_replace': '',
 'sr1_search': '',
 'sr2_replace': '',
 'sr2_search': '',
 'sr3_replace': '',
 'sr3_search': '',
 'tags': None,
 'test': False,
 'timestamp': None,
 'title': None,
 'title_sort': None,
 'toc_filter': None,
 'toc_threshold': 6,
 'unsmarten_punctuation': False,
 'unwrap_lines': True,
 'use_auto_toc': False,
 'username': None,
 'verbose': 2}
InputFormatPlugin: Recipe Input running
Story name is :  A Lesson in Taxonomy
Story name is :  Krunzle the Quick
Story name is :  The Ironroot Deception
Story name is :  A Passage to Absalom
Story name is :  Lord of Penance
Story name is :  The Lost Pathfinder
Story name is :  Blood and Money
Story name is :  Mother Bears
Story name is :  The Perfumer's Apprentice
Story name is :  Blood Crimes
Story name is :  Noble Sacrifice
Story name is :  The Secret of the Rose and Glove
Story name is :  Certainty
Story name is :  Plow and Sword
Story name is :  The Seventh Execution
Story name is :  Faithful Servants
Story name is :  The Box
Story name is :  The Swamp Warden
Story name is :  Fingers of Death—No, Doom!
Story name is :  The Ghosts of Broken Blades
Story name is :  The Walkers from the Crypt
Story name is :  Guns of Alkenstar
Story name is :  The Illusionist
Story name is :  Two Pieces of Tarnished Silver
Feeds are:  [('A Lesson in Taxonomy', u'http://paizo.com/pathfinder/tales/serial/aLessonInTaxonomy&xml=atom'), ('Krunzle the Quick', u'http://paizo.com/pathfinder/tales/serial/krunzleTheQuick&xml=atom'), ('The Ironroot Deception', u'http://paizo.com/pathfinder/tales/serial/theIronrootDeception&xml=atom'), ('A Passage to Absalom', u'http://paizo.com/pathfinder/tales/serial/aPassageToAbsalom&xml=atom'), ('Lord of Penance', u'http://paizo.com/pathfinder/tales/serial/lordOfPenance&xml=atom'), ('The Lost Pathfinder', u'http://paizo.com/pathfinder/tales/serial/theLostPathfinder&xml=atom'), ('Blood and Money', u'http://paizo.com/pathfinder/tales/serial/bloodAndMoney&xml=atom'), ('Mother Bears', u'http://paizo.com/pathfinder/tales/serial/motherBears&xml=atom'), ("The Perfumer's Apprentice", u'http://paizo.com/pathfinder/tales/serial/thePerfumersApprentice&xml=atom'), ('Blood Crimes', u'http://paizo.com/pathfinder/tales/serial/bloodCrimes&xml=atom'), ('Noble Sacrifice', u'http://paizo.com/pathfinder/tales/serial/nobleSacrifice&xml=atom'), ('The Secret of the Rose and Glove', u'http://paizo.com/pathfinder/tales/serial/theSecretOfTheRoseAndGlove&xml=atom'), ('Certainty', u'http://paizo.com/pathfinder/tales/serial/certainty&xml=atom'), ('Plow and Sword', u'http://paizo.com/pathfinder/tales/serial/plowAndSword&xml=atom'), ('The Seventh Execution', u'http://paizo.com/pathfinder/tales/serial/theSeventhExecution&xml=atom'), ('Faithful Servants', u'http://paizo.com/pathfinder/tales/serial/faithfulServants&xml=atom'), ('The Box', u'http://paizo.com/pathfinder/tales/serial/theBox&xml=atom'), ('The Swamp Warden', u'http://paizo.com/pathfinder/tales/serial/theSwampWarden&xml=atom'), ('Fingers of Death\xe2\x80\x94No, Doom!', u'http://paizo.com/pathfinder/tales/serial/fingersOfDeathNoDoom&xml=atom'), ('The Ghosts of Broken Blades', u'http://paizo.com/pathfinder/tales/serial/theGhostsOfBrokenBlades&xml=atom'), ('The Walkers from the Crypt', u'http://paizo.com/pathfinder/tales/serial/theWalkersFromTheCrypt&xml=atom'), ('Guns of Alkenstar', u'http://paizo.com/pathfinder/tales/serial/gunsOfAlkenstar&xml=atom'), ('The Illusionist', u'http://paizo.com/pathfinder/tales/serial/theIllusionist&xml=atom'), ('Two Pieces of Tarnished Silver', u'http://paizo.com/pathfinder/tales/serial/twoPiecesOfTarnishedSilver&xml=atom')]
Synthesizing mastheadImage
Python function terminated unexpectedly
  All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters (Error Code: 1)
Traceback (most recent call last):
  File "site.py", line 132, in main
  File "site.py", line 109, in run_entry_point
  File "site-packages\calibre\utils\ipc\worker.py", line 191, in main
  File "site-packages\calibre\gui2\convert\gui_conversion.py", line 25, in gui_convert
  File "site-packages\calibre\ebooks\conversion\plumber.py", line 963, in run
  File "site-packages\calibre\customize\conversion.py", line 208, in __call__
  File "site-packages\calibre\ebooks\conversion\plugins\recipe_input.py", line 105, in convert
  File "site-packages\calibre\web\feeds\news.py", line 861, in download
  File "site-packages\calibre\web\feeds\news.py", line 1047, in build_index
  File "site-packages\calibre\web\feeds\news.py", line 899, in feeds2index
  File "site-packages\calibre\web\feeds\templates.py", line 43, in generate
  File "site-packages\calibre\web\feeds\templates.py", line 264, in _generate
  File "site-packages\lxml\builder.py", line 222, in __call__
  File "site-packages\lxml\builder.py", line 185, in add_text
  File "lxml.etree.pyx", line 916, in lxml.etree._Element.text.__set__ (src/lxml/lxml.etree.c:36134)
  File "apihelpers.pxi", line 721, in lxml.etree._setNodeText (src/lxml/lxml.etree.c:17141)
  File "apihelpers.pxi", line 1366, in lxml.etree._utf8 (src/lxml/lxml.etree.c:22211)
ValueError: All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters
Any thoughts/advice would be much appreciated - I'm trying to learn some python through this process so I can help myself more in the future, so any explanatory notes will be appreciated too! (I started reading http://oldbugs.calibre-ebook.com/wik...s:atasteofsoup but wasn't quite sure how to apply it to my specific problem...
psytrooper is offline   Reply With Quote