View Single Post
Old 03-23-2011, 10:47 AM   #1
jakev383
Junior Member
jakev383 began at the beginning.
 
Posts: 2
Karma: 10
Join Date: Mar 2011
Device: Nook
HTML to EPUB conversion crashes

I'm running Calibre 0.7.38 on Fedora 14, and whenever I try to convert an HTML book to EPUB, the conversion crashes. I've captured the crash output here:

ERROR: Conversion Error: <b>Failed</b>: Convert book 1 of 1 (The drowned world)

Convert book 1 of 1 (The drowned world)
Processing archive...
Resolved conversion options
calibre version: 0.7.38
{'asciiize': True,
'author_sort': None,
'authors': None,
'base_font_size': 0.0,
'book_producer': None,
'breadth_first': False,
'change_justification': u'original',
'chapter': u"//*[((name()='h1' or name()='h2') and re:test(., 'chapter|book|section|part|prologue|epilogue\\s+', 'i')) or @class = 'chapter']",
'chapter_mark': u'pagebreak',
'comments': None,
'cover': None,
'debug_pipeline': None,
'disable_font_rescaling': False,
'dont_package': False,
'dont_split_on_page_breaks': False,
'epub_flatten': False,
'extra_css': None,
'extract_to': None,
'flow_size': 260,
'font_size_mapping': None,
'footer_regex': u'(?i)(?<=<hr>)((\\s*<a name=\\d+></a>((<img.+?>)*<br>\\s*)?\\d+<br>\\s*.*?\\s*)|(\\s* <a name=\\d+></a>((<img.+?>)*<br>\\s*)?.*?<br>\\s*\\d+))(?=<br>)' ,
'header_regex': u'(?i)(?<=<hr>)((\\s*<a name=\\d+></a>((<img.+?>)*<br>\\s*)?\\d+<br>\\s*.*?\\s*)|(\\s* <a name=\\d+></a>((<img.+?>)*<br>\\s*)?.*?<br>\\s*\\d+))(?=<br>)' ,
'html_unwrap_factor': 0.4,
'input_encoding': '',
'input_profile': <calibre.customize.profiles.InputProfile object at 0x378d6d0>,
'insert_blank_line': False,
'insert_metadata': False,
'isbn': None,
'keep_ligatures': False,
'language': None,
'level1_toc': None,
'level2_toc': None,
'level3_toc': None,
'line_height': 0.0,
'linearize_tables': False,
'margin_bottom': 5.0,
'margin_left': 5.0,
'margin_right': 5.0,
'margin_top': 5.0,
'max_levels': 5,
'max_toc_links': 50,
'minimum_line_height': 120.0,
'no_chapters_in_toc': False,
'no_default_epub_cover': False,
'no_inline_navbars': False,
'no_svg_cover': False,
'output_profile': <calibre.customize.profiles.NookOutput object at 0x378ddd0>,
'page_breaks_before': u"//*[name()='h1' or name()='h2']",
'prefer_metadata_cover': False,
'preprocess_html': False,
'preserve_cover_aspect_ratio': False,
'pretty_print': True,
'pubdate': None,
'publisher': None,
'rating': None,
'read_metadata_from_opf': '/tmp/calibre_0.7.38_tmp_ArlGun/calibre_0.7.38_5evi0W.opf',
'remove_first_image': False,
'remove_footer': False,
'remove_header': False,
'remove_paragraph_spacing': False,
'remove_paragraph_spacing_indent_size': 1.5,
'series': None,
'series_index': None,
'smarten_punctuation': False,
'tags': None,
'timestamp': None,
'title': None,
'title_sort': None,
'toc_filter': None,
'toc_threshold': 6,
'use_auto_toc': False,
'verbose': 2}
InputFormatPlugin: HTML Input running
on /tmp/calibre_0.7.38_tmp_ArlGun/calibre_0.7.38_hdYewr_plumber/content.opf
Parsing all content...
Traceback (most recent call last):
File "/usr/bin/calibre-parallel", line 19, in <module>
sys.exit(main())
File "/usr/lib64/calibre/calibre/utils/ipc/worker.py", line 106, in main
result = func(*args, **kwargs)
File "/usr/lib64/calibre/calibre/gui2/convert/gui_conversion.py", line 24, in gui_convert
plumber.run()
File "/usr/lib64/calibre/calibre/ebooks/conversion/plumber.py", line 853, in run
accelerators, tdir)
File "/usr/lib64/calibre/calibre/customize/conversion.py", line 216, in __call__
log, accelerators)
File "/usr/lib64/calibre/calibre/ebooks/html/input.py", line 299, in convert
encoding=opts.input_encoding)
File "/usr/lib64/calibre/calibre/ebooks/conversion/plumber.py", line 990, in create_oebbook
reader()(oeb, path_or_stream)
File "/usr/lib64/calibre/calibre/ebooks/oeb/reader.py", line 71, in __call__
opf = self._read_opf()
File "/usr/lib64/calibre/calibre/ebooks/oeb/reader.py", line 104, in _read_opf
data = self.oeb.decode(data)
File "/usr/lib64/calibre/calibre/ebooks/oeb/base.py", line 1897, in decode
return fix_data(data.decode(self.input_encoding, 'replace'))
LookupError: unknown encoding:


I'm not sure what it means by "unknown encoding". Calibre successfully imports the HTML as a ZIP, and it only crashes when I convert to EPUB.
As a workaround, I've been saving the books as text, adding them back into my library, and converting them that way, but it's not ideal. Does anyone have any ideas, or know of a bulk HTML-to-TXT/PDF (I know, gasp!) converter that would make this easier for me?
Thanks!
jakev383 is offline   Reply With Quote