MobileRead Forums - View Single Post

jakev383 · 03-23-2011, 11:47 AM

I'm running Calibre 0.7.38 on Fedora 14, and whenever I try to convert an HTML book to EPUB it crashes. I've captured the crash output here:

ERROR: Conversion Error: Failed: Convert book 1 of 1 (The drowned world)

Convert book 1 of 1 (The drowned world)
Processing archive...
Resolved conversion options
calibre version: 0.7.38
{'asciiize': True,
'author_sort': None,
'authors': None,
'base_font_size': 0.0,
'book_producer': None,
'breadth_first': False,
'change_justification': u'original',
'chapter': u"//*[((name()='h1' or name()='h2') and re:test(., 'chapter|book|section|part|prologue|epilogue\\s+', 'i')) or @class = 'chapter']",
'chapter_mark': u'pagebreak',
'comments': None,
'cover': None,
'debug_pipeline': None,
'disable_font_rescaling': False,
'dont_package': False,
'dont_split_on_page_breaks': False,
'epub_flatten': False,
'extra_css': None,
'extract_to': None,
'flow_size': 260,
'font_size_mapping': None,
'footer_regex': u'(?i)(?<=<hr>)((\\s*<a name=\\d+></a>((<img.+?>)*<br>\\s*)?\\d+<br>\\s*.*?\\s*)|(\\s* <a name=\\d+></a>((<img.+?>)*<br>\\s*)?.*?<br>\\s*\\d+))(?=<br>)' ,
'header_regex': u'(?i)(?<=<hr>)((\\s*<a name=\\d+></a>((<img.+?>)*<br>\\s*)?\\d+<br>\\s*.*?\\s*)|(\\s* <a name=\\d+></a>((<img.+?>)*<br>\\s*)?.*?<br>\\s*\\d+))(?=<br>)' ,
'html_unwrap_factor': 0.4,
'input_encoding': '',
'input_profile': <calibre.customize.profiles.InputProfile object at 0x378d6d0>,
'insert_blank_line': False,
'insert_metadata': False,
'isbn': None,
'keep_ligatures': False,
'language': None,
'level1_toc': None,
'level2_toc': None,
'level3_toc': None,
'line_height': 0.0,
'linearize_tables': False,
'margin_bottom': 5.0,
'margin_left': 5.0,
'margin_right': 5.0,
'margin_top': 5.0,
'max_levels': 5,
'max_toc_links': 50,
'minimum_line_height': 120.0,
'no_chapters_in_toc': False,
'no_default_epub_cover': False,
'no_inline_navbars': False,
'no_svg_cover': False,
'output_profile': <calibre.customize.profiles.NookOutput object at 0x378ddd0>,
'page_breaks_before': u"//*[name()='h1' or name()='h2']",
'prefer_metadata_cover': False,
'preprocess_html': False,
'preserve_cover_aspect_ratio': False,
'pretty_print': True,
'pubdate': None,
'publisher': None,
'rating': None,
'read_metadata_from_opf': '/tmp/calibre_0.7.38_tmp_ArlGun/calibre_0.7.38_5evi0W.opf',
'remove_first_image': False,
'remove_footer': False,
'remove_header': False,
'remove_paragraph_spacing': False,
'remove_paragraph_spacing_indent_size': 1.5,
'series': None,
'series_index': None,
'smarten_punctuation': False,
'tags': None,
'timestamp': None,
'title': None,
'title_sort': None,
'toc_filter': None,
'toc_threshold': 6,
'use_auto_toc': False,
'verbose': 2}
InputFormatPlugin: HTML Input running
on /tmp/calibre_0.7.38_tmp_ArlGun/calibre_0.7.38_hdYewr_plumber/content.opf
Parsing all content...
Traceback (most recent call last):
File "/usr/bin/calibre-parallel", line 19, in <module>
sys.exit(main())
File "/usr/lib64/calibre/calibre/utils/ipc/worker.py", line 106, in main
result = func(*args, **kwargs)
File "/usr/lib64/calibre/calibre/gui2/convert/gui_conversion.py", line 24, in gui_convert
plumber.run()
File "/usr/lib64/calibre/calibre/ebooks/conversion/plumber.py", line 853, in run
accelerators, tdir)
File "/usr/lib64/calibre/calibre/customize/conversion.py", line 216, in __call__
log, accelerators)
File "/usr/lib64/calibre/calibre/ebooks/html/input.py", line 299, in convert
encoding=opts.input_encoding)
File "/usr/lib64/calibre/calibre/ebooks/conversion/plumber.py", line 990, in create_oebbook
reader()(oeb, path_or_stream)
File "/usr/lib64/calibre/calibre/ebooks/oeb/reader.py", line 71, in __call__
opf = self._read_opf()
File "/usr/lib64/calibre/calibre/ebooks/oeb/reader.py", line 104, in _read_opf
data = self.oeb.decode(data)
File "/usr/lib64/calibre/calibre/ebooks/oeb/base.py", line 1897, in decode
return fix_data(data.decode(self.input_encoding, 'replace'))
LookupError: unknown encoding:

I'm not sure what it means by "unknown encoding". It successfully imports the HTML to ZIP, and only crashes when I convert to EPUB.
I've been saving them as text and then adding them back into my library and converting that way, but it's not ideal. Does anyone have any ideas, or know of a bulk HTML-to-TXT/PDF (I know, gasp!) converter that would make this easier for me?
Thanks!

03-23-2011, 11:47 AM	#1
jakev383 Junior Member Posts: 2 Karma: 10 Join Date: Mar 2011 Device: Nook	HTML to EPUB conversion crashes I'm running Calibre 0.7.38 on Fedora 14, and whenever I try and convert a html book to epub it crashes. I've captured the crash output here: ERROR: Conversion Error: <b>Failed</b>: Convert book 1 of 1 (The drowned world) Convert book 1 of 1 (The drowned world) Processing archive... Resolved conversion options calibre version: 0.7.38 {'asciiize': True, 'author_sort': None, 'authors': None, 'base_font_size': 0.0, 'book_producer': None, 'breadth_first': False, 'change_justification': u'original', 'chapter': u"//[((name()='h1' or name()='h2') and re:test(., 'chapter\|book\|section\|part\|prologue\|epilogue\\s+', 'i')) or @class = 'chapter']", 'chapter_mark': u'pagebreak', 'comments': None, 'cover': None, 'debug_pipeline': None, 'disable_font_rescaling': False, 'dont_package': False, 'dont_split_on_page_breaks': False, 'epub_flatten': False, 'extra_css': None, 'extract_to': None, 'flow_size': 260, 'font_size_mapping': None, 'footer_regex': u'(?i)(?<=<hr>)((\\s<a name=\\d+></a>((<img.+?>)<br>\\s)?\\d+<br>\\s.?\\s)\|(\\s <a name=\\d+></a>((<img.+?>)<br>\\s)?.?<br>\\s\\d+))(?=<br>)' , 'header_regex': u'(?i)(?<=<hr>)((\\s<a name=\\d+></a>((<img.+?>)<br>\\s)?\\d+<br>\\s.?\\s)\|(\\s* <a name=\\d+></a>((<img.+?>)<br>\\s)?.?<br>\\s\\d+))(?=<br>)' , 'html_unwrap_factor': 0.4, 'input_encoding': '', 'input_profile': <calibre.customize.profiles.InputProfile object at 0x378d6d0>, 'insert_blank_line': False, 'insert_metadata': False, 'isbn': None, 'keep_ligatures': False, 'language': None, 'level1_toc': None, 'level2_toc': None, 'level3_toc': None, 'line_height': 0.0, 'linearize_tables': False, 'margin_bottom': 5.0, 'margin_left': 5.0, 'margin_right': 5.0, 'margin_top': 5.0, 'max_levels': 5, 'max_toc_links': 50, 'minimum_line_height': 120.0, 'no_chapters_in_toc': False, 'no_default_epub_cover': False, 'no_inline_navbars': False, 'no_svg_cover': False, 'output_profile': 
<calibre.customize.profiles.NookOutput object at 0x378ddd0>, 'page_breaks_before': u"//[name()='h1' or name()='h2']", 'prefer_metadata_cover': False, 'preprocess_html': False, 'preserve_cover_aspect_ratio': False, 'pretty_print': True, 'pubdate': None, 'publisher': None, 'rating': None, 'read_metadata_from_opf': '/tmp/calibre_0.7.38_tmp_ArlGun/calibre_0.7.38_5evi0W.opf', 'remove_first_image': False, 'remove_footer': False, 'remove_header': False, 'remove_paragraph_spacing': False, 'remove_paragraph_spacing_indent_size': 1.5, 'series': None, 'series_index': None, 'smarten_punctuation': False, 'tags': None, 'timestamp': None, 'title': None, 'title_sort': None, 'toc_filter': None, 'toc_threshold': 6, 'use_auto_toc': False, 'verbose': 2} InputFormatPlugin: HTML Input running on /tmp/calibre_0.7.38_tmp_ArlGun/calibre_0.7.38_hdYewr_plumber/content.opf Parsing all content... Traceback (most recent call last): File "/usr/bin/calibre-parallel", line 19, in <module> sys.exit(main()) File "/usr/lib64/calibre/calibre/utils/ipc/worker.py", line 106, in main result = func(args, **kwargs) File "/usr/lib64/calibre/calibre/gui2/convert/gui_conversion.py", line 24, in gui_convert plumber.run() File "/usr/lib64/calibre/calibre/ebooks/conversion/plumber.py", line 853, in run accelerators, tdir) File "/usr/lib64/calibre/calibre/customize/conversion.py", line 216, in __call__ log, accelerators) File "/usr/lib64/calibre/calibre/ebooks/html/input.py", line 299, in convert encoding=opts.input_encoding) File "/usr/lib64/calibre/calibre/ebooks/conversion/plumber.py", line 990, in create_oebbook reader()(oeb, path_or_stream) File "/usr/lib64/calibre/calibre/ebooks/oeb/reader.py", line 71, in __call__ opf = self._read_opf() File "/usr/lib64/calibre/calibre/ebooks/oeb/reader.py", line 104, in _read_opf data = self.oeb.decode(data) File "/usr/lib64/calibre/calibre/ebooks/oeb/base.py", line 1897, in decode return fix_data(data.decode(self.input_encoding, 'replace')) LookupError: unknown encoding: 
I'm not sure what it means by "unknown encoding". It successfully imports the HTML to ZIP, and only crashes when I convert to EPUB. I've been saving them as text and then adding them back into my library and converting that way, but it's not ideal. Does anyone have any ideas, or know of a bulk HTML-to-TXT/PDF (I know, gasp!) converter that would make this easier for me? Thanks!