|
|
#1 |
|
Enthusiast
![]() Posts: 36
Karma: 10
Join Date: Dec 2017
Location: Los Angeles, CA
Device: Smart Phone
|
Newsweek Recipe Error
I think there is a problem with the Newsweek recipe. By the way, I think calibre is a pretty cool program, so thanks to Kovid Goyal and the contributors for making it. Here is the error message I receive when trying to download it.
Newsweek Recipe Error Message: Code:
Fetch news from Newsweek
Conversion options changed from defaults:
verbose: 2
output_profile: 'generic_eink_hd'
Resolved conversion options
calibre version: 3.26.0
{'asciiize': False,
'author_sort': None,
'authors': None,
'base_font_size': 0,
'book_producer': None,
'change_justification': 'original',
'chapter': None,
'chapter_mark': 'pagebreak',
'comments': None,
'cover': None,
'debug_pipeline': None,
'dehyphenate': True,
'delete_blank_paragraphs': True,
'disable_font_rescaling': False,
'dont_download_recipe': False,
'dont_split_on_page_breaks': True,
'duplicate_links_in_toc': False,
'embed_all_fonts': False,
'embed_font_family': None,
'enable_heuristics': False,
'epub_flatten': False,
'epub_inline_toc': False,
'epub_toc_at_end': False,
'epub_version': '2',
'expand_css': False,
'extra_css': None,
'extract_to': None,
'filter_css': None,
'fix_indents': True,
'flow_size': 260,
'font_size_mapping': None,
'format_scene_breaks': True,
'html_unwrap_factor': 0.4,
'input_encoding': None,
'input_profile': <calibre.customize.profiles.InputProfile object at 0xaff832ac>,
'insert_blank_line': False,
'insert_blank_line_size': 0.5,
'insert_metadata': False,
'isbn': None,
'italicize_common_cases': True,
'keep_ligatures': False,
'language': None,
'level1_toc': None,
'level2_toc': None,
'level3_toc': None,
'line_height': 0,
'linearize_tables': False,
'lrf': False,
'margin_bottom': 5.0,
'margin_left': 5.0,
'margin_right': 5.0,
'margin_top': 5.0,
'markup_chapter_headings': True,
'max_toc_links': 50,
'minimum_line_height': 120.0,
'no_chapters_in_toc': False,
'no_default_epub_cover': False,
'no_inline_navbars': False,
'no_svg_cover': False,
'output_profile': <calibre.customize.profiles.GenericEinkHD object at 0xaff8390c>,
'page_breaks_before': None,
'prefer_metadata_cover': False,
'preserve_cover_aspect_ratio': False,
'pretty_print': True,
'pubdate': None,
'publisher': None,
'rating': None,
'read_metadata_from_opf': None,
'remove_fake_margins': True,
'remove_first_image': False,
'remove_paragraph_spacing': False,
'remove_paragraph_spacing_indent_size': 1.5,
'renumber_headings': True,
'replace_scene_breaks': '',
'search_replace': None,
'series': None,
'series_index': None,
'smarten_punctuation': False,
'sr1_replace': '',
'sr1_search': '',
'sr2_replace': '',
'sr2_search': '',
'sr3_replace': '',
'sr3_search': '',
'start_reading_at': None,
'subset_embedded_fonts': False,
'tags': None,
'test': False,
'timestamp': None,
'title': None,
'title_sort': None,
'toc_filter': None,
'toc_threshold': 6,
'toc_title': None,
'transform_css_rules': None,
'unsmarten_punctuation': False,
'unwrap_lines': True,
'use_auto_toc': False,
'verbose': 2}
InputFormatPlugin: Recipe Input running
Using custom recipe
Traceback (most recent call last):
File "site.py", line 77, in main
File "site-packages/calibre/utils/ipc/worker.py", line 195, in main
File "site-packages/calibre/gui2/convert/gui_conversion.py", line 26, in gui_convert
File "site-packages/calibre/ebooks/conversion/plumber.py", line 1088, in run
File "site-packages/calibre/customize/conversion.py", line 244, in __call__
File "site-packages/calibre/ebooks/conversion/plugins/recipe_input.py", line 119, in convert
File "site-packages/calibre/web/feeds/news.py", line 1018, in download
File "site-packages/calibre/web/feeds/news.py", line 1185, in build_index
File "<string>", line 53, in parse_index
IndexError: list index out of range
|
|
|
|
|
|
#2 |
|
creator of calibre
![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() Posts: 45,597
Karma: 28548962
Join Date: Oct 2006
Location: Mumbai, India
Device: Various
|
|
|
|
|
| Advert | |
|
|
|
|
#3 |
|
Enthusiast
![]() Posts: 36
Karma: 10
Join Date: Dec 2017
Location: Los Angeles, CA
Device: Smart Phone
|
Newsweek stopped working.
Here is the error message I got. Thanks.
Code:
Fetch news from Newsweek
Conversion options changed from defaults:
verbose: 2
output_profile: 'generic_eink_hd'
Resolved conversion options
calibre version: 3.28.0
{'asciiize': False,
'author_sort': None,
'authors': None,
'base_font_size': 0,
'book_producer': None,
'change_justification': 'original',
'chapter': None,
'chapter_mark': 'pagebreak',
'comments': None,
'cover': None,
'debug_pipeline': None,
'dehyphenate': True,
'delete_blank_paragraphs': True,
'disable_font_rescaling': False,
'dont_download_recipe': False,
'dont_split_on_page_breaks': True,
'duplicate_links_in_toc': False,
'embed_all_fonts': False,
'embed_font_family': None,
'enable_heuristics': False,
'epub_flatten': False,
'epub_inline_toc': False,
'epub_toc_at_end': False,
'epub_version': '2',
'expand_css': False,
'extra_css': None,
'extract_to': None,
'filter_css': None,
'fix_indents': True,
'flow_size': 260,
'font_size_mapping': None,
'format_scene_breaks': True,
'html_unwrap_factor': 0.4,
'input_encoding': None,
'input_profile': <calibre.customize.profiles.InputProfile object at 0xaff7882c>,
'insert_blank_line': False,
'insert_blank_line_size': 0.5,
'insert_metadata': False,
'isbn': None,
'italicize_common_cases': True,
'keep_ligatures': False,
'language': None,
'level1_toc': None,
'level2_toc': None,
'level3_toc': None,
'line_height': 0,
'linearize_tables': False,
'lrf': False,
'margin_bottom': 5.0,
'margin_left': 5.0,
'margin_right': 5.0,
'margin_top': 5.0,
'markup_chapter_headings': True,
'max_toc_links': 50,
'minimum_line_height': 120.0,
'no_chapters_in_toc': False,
'no_default_epub_cover': False,
'no_inline_navbars': False,
'no_svg_cover': False,
'output_profile': <calibre.customize.profiles.GenericEinkHD object at 0xaff78e8c>,
'page_breaks_before': None,
'prefer_metadata_cover': False,
'preserve_cover_aspect_ratio': False,
'pretty_print': True,
'pubdate': None,
'publisher': None,
'rating': None,
'read_metadata_from_opf': None,
'remove_fake_margins': True,
'remove_first_image': False,
'remove_paragraph_spacing': False,
'remove_paragraph_spacing_indent_size': 1.5,
'renumber_headings': True,
'replace_scene_breaks': '',
'search_replace': None,
'series': None,
'series_index': None,
'smarten_punctuation': False,
'sr1_replace': '',
'sr1_search': '',
'sr2_replace': '',
'sr2_search': '',
'sr3_replace': '',
'sr3_search': '',
'start_reading_at': None,
'subset_embedded_fonts': False,
'tags': None,
'test': False,
'timestamp': None,
'title': None,
'title_sort': None,
'toc_filter': None,
'toc_threshold': 6,
'toc_title': None,
'transform_css_rules': None,
'unsmarten_punctuation': False,
'unwrap_lines': True,
'use_auto_toc': False,
'verbose': 2}
InputFormatPlugin: Recipe Input running
Using custom recipe
Traceback (most recent call last):
File "site.py", line 77, in main
File "site-packages/calibre/utils/ipc/worker.py", line 195, in main
File "site-packages/calibre/gui2/convert/gui_conversion.py", line 26, in gui_convert
File "site-packages/calibre/ebooks/conversion/plumber.py", line 1106, in run
File "site-packages/calibre/customize/conversion.py", line 246, in __call__
File "site-packages/calibre/ebooks/conversion/plugins/recipe_input.py", line 120, in convert
File "site-packages/calibre/web/feeds/news.py", line 1018, in download
File "site-packages/calibre/web/feeds/news.py", line 1185, in build_index
File "<string>", line 52, in parse_index
IndexError: list index out of range
|
|
|
|
|
|
#4 |
|
Enthusiast
![]() Posts: 36
Karma: 10
Join Date: Dec 2017
Location: Los Angeles, CA
Device: Smart Phone
|
The Fix
Here is how I fixed the recipe. I added some tags to be removed because they are not part of the articles.
Newsweek Recipe Fix: Code:
from calibre.web.feeds.news import BasicNewsRecipe
from collections import defaultdict
# Root of the Newsweek site; site-relative hrefs are resolved against it.
BASE = 'http://www.newsweek.com'


def href_to_url(a, add_piano=False):
    """Return the full URL that the link element *a* points to.

    *a* is any object exposing ``get('href')`` (for example an lxml ``<a>``
    element) and must carry an ``href`` attribute. Site-relative hrefs are
    prefixed with ``BASE``; hrefs that are already absolute ``http(s)`` URLs
    are left alone instead of being glued onto ``BASE``.

    When *add_piano* is true, the ``?piano_d=1`` query string is appended.
    """
    href = a.get('href')
    if not href.startswith(('http://', 'https://')):
        href = BASE + href
    return href + ('?piano_d=1' if add_piano else '')
def class_sels(*args):
    """Build a tag selector matching elements that have any of the given classes.

    Returns a dict of the form ``{'attrs': {'class': matcher}}``. ``matcher``
    takes the raw value of a ``class`` attribute (a whitespace-separated
    string, or ``None``/empty when the attribute is missing) and reports
    whether at least one of its class names is among *args*.
    """
    wanted = frozenset(args)

    def matches(value):
        # A missing or empty class attribute can never match.
        if not value:
            return False
        return bool(wanted.intersection(value.split()))

    return dict(attrs={'class': matches})
class Newsweek(BasicNewsRecipe):
    """calibre recipe that fetches the most recent issue of Newsweek.

    ``parse_index`` reads the archive page to locate the latest issue and
    then gathers that issue's articles, grouped by section.
    ``preprocess_html`` rewrites parallax background images as ``<img>`` tags.
    """

    title = 'Newsweek'
    __author__ = 'Kovid Goyal'
    description = 'Weekly news and current affairs in the US'
    language = 'en'
    encoding = 'utf-8'
    no_stylesheets = True
    requires_version = (1, 40, 0)

    # Only elements carrying one of these classes are kept from each page.
    keep_only_tags = class_sels(
        'article-header', 'article-body', 'header-image')

    # Elements that are not part of the article itself (ads, social widgets,
    # related-story boxes, newsletter sign-ups, ...).
    remove_tags = [
        dict(name='aside'),
        dict(name='meta'),
        class_sels(
            'block-openadstream', 'block-ibtmedia-social', 'issue-next',
            'most-popular', 'ibt-media-stories', 'user-btn-group',
            'trial-link', 'trc_related_container',
            'block-ibtmedia-top-stories', 'videocontent', 'newsletter-signup',
            'in-text-slideshows', 'content-correction'
        ),
        dict(id=['taboola-below-main-column', 'piano-root',
                 'block-nw-magazine-magazine-more-from-issue']),
    ]
    remove_attributes = ['style']

    def _article_details(self, a):
        """Return ``(entry, section)`` for the headline link *a*.

        ``entry`` is the article dict (``title``, ``url``, ``description``)
        and ``section`` is the text of the article's category ``<div>``, or
        ``'Articles'`` when there is none. A link without an enclosing
        ``<article>`` element gets an empty description and the default
        section instead of raising ``IndexError``.
        """
        desc = ''
        section = 'Articles'
        enclosing = a.xpath('ancestor::article')
        if enclosing:
            article = enclosing[0]
            summary = article.xpath('descendant::div[@class="summary"]')
            if summary:
                desc = self.tag_to_string(summary[0])
            category = article.xpath('descendant::div[@class="category"]')
            if category:
                section = self.tag_to_string(category[0])
        entry = {
            'title': self.tag_to_string(a),
            'url': href_to_url(a),
            'description': desc,
        }
        return entry, section

    def parse_index(self):
        """Return the latest issue's index as ``[(section, [article, ...])]``.

        Side effects: sets ``self.timefmt`` to the text of the issue link
        and, when the archive entry has a cover image, ``self.cover_url``.

        Raises ``IndexError`` (with a descriptive message) when the archive
        page does not contain the expected issue list or issue link.
        """
        root = self.index_to_soup(
            'http://www.newsweek.com/archive', as_tree=True)
        issues = root.xpath(
            '//ul[contains(@class, "magazine-archive-items")]/li')
        if not issues:
            raise IndexError(
                'No "magazine-archive-items" list found on the Newsweek'
                ' archive page')
        li = issues[0]
        links = li.xpath('descendant::a[@href]')
        if not links:
            raise IndexError(
                'No issue link found in the first Newsweek archive entry')
        a = links[0]
        url = href_to_url(a, add_piano=True)
        self.timefmt = self.tag_to_string(a)

        # A missing cover should not abort the whole download.
        covers = li.xpath('descendant::a[@href]/img[@src]')
        if covers:
            self.cover_url = covers[0].get('src')
        else:
            self.log('No cover image found for the latest issue')

        root = self.index_to_soup(url, as_tree=True)
        href_xpath = 'descendant::*[local-name()="h1" or local-name()="h2" or local-name()="h3" or local-name()="h4"]/a[@href]'

        index = []

        # Feature stories: always filed under "Features".
        features = []
        blocks = root.xpath('//div[@id="block-nw-magazine-magazine-features"]')
        if blocks:
            for a in blocks[0].xpath(href_xpath):
                entry, _ = self._article_details(a)
                # Log the article's own URL, not the issue URL.
                self.log(entry['title'], entry['url'])
                features.append(entry)
        if features:
            index.append(('Features', features))

        # Remaining stories: grouped by the category shown with each article.
        sections = defaultdict(list)
        for block in ('magazine-magazine-issue-story-list', 'editors-pick'):
            blocks = root.xpath('//div[@id="block-nw-{}"]'.format(block))
            if not blocks:
                # Treat a missing block like a missing features block: skip
                # it instead of failing with "list index out of range".
                self.log('Block not found, skipping:', block)
                continue
            for a in blocks[0].xpath(href_xpath):
                entry, section = self._article_details(a)
                sections[section].append(entry)
                self.log(entry['title'], entry['url'])
                if entry['description']:
                    self.log('\t' + entry['description'])
                self.log('')
        for k in sorted(sections):
            index.append((k, sections[k]))
        return index

    def print_version(self, url):
        """Return *url* with the ``?piano_d=1`` query string appended."""
        return url + '?piano_d=1'

    def preprocess_html(self, soup):
        """Turn parallax background images into ordinary ``<img>`` tags.

        Parallax images in the articles are loaded as background images on
        tags inside ``<span class="... parallax ...">``. For each such span,
        the first descendant with a ``style`` attribute is converted into an
        ``<img>`` whose ``src`` is the URL found between the parentheses of
        that style. Returns the modified *soup*.
        """
        for span in soup.findAll('span', attrs={'class': lambda x: x and 'parallax' in x.split()}):
            s = span.find(style=True)
            if s is None:
                continue
            # Take the text between "(" and ")" and drop optional quotes, so
            # a trailing ";" or quoted URL does not corrupt the result.
            inner = s['style'].partition('(')[2].partition(')')[0]
            url = inner.strip(' \t\'"')
            if not url:
                # Nothing usable in the style; leave the tag untouched.
                continue
            s['style'] = 'display: block'
            s.name = 'img'
            s['src'] = url
        return soup
|
|
|
|
![]() |
|
Similar Threads
|
||||
| Thread | Thread Starter | Forum | Replies | Last Post |
| Newsweek recipe now fails | NSILMike | Recipes | 6 | 08-02-2017 06:40 PM |
| Newsweek Polska - fixed recipe | admroz | Recipes | 1 | 10-16-2013 02:14 PM |
| Newsweek recipe broken? | NSILMike | Recipes | 3 | 08-04-2011 10:02 PM |
| newsweek recipe failure | scwehrl | Calibre | 12 | 05-19-2009 04:57 PM |
| Newsweek Recipe | SnafuRacer | Calibre | 5 | 07-07-2008 02:35 PM |