Newsweek Recipe Error

lui1 · 06-16-2018, 02:19 PM

I think there is a problem with the Newsweek recipe. By the way, I think calibre is a pretty cool program, so thanks to Kovid Goyal and the contributors for making it. Here is the error message I receive when trying to download it.

Newsweek Recipe Error Message:

Code:

Fetch news from Newsweek
Conversion options changed from defaults:
  verbose: 2
  output_profile: 'generic_eink_hd'
Resolved conversion options
calibre version: 3.26.0
{'asciiize': False,
 'author_sort': None,
 'authors': None,
 'base_font_size': 0,
 'book_producer': None,
 'change_justification': 'original',
 'chapter': None,
 'chapter_mark': 'pagebreak',
 'comments': None,
 'cover': None,
 'debug_pipeline': None,
 'dehyphenate': True,
 'delete_blank_paragraphs': True,
 'disable_font_rescaling': False,
 'dont_download_recipe': False,
 'dont_split_on_page_breaks': True,
 'duplicate_links_in_toc': False,
 'embed_all_fonts': False,
 'embed_font_family': None,
 'enable_heuristics': False,
 'epub_flatten': False,
 'epub_inline_toc': False,
 'epub_toc_at_end': False,
 'epub_version': '2',
 'expand_css': False,
 'extra_css': None,
 'extract_to': None,
 'filter_css': None,
 'fix_indents': True,
 'flow_size': 260,
 'font_size_mapping': None,
 'format_scene_breaks': True,
 'html_unwrap_factor': 0.4,
 'input_encoding': None,
 'input_profile': <calibre.customize.profiles.InputProfile object at 0xaff832ac>,
 'insert_blank_line': False,
 'insert_blank_line_size': 0.5,
 'insert_metadata': False,
 'isbn': None,
 'italicize_common_cases': True,
 'keep_ligatures': False,
 'language': None,
 'level1_toc': None,
 'level2_toc': None,
 'level3_toc': None,
 'line_height': 0,
 'linearize_tables': False,
 'lrf': False,
 'margin_bottom': 5.0,
 'margin_left': 5.0,
 'margin_right': 5.0,
 'margin_top': 5.0,
 'markup_chapter_headings': True,
 'max_toc_links': 50,
 'minimum_line_height': 120.0,
 'no_chapters_in_toc': False,
 'no_default_epub_cover': False,
 'no_inline_navbars': False,
 'no_svg_cover': False,
 'output_profile': <calibre.customize.profiles.GenericEinkHD object at 0xaff8390c>,
 'page_breaks_before': None,
 'prefer_metadata_cover': False,
 'preserve_cover_aspect_ratio': False,
 'pretty_print': True,
 'pubdate': None,
 'publisher': None,
 'rating': None,
 'read_metadata_from_opf': None,
 'remove_fake_margins': True,
 'remove_first_image': False,
 'remove_paragraph_spacing': False,
 'remove_paragraph_spacing_indent_size': 1.5,
 'renumber_headings': True,
 'replace_scene_breaks': '',
 'search_replace': None,
 'series': None,
 'series_index': None,
 'smarten_punctuation': False,
 'sr1_replace': '',
 'sr1_search': '',
 'sr2_replace': '',
 'sr2_search': '',
 'sr3_replace': '',
 'sr3_search': '',
 'start_reading_at': None,
 'subset_embedded_fonts': False,
 'tags': None,
 'test': False,
 'timestamp': None,
 'title': None,
 'title_sort': None,
 'toc_filter': None,
 'toc_threshold': 6,
 'toc_title': None,
 'transform_css_rules': None,
 'unsmarten_punctuation': False,
 'unwrap_lines': True,
 'use_auto_toc': False,
 'verbose': 2}
InputFormatPlugin: Recipe Input running
Using custom recipe
Traceback (most recent call last):
  File "site.py", line 77, in main
  File "site-packages/calibre/utils/ipc/worker.py", line 195, in main
  File "site-packages/calibre/gui2/convert/gui_conversion.py", line 26, in gui_convert
  File "site-packages/calibre/ebooks/conversion/plumber.py", line 1088, in run
  File "site-packages/calibre/customize/conversion.py", line 244, in __call__
  File "site-packages/calibre/ebooks/conversion/plugins/recipe_input.py", line 119, in convert
  File "site-packages/calibre/web/feeds/news.py", line 1018, in download
  File "site-packages/calibre/web/feeds/news.py", line 1185, in build_index
  File "<string>", line 53, in parse_index
IndexError: list index out of range

kovidgoyal · 06-16-2018, 11:22 PM

https://github.com/kovidgoyal/calibr...d91be95f1f978d

lui1 · 08-03-2018, 12:44 PM

Here is the error message I got. Thanks.

Code:

Fetch news from Newsweek
Conversion options changed from defaults:
  verbose: 2
  output_profile: 'generic_eink_hd'
Resolved conversion options
calibre version: 3.28.0
{'asciiize': False,
 'author_sort': None,
 'authors': None,
 'base_font_size': 0,
 'book_producer': None,
 'change_justification': 'original',
 'chapter': None,
 'chapter_mark': 'pagebreak',
 'comments': None,
 'cover': None,
 'debug_pipeline': None,
 'dehyphenate': True,
 'delete_blank_paragraphs': True,
 'disable_font_rescaling': False,
 'dont_download_recipe': False,
 'dont_split_on_page_breaks': True,
 'duplicate_links_in_toc': False,
 'embed_all_fonts': False,
 'embed_font_family': None,
 'enable_heuristics': False,
 'epub_flatten': False,
 'epub_inline_toc': False,
 'epub_toc_at_end': False,
 'epub_version': '2',
 'expand_css': False,
 'extra_css': None,
 'extract_to': None,
 'filter_css': None,
 'fix_indents': True,
 'flow_size': 260,
 'font_size_mapping': None,
 'format_scene_breaks': True,
 'html_unwrap_factor': 0.4,
 'input_encoding': None,
 'input_profile': <calibre.customize.profiles.InputProfile object at 0xaff7882c>,
 'insert_blank_line': False,
 'insert_blank_line_size': 0.5,
 'insert_metadata': False,
 'isbn': None,
 'italicize_common_cases': True,
 'keep_ligatures': False,
 'language': None,
 'level1_toc': None,
 'level2_toc': None,
 'level3_toc': None,
 'line_height': 0,
 'linearize_tables': False,
 'lrf': False,
 'margin_bottom': 5.0,
 'margin_left': 5.0,
 'margin_right': 5.0,
 'margin_top': 5.0,
 'markup_chapter_headings': True,
 'max_toc_links': 50,
 'minimum_line_height': 120.0,
 'no_chapters_in_toc': False,
 'no_default_epub_cover': False,
 'no_inline_navbars': False,
 'no_svg_cover': False,
 'output_profile': <calibre.customize.profiles.GenericEinkHD object at 0xaff78e8c>,
 'page_breaks_before': None,
 'prefer_metadata_cover': False,
 'preserve_cover_aspect_ratio': False,
 'pretty_print': True,
 'pubdate': None,
 'publisher': None,
 'rating': None,
 'read_metadata_from_opf': None,
 'remove_fake_margins': True,
 'remove_first_image': False,
 'remove_paragraph_spacing': False,
 'remove_paragraph_spacing_indent_size': 1.5,
 'renumber_headings': True,
 'replace_scene_breaks': '',
 'search_replace': None,
 'series': None,
 'series_index': None,
 'smarten_punctuation': False,
 'sr1_replace': '',
 'sr1_search': '',
 'sr2_replace': '',
 'sr2_search': '',
 'sr3_replace': '',
 'sr3_search': '',
 'start_reading_at': None,
 'subset_embedded_fonts': False,
 'tags': None,
 'test': False,
 'timestamp': None,
 'title': None,
 'title_sort': None,
 'toc_filter': None,
 'toc_threshold': 6,
 'toc_title': None,
 'transform_css_rules': None,
 'unsmarten_punctuation': False,
 'unwrap_lines': True,
 'use_auto_toc': False,
 'verbose': 2}
InputFormatPlugin: Recipe Input running
Using custom recipe
Traceback (most recent call last):
  File "site.py", line 77, in main
  File "site-packages/calibre/utils/ipc/worker.py", line 195, in main
  File "site-packages/calibre/gui2/convert/gui_conversion.py", line 26, in gui_convert
  File "site-packages/calibre/ebooks/conversion/plumber.py", line 1106, in run
  File "site-packages/calibre/customize/conversion.py", line 246, in __call__
  File "site-packages/calibre/ebooks/conversion/plugins/recipe_input.py", line 120, in convert
  File "site-packages/calibre/web/feeds/news.py", line 1018, in download
  File "site-packages/calibre/web/feeds/news.py", line 1185, in build_index
  File "<string>", line 52, in parse_index
IndexError: list index out of range

lui1 · 08-03-2018, 07:21 PM

Here is how I fixed the recipe. I added some tags to be removed because they are not part of the articles.

Newsweek Recipe Fix:

Code:

from calibre.web.feeds.news import BasicNewsRecipe
from collections import defaultdict

BASE = 'http://www.newsweek.com'


def href_to_url(a, add_piano=False):
    """Return the absolute URL for link element *a*.

    The element's ``href`` attribute is appended to ``BASE``. When
    *add_piano* is true, the ``?piano_d=1`` query string is added as well.
    """
    url = BASE + a.get('href')
    if add_piano:
        url += '?piano_d=1'
    return url


def class_sels(*args):
    """Build a tag selector matching any of the given CSS class names.

    Returns a dict of the form ``{'attrs': {'class': matcher}}`` where
    ``matcher`` receives the value of a ``class`` attribute and yields a
    truthy result when at least one of its whitespace-separated class names
    is among *args*.
    """
    wanted = set(args)

    def matcher(value):
        # Missing or empty class attributes are passed through unchanged
        # (they are falsy, i.e. "no match").
        if not value:
            return value
        return set(value.split()) & wanted

    return {'attrs': {'class': matcher}}


class Newsweek(BasicNewsRecipe):
    """calibre recipe that downloads the most recent issue of Newsweek.

    The issue is located through the magazine archive page; its articles
    are then collected from the issue page and grouped into sections.
    """

    title = 'Newsweek'
    __author__ = 'Kovid Goyal'
    description = 'Weekly news and current affairs in the US'
    language = 'en'
    encoding = 'utf-8'
    no_stylesheets = True
    requires_version = (1, 40, 0)

    # A list of tag specifications, like ``remove_tags`` below.
    keep_only_tags = [
        class_sels('article-header', 'article-body', 'header-image'),
    ]
    # Elements that are not part of the article text.
    remove_tags = [
        dict(name='aside'),
        dict(name='meta'),
        class_sels(
            'block-openadstream', 'block-ibtmedia-social', 'issue-next',
            'most-popular', 'ibt-media-stories', 'user-btn-group',
            'trial-link', 'trc_related_container',
            'block-ibtmedia-top-stories', 'videocontent', 'newsletter-signup',
            'in-text-slideshows', 'content-correction'
        ),
        dict(id=['taboola-below-main-column', 'piano-root',
                 'block-nw-magazine-magazine-more-from-issue']),
    ]
    remove_attributes = ['style']

    # Links that sit directly inside an h1-h4 heading.
    _HEADLINE_LINK_XPATH = (
        'descendant::*[local-name()="h1" or local-name()="h2" or '
        'local-name()="h3" or local-name()="h4"]/a[@href]')

    def _parse_article(self, a):
        """Return ``(section, article_dict)`` for the headline link *a*.

        The summary and category are read from the enclosing ``<article>``
        element. If there is none, the description is empty and the section
        defaults to ``'Articles'``.
        """
        title = self.tag_to_string(a)
        article_url = href_to_url(a)
        desc = ''
        section = 'Articles'
        containers = a.xpath('ancestor::article')
        if containers:
            article = containers[0]
            summary = article.xpath('descendant::div[@class="summary"]')
            if summary:
                desc = self.tag_to_string(summary[0])
            category = article.xpath('descendant::div[@class="category"]')
            if category:
                section = self.tag_to_string(category[0])
        # Log the article's own URL, not the URL of the issue page.
        self.log(title, article_url)
        if desc:
            self.log('\t' + desc)
        self.log('')
        return section, {
            'title': title, 'url': article_url, 'description': desc}

    def _block_articles(self, root, block_id):
        """Yield ``(section, article_dict)`` for each headline in a block.

        *block_id* is the ``id`` of a ``<div>`` on the issue page. A block
        that is absent from the page is skipped instead of raising
        ``IndexError``.
        """
        divs = root.xpath('//div[@id="{}"]'.format(block_id))
        if not divs:
            self.log('Block not found on the issue page, skipping:', block_id)
            return
        for a in divs[0].xpath(self._HEADLINE_LINK_XPATH):
            yield self._parse_article(a)

    def parse_index(self):
        """Return the issue index as a list of ``(section, articles)`` tuples.

        Each article is a dict with the keys ``title``, ``url`` and
        ``description``. Also sets ``self.timefmt`` from the text of the
        issue link and, when a cover image is present, ``self.cover_url``.

        Raises:
            IndexError: if the archive page does not list any issue.
        """
        root = self.index_to_soup(
            'http://www.newsweek.com/archive', as_tree=True)
        # Only the first item of the archive list is used.
        li = root.xpath(
            '//ul[contains(@class, "magazine-archive-items")]/li')[0]
        a = li.xpath('descendant::a[@href]')[0]
        url = href_to_url(a, add_piano=True)
        self.timefmt = self.tag_to_string(a)
        # A missing cover must not abort the whole download.
        covers = li.xpath('descendant::a[@href]/img[@src]')
        if covers:
            self.cover_url = covers[0].get('src')
        else:
            self.log('No cover image found for the issue at', url)

        root = self.index_to_soup(url, as_tree=True)
        index = []
        # Everything in the features block goes into one 'Features' section,
        # regardless of each article's own category.
        features = [
            article for _, article in self._block_articles(
                root, 'block-nw-magazine-magazine-features')]
        if features:
            index.append(('Features', features))

        sections = defaultdict(list)
        for block in ('magazine-magazine-issue-story-list', 'editors-pick'):
            for section, article in self._block_articles(
                    root, 'block-nw-' + block):
                sections[section].append(article)
        for k in sorted(sections):
            index.append((k, sections[k]))
        return index

    def print_version(self, url):
        """Return *url* with the ``?piano_d=1`` query string appended."""
        return url + '?piano_d=1'

    def preprocess_html(self, soup):
        """Convert parallax background images into ordinary ``<img>`` tags.

        Returns the modified *soup*.
        """
        # Parallax images in the articles are loaded as background images
        # on <span> tags. Convert them to normal images.
        for span in soup.findAll('span', attrs={'class': lambda x: x and 'parallax' in x.split()}):
            s = span.find(style=True)
            if s is None:
                continue
            # Take the text between the first '(' and the following ')',
            # dropping optional quotes, so that a trailing ';' or a quoted
            # URL in the style declaration does not corrupt the result.
            url = s['style'].partition('(')[-1].partition(')')[0].strip('\'" ')
            if not url:
                continue
            s['style'] = 'display: block'
            s.name = 'img'
            s['src'] = url
        return soup

Similar Threads
Thread	Thread Starter	Forum	Replies	Last Post
Newsweek recipe now fails	NSILMike	Recipes	6	08-02-2017 06:40 PM
Newsweek Polska - fixed recipe	admroz	Recipes	1	10-16-2013 02:14 PM
Newsweek recipe broken?	NSILMike	Recipes	3	08-04-2011 10:02 PM
newsweek recipe failure	scwehrl	Calibre	12	05-19-2009 04:57 PM
Newsweek Recipe	SnafuRacer	Calibre	5	07-07-2008 02:35 PM

06-16-2018, 11:22 PM	#2
kovidgoyal creator of calibre Posts: 44,340 Karma: 23661992 Join Date: Oct 2006 Location: Mumbai, India Device: Various	https://github.com/kovidgoyal/calibr...d91be95f1f978d