View Single Post
Old 03-15-2021, 06:29 PM   #2
d.r.l
Member
d.r.l began at the beginning.
 
Posts: 17
Karma: 10
Join Date: Jun 2015
Location: USA (expat from Belgium)
Device: Android Tablet
I kept digging

So, I kept digging and realised the recipe was a tad outdated and was missing the code to log in as a browser before downloading the issues.

I have an error in the customized recipe I tried to come up with.

Here's my recipe as it stands

Code:
from calibre.web.feeds.news import BasicNewsRecipe
import re

def get_browser(self):
    """Return a calibre browser, submitting the member login form first
    when credentials are available.

    The browser is obtained from ``BasicNewsRecipe.get_browser(self)``.
    When either ``self.username`` or ``self.password`` is ``None`` the
    browser is returned as-is; otherwise the member-login page is opened,
    the form named ``login`` is selected, filled in and submitted.
    """
    browser = BasicNewsRecipe.get_browser(self)

    # No credentials supplied: hand back the plain, anonymous browser.
    if self.username is None or self.password is None:
        return browser

    browser.open('https://skepticalinquirer.org/member-login/')
    browser.select_form(name='login')
    # NOTE(review): both assignments below use the same control name
    # 'HIDDEN', so the password overwrites the username. This looks like
    # the forum software redacting the real field names -- replace them
    # with the actual `name` attributes of the login form's inputs.
    browser['HIDDEN'] = self.username
    browser['HIDDEN'] = self.password
    browser.submit()
    return browser


class TheSkepticalInquirer(BasicNewsRecipe):
    """calibre recipe that builds one feed from the Skeptical Inquirer
    archive page.

    ``parse_index`` is overridden, so the article list comes from scraping
    https://skepticalinquirer.org/archive/ rather than from an RSS feed.
    """

    title = u'The Skeptical Inquirer'
    description = 'Investigation of fringe science and paranormal claims.'
    language = 'en'
    __author__ = 'Starson17'
    oldest_article = 31
    cover_url = 'https://skepticalinquirer.org/wp-content/uploads/sites/29/2019/03/SI-logo-tag-line-w.png'
    remove_empty_feeds = True
    remove_javascript = True
    max_articles_per_feed = 50
    no_stylesheets = True
    # Lets calibre ask for a username/password without making them
    # mandatory; with no credentials the recipe behaves as it did before.
    needs_subscription = 'optional'

    keep_only_tags = [dict(name='div', attrs={'id': ['content', 'bio']})]

    remove_tags = [
        dict(name='div', attrs={'id': ['socialMedia']}),
    ]

    preprocess_regexps = [
        (re.compile(r'\.\(JavaScript must be enabled to view this email address\)',
                    re.DOTALL | re.IGNORECASE), lambda match: ''),
    ]

    def get_browser(self):
        """Return the browser calibre should use for all downloads.

        calibre only calls ``get_browser`` when it is a method of the
        recipe class, so this delegates to the module-level
        ``get_browser`` function, which performs the member login when a
        username and password are set.
        """
        # Inside a method body the bare name resolves to the module-level
        # function, not to this method, so there is no recursion.
        return get_browser(self)

    def parse_index(self):
        """Return the feed list as ``[(feed_title, [article_dict, ...])]``.

        Index pages that yield no articles are left out of the result.
        """
        feeds = []
        for title, url in [("The Skeptical Inquirer", "https://skepticalinquirer.org/archive/")]:
            articles = self.make_links(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def make_links(self, url):
        """Collect article entries from the index page at *url*.

        Every element whose class matches ``'article-single bigger'`` and
        that contains an ``<a href=...>`` contributes one entry.

        Returns a list of dicts with the keys ``title``, ``url``,
        ``description`` and ``date`` (the last two are always empty).
        """
        from urllib.parse import urljoin

        soup = self.index_to_soup(url)
        current_articles = []
        for item in soup.findAll(attrs={'class': ['article-single bigger']}):
            link = item.a
            # Skip teasers without a usable anchor instead of raising
            # AttributeError/KeyError and aborting the whole download.
            if link is None or not link.get('href'):
                continue
            current_articles.append({
                # tag_to_string copes with anchors that contain nested
                # markup, where ``.string`` is None and str() of it would
                # produce the literal title 'None'.
                'title': self.tag_to_string(link).strip(),
                # urljoin resolves relative, root-relative and absolute
                # hrefs; plain concatenation only worked for relative ones.
                'url': urljoin(url, str(link['href'])),
                'description': '',
                'date': '',
            })
        return current_articles

    extra_css = '''
                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''


# Module-level constant holding a browser user-agent string. Nothing in the
# visible recipe code reads this name.
# NOTE(review): presumably intended to override the user agent calibre sends;
# as a module global it is not an attribute of the recipe class -- confirm
# the intent and move it into the class if an override is wanted.
calibre_most_common_ua = 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'
Here's the result

Code:
calibre, version 5.13.0 (win32, embedded-python: True)
Conversion error: Failed: Fetch news from The Skeptical Inquirer

Fetch news from The Skeptical Inquirer
Conversion options changed from defaults:
  verbose: 2
  output_profile: 'tablet'
Resolved conversion options
calibre version: 5.13.0
{'asciiize': False,
 'author_sort': None,
 'authors': None,
 'base_font_size': 0,
 'book_producer': None,
 'change_justification': 'original',
 'chapter': None,
 'chapter_mark': 'pagebreak',
 'comments': None,
 'cover': None,
 'debug_pipeline': None,
 'dehyphenate': True,
 'delete_blank_paragraphs': True,
 'disable_font_rescaling': False,
 'dont_download_recipe': False,
 'dont_split_on_page_breaks': True,
 'duplicate_links_in_toc': False,
 'embed_all_fonts': False,
 'embed_font_family': None,
 'enable_heuristics': False,
 'epub_flatten': False,
 'epub_inline_toc': False,
 'epub_toc_at_end': False,
 'epub_version': '2',
 'expand_css': False,
 'extra_css': None,
 'extract_to': None,
 'filter_css': None,
 'fix_indents': True,
 'flow_size': 260,
 'font_size_mapping': None,
 'format_scene_breaks': True,
 'html_unwrap_factor': 0.4,
 'input_encoding': None,
 'input_profile': <calibre.customize.profiles.InputProfile object at 0x000002415DD06A00>,
 'insert_blank_line': False,
 'insert_blank_line_size': 0.5,
 'insert_metadata': False,
 'isbn': None,
 'italicize_common_cases': True,
 'keep_ligatures': False,
 'language': None,
 'level1_toc': None,
 'level2_toc': None,
 'level3_toc': None,
 'line_height': 0,
 'linearize_tables': False,
 'lrf': False,
 'margin_bottom': 5.0,
 'margin_left': 5.0,
 'margin_right': 5.0,
 'margin_top': 5.0,
 'markup_chapter_headings': True,
 'max_toc_links': 50,
 'minimum_line_height': 120.0,
 'no_chapters_in_toc': False,
 'no_default_epub_cover': False,
 'no_inline_navbars': False,
 'no_svg_cover': False,
 'output_profile': <calibre.customize.profiles.TabletOutput object at 0x000002415DD2B340>,
 'page_breaks_before': None,
 'prefer_metadata_cover': False,
 'preserve_cover_aspect_ratio': False,
 'pretty_print': True,
 'pubdate': None,
 'publisher': None,
 'rating': None,
 'read_metadata_from_opf': None,
 'remove_fake_margins': True,
 'remove_first_image': False,
 'remove_paragraph_spacing': False,
 'remove_paragraph_spacing_indent_size': 1.5,
 'renumber_headings': True,
 'replace_scene_breaks': '',
 'search_replace': None,
 'series': None,
 'series_index': None,
 'smarten_punctuation': False,
 'sr1_replace': '',
 'sr1_search': '',
 'sr2_replace': '',
 'sr2_search': '',
 'sr3_replace': '',
 'sr3_search': '',
 'start_reading_at': None,
 'subset_embedded_fonts': False,
 'tags': None,
 'test': False,
 'timestamp': None,
 'title': None,
 'title_sort': None,
 'toc_filter': None,
 'toc_threshold': 6,
 'toc_title': None,
 'transform_css_rules': None,
 'unsmarten_punctuation': False,
 'unwrap_lines': True,
 'use_auto_toc': False,
 'verbose': 2}
InputFormatPlugin: Recipe Input running
Downloading recipe urn: custom:1000
Using user agent: Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko
Traceback (most recent call last):
  File "runpy.py", line 194, in _run_module_as_main
  File "runpy.py", line 87, in _run_code
  File "site.py", line 82, in <module>
  File "site.py", line 77, in main
  File "site.py", line 49, in run_entry_point
  File "calibre\utils\ipc\worker.py", line 216, in main
  File "calibre\gui2\convert\gui_conversion.py", line 34, in gui_convert_recipe
  File "calibre\gui2\convert\gui_conversion.py", line 28, in gui_convert
  File "calibre\ebooks\conversion\plumber.py", line 1105, in run
  File "calibre\customize\conversion.py", line 245, in __call__
  File "calibre\ebooks\conversion\plugins\recipe_input.py", line 139, in convert
  File "calibre\web\feeds\news.py", line 1051, in download
  File "calibre\web\feeds\news.py", line 1231, in build_index
ValueError: No articles found, aborting
To clarify, what I'm trying to achieve is to have calibre download all the past issues I have access to, which are in PDF format and listed by date on the website with a link pointing to each issue. Here's a snippet of a few links:
https://skepticalinquirer.org/wp-con...1-proof-v2.pdf
https://skepticalinquirer.org/wp-con...21-reduced.pdf

I'm sure something is wrong with my code. I'll keep searching for a solution while waiting for someone to help out here (not a complaint, I simply can't help myself).

Quote:
Originally Posted by d.r.l View Post
Dear team,

I'm trying to set up calibre to download the Skeptical Inquirer magazines I'm subscribed to.
I've set up the schedule for download: first day of the month, never delete older news, and keep at most: all issues.

Then, hitting download now or ok does not prompt for my logon information.
First question: how can I add my logon information?

I have the following error message:

Code:
calibre, version 5.13.0 (win32, embedded-python: True)
Conversion error: Failed: Fetch news from The Skeptical Inquirer

Fetch news from The Skeptical Inquirer
Conversion options changed from defaults:
  verbose: 2
  output_profile: 'generic_eink'
Resolved conversion options
calibre version: 5.13.0
{'asciiize': False,
 'author_sort': None,
 'authors': None,
 'base_font_size': 0,
 'book_producer': None,
 'change_justification': 'original',
 'chapter': None,
 'chapter_mark': 'pagebreak',
 'comments': None,
 'cover': None,
 'debug_pipeline': None,
 'dehyphenate': True,
 'delete_blank_paragraphs': True,
 'disable_font_rescaling': False,
 'dont_download_recipe': False,
 'dont_split_on_page_breaks': True,
 'duplicate_links_in_toc': False,
 'embed_all_fonts': False,
 'embed_font_family': None,
 'enable_heuristics': False,
 'epub_flatten': False,
 'epub_inline_toc': False,
 'epub_toc_at_end': False,
 'epub_version': '2',
 'expand_css': False,
 'extra_css': None,
 'extract_to': None,
 'filter_css': None,
 'fix_indents': True,
 'flow_size': 260,
 'font_size_mapping': None,
 'format_scene_breaks': True,
 'html_unwrap_factor': 0.4,
 'input_encoding': None,
 'input_profile': <calibre.customize.profiles.InputProfile object at 0x000002963C786A00>,
 'insert_blank_line': False,
 'insert_blank_line_size': 0.5,
 'insert_metadata': False,
 'isbn': None,
 'italicize_common_cases': True,
 'keep_ligatures': False,
 'language': None,
 'level1_toc': None,
 'level2_toc': None,
 'level3_toc': None,
 'line_height': 0,
 'linearize_tables': False,
 'lrf': False,
 'margin_bottom': 5.0,
 'margin_left': 5.0,
 'margin_right': 5.0,
 'margin_top': 5.0,
 'markup_chapter_headings': True,
 'max_toc_links': 50,
 'minimum_line_height': 120.0,
 'no_chapters_in_toc': False,
 'no_default_epub_cover': False,
 'no_inline_navbars': False,
 'no_svg_cover': False,
 'output_profile': <calibre.customize.profiles.GenericEink object at 0x000002963C786D00>,
 'page_breaks_before': None,
 'prefer_metadata_cover': False,
 'preserve_cover_aspect_ratio': False,
 'pretty_print': True,
 'pubdate': None,
 'publisher': None,
 'rating': None,
 'read_metadata_from_opf': None,
 'remove_fake_margins': True,
 'remove_first_image': False,
 'remove_paragraph_spacing': False,
 'remove_paragraph_spacing_indent_size': 1.5,
 'renumber_headings': True,
 'replace_scene_breaks': '',
 'search_replace': None,
 'series': None,
 'series_index': None,
 'smarten_punctuation': False,
 'sr1_replace': '',
 'sr1_search': '',
 'sr2_replace': '',
 'sr2_search': '',
 'sr3_replace': '',
 'sr3_search': '',
 'start_reading_at': None,
 'subset_embedded_fonts': False,
 'tags': None,
 'test': False,
 'timestamp': None,
 'title': None,
 'title_sort': None,
 'toc_filter': None,
 'toc_threshold': 6,
 'toc_title': None,
 'transform_css_rules': None,
 'unsmarten_punctuation': False,
 'unwrap_lines': True,
 'use_auto_toc': False,
 'verbose': 2}
InputFormatPlugin: Recipe Input running
Downloading recipe urn: builtin:skeptical_enquirer
Trying to get latest version of recipe: skeptical_enquirer
Using user agent: Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko
Traceback (most recent call last):
  File "runpy.py", line 194, in _run_module_as_main
  File "runpy.py", line 87, in _run_code
  File "site.py", line 82, in <module>
  File "site.py", line 77, in main
  File "site.py", line 49, in run_entry_point
  File "calibre\utils\ipc\worker.py", line 216, in main
  File "calibre\gui2\convert\gui_conversion.py", line 34, in gui_convert_recipe
  File "calibre\gui2\convert\gui_conversion.py", line 28, in gui_convert
  File "calibre\ebooks\conversion\plumber.py", line 1105, in run
  File "calibre\customize\conversion.py", line 245, in __call__
  File "calibre\ebooks\conversion\plugins\recipe_input.py", line 139, in convert
  File "calibre\web\feeds\news.py", line 1051, in download
  File "calibre\web\feeds\news.py", line 1231, in build_index
ValueError: No articles found, aborting
Any idea?

Thanks,
d.r.l is offline   Reply With Quote