Member
Posts: 17
Karma: 10
Join Date: Jun 2015
Location: USA (expat from Belgium)
Device: Android Table
|
I kept digging
So, I kept digging and realised the recipe was a tad outdated and missed the code to log in as a browser before downloading the issues.
I have an error in the customized recipe I tried to come up with.
Here's my recipe as it stands
Code:
from calibre.web.feeds.news import BasicNewsRecipe
import re
def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
if self.username is not None and self.password is not None:
br.open('https://skepticalinquirer.org/member-login/')
br.select_form(name='login')
br['HIDDEN'] = self.username
br['HIDDEN'] = self.password
br.submit()
return br
class TheSkepticalInquirer(BasicNewsRecipe):
title = u'The Skeptical Inquirer'
description = 'Investigation of fringe science and paranormal claims.'
language = 'en'
__author__ = 'Starson17'
oldest_article = 31
cover_url = 'https://skepticalinquirer.org/wp-content/uploads/sites/29/2019/03/SI-logo-tag-line-w.png'
remove_empty_feeds = True
remove_javascript = True
max_articles_per_feed = 50
no_stylesheets = True
keep_only_tags = [dict(name='div', attrs={'id': ['content', 'bio']})]
remove_tags = [
dict(name='div', attrs={'id': ['socialMedia']}),
]
preprocess_regexps = [
(re.compile(r'\.\(JavaScript must be enabled to view this email address\)',
re.DOTALL | re.IGNORECASE), lambda match: ''),
]
def parse_index(self):
feeds = []
for title, url in [("The Skeptical Inquirer", "https://skepticalinquirer.org/archive/")]:
articles = self.make_links(url)
if articles:
feeds.append((title, articles))
return feeds
def make_links(self, url):
soup = self.index_to_soup(url)
title = ''
current_articles = []
for item in soup.findAll(attrs={'class': ['article-single bigger']}):
page_url = url + str(item.a["href"])
title = str(item.a.string)
current_articles.append(
{'title': title, 'url': page_url, 'description': '', 'date': ''})
return current_articles
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
calibre_most_common_ua = 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'
Here's the result
Code:
calibre, version 5.13.0 (win32, embedded-python: True)
Conversion error: Failed: Fetch news from The Skeptical Inquirer
Fetch news from The Skeptical Inquirer
Conversion options changed from defaults:
verbose: 2
output_profile: 'tablet'
Resolved conversion options
calibre version: 5.13.0
{'asciiize': False,
'author_sort': None,
'authors': None,
'base_font_size': 0,
'book_producer': None,
'change_justification': 'original',
'chapter': None,
'chapter_mark': 'pagebreak',
'comments': None,
'cover': None,
'debug_pipeline': None,
'dehyphenate': True,
'delete_blank_paragraphs': True,
'disable_font_rescaling': False,
'dont_download_recipe': False,
'dont_split_on_page_breaks': True,
'duplicate_links_in_toc': False,
'embed_all_fonts': False,
'embed_font_family': None,
'enable_heuristics': False,
'epub_flatten': False,
'epub_inline_toc': False,
'epub_toc_at_end': False,
'epub_version': '2',
'expand_css': False,
'extra_css': None,
'extract_to': None,
'filter_css': None,
'fix_indents': True,
'flow_size': 260,
'font_size_mapping': None,
'format_scene_breaks': True,
'html_unwrap_factor': 0.4,
'input_encoding': None,
'input_profile': <calibre.customize.profiles.InputProfile object at 0x000002415DD06A00>,
'insert_blank_line': False,
'insert_blank_line_size': 0.5,
'insert_metadata': False,
'isbn': None,
'italicize_common_cases': True,
'keep_ligatures': False,
'language': None,
'level1_toc': None,
'level2_toc': None,
'level3_toc': None,
'line_height': 0,
'linearize_tables': False,
'lrf': False,
'margin_bottom': 5.0,
'margin_left': 5.0,
'margin_right': 5.0,
'margin_top': 5.0,
'markup_chapter_headings': True,
'max_toc_links': 50,
'minimum_line_height': 120.0,
'no_chapters_in_toc': False,
'no_default_epub_cover': False,
'no_inline_navbars': False,
'no_svg_cover': False,
'output_profile': <calibre.customize.profiles.TabletOutput object at 0x000002415DD2B340>,
'page_breaks_before': None,
'prefer_metadata_cover': False,
'preserve_cover_aspect_ratio': False,
'pretty_print': True,
'pubdate': None,
'publisher': None,
'rating': None,
'read_metadata_from_opf': None,
'remove_fake_margins': True,
'remove_first_image': False,
'remove_paragraph_spacing': False,
'remove_paragraph_spacing_indent_size': 1.5,
'renumber_headings': True,
'replace_scene_breaks': '',
'search_replace': None,
'series': None,
'series_index': None,
'smarten_punctuation': False,
'sr1_replace': '',
'sr1_search': '',
'sr2_replace': '',
'sr2_search': '',
'sr3_replace': '',
'sr3_search': '',
'start_reading_at': None,
'subset_embedded_fonts': False,
'tags': None,
'test': False,
'timestamp': None,
'title': None,
'title_sort': None,
'toc_filter': None,
'toc_threshold': 6,
'toc_title': None,
'transform_css_rules': None,
'unsmarten_punctuation': False,
'unwrap_lines': True,
'use_auto_toc': False,
'verbose': 2}
InputFormatPlugin: Recipe Input running
Downloading recipe urn: custom:1000
Using user agent: Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko
Traceback (most recent call last):
File "runpy.py", line 194, in _run_module_as_main
File "runpy.py", line 87, in _run_code
File "site.py", line 82, in <module>
File "site.py", line 77, in main
File "site.py", line 49, in run_entry_point
File "calibre\utils\ipc\worker.py", line 216, in main
File "calibre\gui2\convert\gui_conversion.py", line 34, in gui_convert_recipe
File "calibre\gui2\convert\gui_conversion.py", line 28, in gui_convert
File "calibre\ebooks\conversion\plumber.py", line 1105, in run
File "calibre\customize\conversion.py", line 245, in __call__
File "calibre\ebooks\conversion\plugins\recipe_input.py", line 139, in convert
File "calibre\web\feeds\news.py", line 1051, in download
File "calibre\web\feeds\news.py", line 1231, in build_index
ValueError: No articles found, aborting
To clarify what I'm trying to achieve is to have Calibre download all the past issues I have access to, which are in PDF format and listed by date in the website with a link pointing to each issue. Here's a snipped of few links
https://skepticalinquirer.org/wp-con...1-proof-v2.pdf
https://skepticalinquirer.org/wp-con...21-reduced.pdf
I'm sure something is wrong with my code, I'll keep on searching for a solution in the meantime someone helps out here (not a complaint, I simply can't help myself  )
Quote:
Originally Posted by d.r.l
Dear team,
I'm trying to setup calibre to download the Skeptical Enquirer magazines I'm subscribed to.
I've setup the schedule for download, first day of the month, never delete older news and keep at most: all issues.
Then, hitting download now or ok does not prompt for my logon information.
First question: how can I add my logon information?
I have the following error message:
Code:
calibre, version 5.13.0 (win32, embedded-python: True)
Conversion error: Failed: Fetch news from The Skeptical Inquirer
Fetch news from The Skeptical Inquirer
Conversion options changed from defaults:
verbose: 2
output_profile: 'generic_eink'
Resolved conversion options
calibre version: 5.13.0
{'asciiize': False,
'author_sort': None,
'authors': None,
'base_font_size': 0,
'book_producer': None,
'change_justification': 'original',
'chapter': None,
'chapter_mark': 'pagebreak',
'comments': None,
'cover': None,
'debug_pipeline': None,
'dehyphenate': True,
'delete_blank_paragraphs': True,
'disable_font_rescaling': False,
'dont_download_recipe': False,
'dont_split_on_page_breaks': True,
'duplicate_links_in_toc': False,
'embed_all_fonts': False,
'embed_font_family': None,
'enable_heuristics': False,
'epub_flatten': False,
'epub_inline_toc': False,
'epub_toc_at_end': False,
'epub_version': '2',
'expand_css': False,
'extra_css': None,
'extract_to': None,
'filter_css': None,
'fix_indents': True,
'flow_size': 260,
'font_size_mapping': None,
'format_scene_breaks': True,
'html_unwrap_factor': 0.4,
'input_encoding': None,
'input_profile': <calibre.customize.profiles.InputProfile object at 0x000002963C786A00>,
'insert_blank_line': False,
'insert_blank_line_size': 0.5,
'insert_metadata': False,
'isbn': None,
'italicize_common_cases': True,
'keep_ligatures': False,
'language': None,
'level1_toc': None,
'level2_toc': None,
'level3_toc': None,
'line_height': 0,
'linearize_tables': False,
'lrf': False,
'margin_bottom': 5.0,
'margin_left': 5.0,
'margin_right': 5.0,
'margin_top': 5.0,
'markup_chapter_headings': True,
'max_toc_links': 50,
'minimum_line_height': 120.0,
'no_chapters_in_toc': False,
'no_default_epub_cover': False,
'no_inline_navbars': False,
'no_svg_cover': False,
'output_profile': <calibre.customize.profiles.GenericEink object at 0x000002963C786D00>,
'page_breaks_before': None,
'prefer_metadata_cover': False,
'preserve_cover_aspect_ratio': False,
'pretty_print': True,
'pubdate': None,
'publisher': None,
'rating': None,
'read_metadata_from_opf': None,
'remove_fake_margins': True,
'remove_first_image': False,
'remove_paragraph_spacing': False,
'remove_paragraph_spacing_indent_size': 1.5,
'renumber_headings': True,
'replace_scene_breaks': '',
'search_replace': None,
'series': None,
'series_index': None,
'smarten_punctuation': False,
'sr1_replace': '',
'sr1_search': '',
'sr2_replace': '',
'sr2_search': '',
'sr3_replace': '',
'sr3_search': '',
'start_reading_at': None,
'subset_embedded_fonts': False,
'tags': None,
'test': False,
'timestamp': None,
'title': None,
'title_sort': None,
'toc_filter': None,
'toc_threshold': 6,
'toc_title': None,
'transform_css_rules': None,
'unsmarten_punctuation': False,
'unwrap_lines': True,
'use_auto_toc': False,
'verbose': 2}
InputFormatPlugin: Recipe Input running
Downloading recipe urn: builtin:skeptical_enquirer
Trying to get latest version of recipe: skeptical_enquirer
Using user agent: Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko
Traceback (most recent call last):
File "runpy.py", line 194, in _run_module_as_main
File "runpy.py", line 87, in _run_code
File "site.py", line 82, in <module>
File "site.py", line 77, in main
File "site.py", line 49, in run_entry_point
File "calibre\utils\ipc\worker.py", line 216, in main
File "calibre\gui2\convert\gui_conversion.py", line 34, in gui_convert_recipe
File "calibre\gui2\convert\gui_conversion.py", line 28, in gui_convert
File "calibre\ebooks\conversion\plumber.py", line 1105, in run
File "calibre\customize\conversion.py", line 245, in __call__
File "calibre\ebooks\conversion\plugins\recipe_input.py", line 139, in convert
File "calibre\web\feeds\news.py", line 1051, in download
File "calibre\web\feeds\news.py", line 1231, in build_index
ValueError: No articles found, aborting
Any idea?
Thanks,
|
|