Junior Member
Posts: 2
Karma: 10
Join Date: Apr 2014
Device: Kindle Paperwhite
|
Okay, I continued the work on my recipe. This is the newest version:
Code:
#!/usr/bin/env python
import re
# (1) import the basic recipe and needed parts from BeautifulSoup
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
# (2) declare your class, derived from BasaicNewsRecipe, and set the variable INDEX to the url for the site page with links
class SZOnline(BasicNewsRecipe):
title = 'SZ Test'
__author__ = 'Jan Nikolas Dicke'
description = 'none'
INDEX = 'http://www.sz-online.de/'
language = 'de'
# (5) you will probably want to remove javascript, and may want to disable loading of stylesheets. Here, this does not make much difference, so I have retained the line for future use if desired, but made it a comment using "#"
remove_javascript = True
# (6) parse_index finds the article links, using the INDEX variable, and
# looking for links in a DIV with class="contenedor_nuevo". No cover image
# is specified. All subsequent lines here are part of parse_index. See
# the code for the correct indentation structure
def parse_index(self):
articles = []
soup = self.index_to_soup(self.INDEX)
# ts = soup.find(id='magazineTopStories')
# ds = self.tag_to_string(ts.find('h1')).split(':')[-1]
# self.timefmt = ' [%s]'%ds
cover = None
feeds = []
# for section in soup.findAll('div', attrs={'class':'magazineSection'}):
for section in soup.findAll('header'):
section_title = self.tag_to_string(section.find('h2'))
articles = []
# (7) all article links have a "href" attribute
# for post in section.findAll('h3', attrs={'class':'headline'}):
for post in section.findAll('a', href=True):
url = post['href']
# (8) other links may also have a "href" attribute, but article links
# will start with "/", and need the base url appended
if url.startswith('/'):
url = 'http://www.sz-online.de'+url
title = self.tag_to_string(post)
# self.log('\t\t', desc)
# (11) build the list of article links
articles.append({'title':title, 'url':url})
# (12) and if any article links have been found, append the article list to the feed list, which is finally returned
if articles:
feeds.append((section_title, articles))
return feeds
Unfortunally, fetching the news end up with the following error:
Code:
calibre, version 1.33.0 (darwin, isfrozen: True)
Konvertierungsfehler: Fehlgeschlagen: Nachrichten abrufen von SZ Test
Nachrichten abrufen von SZ Test
Resolved conversion options
calibre version: 1.33.0
{'asciiize': False,
'author_sort': None,
'authors': None,
'base_font_size': 0,
'book_producer': None,
'change_justification': 'original',
'chapter': None,
'chapter_mark': 'pagebreak',
'comments': None,
'cover': None,
'debug_pipeline': None,
'dehyphenate': True,
'delete_blank_paragraphs': True,
'disable_font_rescaling': False,
'dont_compress': False,
'dont_download_recipe': False,
'duplicate_links_in_toc': False,
'embed_all_fonts': False,
'embed_font_family': None,
'enable_heuristics': False,
'expand_css': False,
'extra_css': None,
'extract_to': None,
'filter_css': None,
'fix_indents': True,
'font_size_mapping': None,
'format_scene_breaks': True,
'html_unwrap_factor': 0.4,
'input_encoding': None,
'input_profile': <calibre.customize.profiles.InputProfile object at 0x1091d9110>,
'insert_blank_line': False,
'insert_blank_line_size': 0.5,
'insert_metadata': False,
'isbn': None,
'italicize_common_cases': True,
'keep_ligatures': False,
'language': None,
'level1_toc': None,
'level2_toc': None,
'level3_toc': None,
'line_height': 0,
'linearize_tables': False,
'lrf': False,
'margin_bottom': 5.0,
'margin_left': 5.0,
'margin_right': 5.0,
'margin_top': 5.0,
'markup_chapter_headings': True,
'max_toc_links': 50,
'minimum_line_height': 120.0,
'mobi_file_type': 'old',
'mobi_ignore_margins': False,
'mobi_keep_original_images': False,
'mobi_toc_at_start': False,
'no_chapters_in_toc': False,
'no_inline_navbars': False,
'no_inline_toc': False,
'output_profile': <calibre.customize.profiles.OutputProfile object at 0x1091d94d0>,
'page_breaks_before': None,
'personal_doc': '[PDOC]',
'prefer_author_sort': False,
'prefer_metadata_cover': False,
'pretty_print': False,
'pubdate': None,
'publisher': None,
'rating': None,
'read_metadata_from_opf': None,
'remove_fake_margins': True,
'remove_first_image': False,
'remove_paragraph_spacing': False,
'remove_paragraph_spacing_indent_size': 1.5,
'renumber_headings': True,
'replace_scene_breaks': '',
'search_replace': None,
'series': None,
'series_index': None,
'share_not_sync': False,
'smarten_punctuation': False,
'sr1_replace': '',
'sr1_search': '',
'sr2_replace': '',
'sr2_search': '',
'sr3_replace': '',
'sr3_search': '',
'start_reading_at': None,
'subset_embedded_fonts': False,
'tags': None,
'test': False,
'timestamp': None,
'title': None,
'title_sort': None,
'toc_filter': None,
'toc_threshold': 6,
'toc_title': None,
'unsmarten_punctuation': False,
'unwrap_lines': True,
'use_auto_toc': False,
'verbose': 2}
Python function terminated unexpectedly: local variable 'title' referenced before assignment
InputFormatPlugin: Recipe Input running
Using custom recipe
Traceback (most recent call last):
File "/Applications/calibre.app/Contents/Resources/Python/lib/python2.7/site.py", line 208, in main
return run_entry_point()
File "/Applications/calibre.app/Contents/Resources/Python/lib/python2.7/site.py", line 114, in run_entry_point
return getattr(pmod, func)()
File "site-packages/calibre/utils/ipc/worker.py", line 195, in main
File "site-packages/calibre/gui2/convert/gui_conversion.py", line 25, in gui_convert
File "site-packages/calibre/ebooks/conversion/plumber.py", line 1038, in run
File "site-packages/calibre/customize/conversion.py", line 241, in __call__
File "site-packages/calibre/ebooks/conversion/plugins/recipe_input.py", line 117, in convert
File "site-packages/calibre/web/feeds/news.py", line 982, in download
File "site-packages/calibre/web/feeds/news.py", line 1147, in build_index
File "<string>", line 59, in parse_index
UnboundLocalError: local variable 'title' referenced before assignment
May anybody help me, please?
Thanks, Jan
|