View Single Post
Old 04-22-2014, 01:15 PM   #2
jande
Junior Member
jande began at the beginning.
 
Posts: 2
Karma: 10
Join Date: Apr 2014
Device: Kindle Paperwhite
Okay, I continued the work on my recipe. This is the newest version:

Code:
#!/usr/bin/env  python

import re

# (1) import the basic recipe and needed parts from BeautifulSoup

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, NavigableString

# (2) declare your class, derived from BasaicNewsRecipe, and set the variable INDEX to the url for the site page with links

class SZOnline(BasicNewsRecipe):

    title      = 'SZ Test'
    __author__ = 'Jan Nikolas Dicke'
    description = 'none'
    INDEX = 'http://www.sz-online.de/'
    language = 'de'


# (5) you will probably want to remove javascript, and may want to disable loading of stylesheets. Here, this does not make much difference, so I have retained the line for future use if desired, but made it a comment using "#"

    remove_javascript = True

# (6) parse_index finds the article links, using the INDEX variable, and 
# looking for links in a DIV with class="contenedor_nuevo". No cover image
# is specified. All subsequent lines here are part of parse_index. See 
# the code for the correct indentation structure

    def parse_index(self):
        articles = []
        soup = self.index_to_soup(self.INDEX)
#        ts = soup.find(id='magazineTopStories')
#        ds = self.tag_to_string(ts.find('h1')).split(':')[-1]
#        self.timefmt = ' [%s]'%ds
        cover = None
        feeds = []
#        for section in soup.findAll('div', attrs={'class':'magazineSection'}):
        for section in soup.findAll('header'):
            section_title = self.tag_to_string(section.find('h2'))
            articles = []

# (7) all article links have a "href" attribute
#            for post in section.findAll('h3', attrs={'class':'headline'}):
            for post in section.findAll('a', href=True):
                url = post['href']

# (8) other links may also have a "href" attribute, but article links
# will start with "/", and need the base url appended

                if url.startswith('/'):
                    url = 'http://www.sz-online.de'+url
                    title = self.tag_to_string(post)


#                    self.log('\t\t', desc)

# (11) build the list of article links
                articles.append({'title':title, 'url':url})


# (12) and if any article links have been found, append the article list to the feed list, which is finally returned
            if articles:
                feeds.append((section_title, articles))

        return feeds
Unfortunally, fetching the news end up with the following error:

Code:
calibre, version 1.33.0 (darwin, isfrozen: True)
Konvertierungsfehler: Fehlgeschlagen: Nachrichten abrufen von SZ Test

Nachrichten abrufen von SZ Test
Resolved conversion options
calibre version: 1.33.0
{'asciiize': False,
 'author_sort': None,
 'authors': None,
 'base_font_size': 0,
 'book_producer': None,
 'change_justification': 'original',
 'chapter': None,
 'chapter_mark': 'pagebreak',
 'comments': None,
 'cover': None,
 'debug_pipeline': None,
 'dehyphenate': True,
 'delete_blank_paragraphs': True,
 'disable_font_rescaling': False,
 'dont_compress': False,
 'dont_download_recipe': False,
 'duplicate_links_in_toc': False,
 'embed_all_fonts': False,
 'embed_font_family': None,
 'enable_heuristics': False,
 'expand_css': False,
 'extra_css': None,
 'extract_to': None,
 'filter_css': None,
 'fix_indents': True,
 'font_size_mapping': None,
 'format_scene_breaks': True,
 'html_unwrap_factor': 0.4,
 'input_encoding': None,
 'input_profile': <calibre.customize.profiles.InputProfile object at 0x1091d9110>,
 'insert_blank_line': False,
 'insert_blank_line_size': 0.5,
 'insert_metadata': False,
 'isbn': None,
 'italicize_common_cases': True,
 'keep_ligatures': False,
 'language': None,
 'level1_toc': None,
 'level2_toc': None,
 'level3_toc': None,
 'line_height': 0,
 'linearize_tables': False,
 'lrf': False,
 'margin_bottom': 5.0,
 'margin_left': 5.0,
 'margin_right': 5.0,
 'margin_top': 5.0,
 'markup_chapter_headings': True,
 'max_toc_links': 50,
 'minimum_line_height': 120.0,
 'mobi_file_type': 'old',
 'mobi_ignore_margins': False,
 'mobi_keep_original_images': False,
 'mobi_toc_at_start': False,
 'no_chapters_in_toc': False,
 'no_inline_navbars': False,
 'no_inline_toc': False,
 'output_profile': <calibre.customize.profiles.OutputProfile object at 0x1091d94d0>,
 'page_breaks_before': None,
 'personal_doc': '[PDOC]',
 'prefer_author_sort': False,
 'prefer_metadata_cover': False,
 'pretty_print': False,
 'pubdate': None,
 'publisher': None,
 'rating': None,
 'read_metadata_from_opf': None,
 'remove_fake_margins': True,
 'remove_first_image': False,
 'remove_paragraph_spacing': False,
 'remove_paragraph_spacing_indent_size': 1.5,
 'renumber_headings': True,
 'replace_scene_breaks': '',
 'search_replace': None,
 'series': None,
 'series_index': None,
 'share_not_sync': False,
 'smarten_punctuation': False,
 'sr1_replace': '',
 'sr1_search': '',
 'sr2_replace': '',
 'sr2_search': '',
 'sr3_replace': '',
 'sr3_search': '',
 'start_reading_at': None,
 'subset_embedded_fonts': False,
 'tags': None,
 'test': False,
 'timestamp': None,
 'title': None,
 'title_sort': None,
 'toc_filter': None,
 'toc_threshold': 6,
 'toc_title': None,
 'unsmarten_punctuation': False,
 'unwrap_lines': True,
 'use_auto_toc': False,
 'verbose': 2}
Python function terminated unexpectedly: local variable 'title' referenced before assignment
InputFormatPlugin: Recipe Input running
Using custom recipe
Traceback (most recent call last):
  File "/Applications/calibre.app/Contents/Resources/Python/lib/python2.7/site.py", line 208, in main
    return run_entry_point()
  File "/Applications/calibre.app/Contents/Resources/Python/lib/python2.7/site.py", line 114, in run_entry_point
    return getattr(pmod, func)()
  File "site-packages/calibre/utils/ipc/worker.py", line 195, in main
  File "site-packages/calibre/gui2/convert/gui_conversion.py", line 25, in gui_convert
  File "site-packages/calibre/ebooks/conversion/plumber.py", line 1038, in run
  File "site-packages/calibre/customize/conversion.py", line 241, in __call__
  File "site-packages/calibre/ebooks/conversion/plugins/recipe_input.py", line 117, in convert
  File "site-packages/calibre/web/feeds/news.py", line 982, in download
  File "site-packages/calibre/web/feeds/news.py", line 1147, in build_index
  File "<string>", line 59, in parse_index
UnboundLocalError: local variable 'title' referenced before assignment


May anybody help me, please?

Thanks, Jan
jande is offline   Reply With Quote