View Single Post
Old 10-07-2012, 06:51 PM   #5
PeterT
Taking a break; Fed up
PeterT ought to be getting tired of karma fortunes by now.PeterT ought to be getting tired of karma fortunes by now.PeterT ought to be getting tired of karma fortunes by now.PeterT ought to be getting tired of karma fortunes by now.PeterT ought to be getting tired of karma fortunes by now.PeterT ought to be getting tired of karma fortunes by now.PeterT ought to be getting tired of karma fortunes by now.PeterT ought to be getting tired of karma fortunes by now.PeterT ought to be getting tired of karma fortunes by now.PeterT ought to be getting tired of karma fortunes by now.PeterT ought to be getting tired of karma fortunes by now.
 
PeterT's Avatar
 
Posts: 6,785
Karma: 43933000
Join Date: Nov 2007
Location: Toronto
Device: Wife: Touch, Arc, Vox Me: Nexus 7, Glo
Quote:
Originally Posted by penguin View Post
it seems to work again. Add the following highlighted parts. Only the last one is needed to make the recipe work again. The others are changes in style and title.


@Keksfabrik
maybe you can report if it fix your problem

Spoiler:

Code:
#!/usr/bin/env  python

__license__   = 'GPL v3'
__copyright__ = '2011, Nikolas Mangold <nmangold at gmail.com>'
'''
spiegel.de
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre import strftime
from calibre import re

class DerSpiegel(BasicNewsRecipe):
    title                  = 'Der Spiegel '
    __author__             = 'Nikolas Mangold'
    description            = 'Der Spiegel, Printed Edition. Access to paid content.'
    publisher              = 'SPIEGEL-VERLAG RUDOLF AUGSTEIN GMBH & CO. KG'
    category               = 'news, politics, Germany'
    no_stylesheets         = False
    encoding               = 'cp1252'
    needs_subscription     = True
    remove_empty_feeds     = True
    delay                  = 1
    PREFIX                 = 'http://m.spiegel.de'
    INDEX                  = PREFIX + '/spiegel/print/epaper/index-heftaktuell.html'
    use_embedded_content   = False
    masthead_url = 'http://upload.wikimedia.org/wikipedia/en/thumb/1/17/Der_Spiegel_logo.svg/200px-Der_Spiegel_logo.svg.png'
    language               = 'de'
    publication_type       = 'magazine'
    extra_css              = ' body{font-family: Arial,Helvetica,sans-serif} '
    timefmt = '[%U/%Y]'
    title = title + strftime(timefmt)
    empty_articles = ['Titelbild']
    preprocess_regexps = [
        (re.compile(r'<p>◆</p>', re.DOTALL|re.IGNORECASE), lambda match: '<hr>'),
        ]

    def get_browser(self):
        def has_login_name(form):
            try:
                form.find_control(name="f.loginName")
            except:
                return False
            else:
                return True

        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open(self.PREFIX + '/meinspiegel/login.html')
            br.select_form(predicate=has_login_name)
            br['f.loginName'    ] = self.username
            br['f.password'] = self.password
            br.submit()
        return br

    remove_tags_before =  dict(attrs={'class':'spArticleContent'})
    remove_tags_after  =  dict(attrs={'class':'spArticleCredit'})

    def parse_index(self):
        soup = self.index_to_soup(self.INDEX)

        cover = soup.find('img', width=248)
        if cover is not None:
            self.cover_url = cover['src']

        index = soup.find('dl')

        feeds = []
        for section in index.findAll('dt'):
            section_title = self.tag_to_string(section).strip()
            self.log('Found section ', section_title)

            articles = []
            for article in section.findNextSiblings(['dd','dt']):
                if article.name == 'dt':
                    break
                link = article.find('a',href=True)
                title = self.tag_to_string(link).strip()
                if title in self.empty_articles:
                    continue
                self.log('Found article ', title)
                url = self.PREFIX + link['href']
                articles.append({'title' : title, 'date' : strftime(self.timefmt), 'url' : url})
            feeds.append((section_title,articles))
        return feeds;
Remember that indentation is key...
PeterT is offline   Reply With Quote