MobileRead Forums - View Single Post

Sushi5675 · 05-21-2023, 03:23 AM

Hi,

I still can't get it to work... Thanks @unkn0wn for all your input.

The initial login procedure works, but it's probably not staying logged in (without JavaScript?). Maybe we need something similar to the WSJ or Irish Times recipes?

Current status is:

Code:

# -*- coding: utf-8 -*-
__license__ = 'GPL v3'

'''
Fetch sueddeutsche.de
'''
from calibre.web.feeds.news import BasicNewsRecipe, classes

class Sueddeutsche(BasicNewsRecipe):
    '''
    Recipe for sueddeutsche.de: signs in through the id.sueddeutsche.de
    login form and reads articles from the site's news RSS feed.
    '''

    # Publication metadata
    title = 'SZ'
    publisher = 'Süddeutsche Zeitung'
    description = 'News from Germany, Access to online content'
    category = 'news, politics, Germany'
    language = 'de'
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y]'

    # Download behaviour
    needs_subscription = True
    use_embedded_content = False
    oldest_article = 1
    max_articles_per_feed = 100
    encoding = 'utf-8'

    # Markup clean-up
    no_stylesheets = True
    remove_attributes = ['style', 'height', 'width']
    keep_only_tags = [
        classes('lp_is_start custom-1qvpywd'),
    ]
    remove_tags = [
        dict(name=['button', 'aside', 'nav']),
        classes('teaserable-layout teaserable-layout--teaser'),
    ]

    feeds = [
        ('SZ', 'https://www.sueddeutsche.de/news/rss'),
    ]

    def get_browser(self):
        '''
        Return a browser on which the login form has been submitted
        with self.username and self.password.
        '''

        def is_form_login(form):
            # Pick the form by its id attribute; forms without an id never match.
            form_attrs = form.attrs
            if 'id' not in form_attrs:
                return False
            return form_attrs['id'] == 'login-form'

        br = BasicNewsRecipe.get_browser(self)
        br.open('https://id.sueddeutsche.de/login')
        br.select_form(predicate=is_form_login)
        br['login'] = self.username
        br['password'] = self.password
        br.submit()
        return br

    def preprocess_html(self, soup):
        '''
        Adjust image markup in the article soup and return the soup.

        The first <noscript> found inside each <picture> is renamed to
        <div>, and every <img> whose src starts with "data:" is removed.
        '''

        def has_data_uri(src):
            # src may be missing (None), in which case the img is kept.
            return src and src.startswith('data:')

        for picture in soup.findAll('picture'):
            fallback = picture.find('noscript')
            if fallback:
                # NOTE(review): presumably this exposes the real <img> held
                # in the noscript fallback - confirm against the site markup.
                fallback.name = 'div'

        for inline_img in soup.findAll('img', attrs={'src': has_data_uri}):
            inline_img.extract()

        return soup

    def print_version(self, url):
        '''Return url with everything from the first "?" onwards dropped.'''
        base, _, _ = url.partition('?')
        return base