View Single Post
Old 05-21-2023, 02:23 AM   #12
Sushi5675
Junior Member
Sushi5675 began at the beginning.
 
Posts: 8
Karma: 10
Join Date: Mar 2023
Device: kindle paperwhite
Hi,

I still can't get it to work... Thanks @unkn0wn for all your input.

The initial login procedure works, but it probably isn't staying logged in (without JavaScript?). Maybe we need something similar to the WSJ or Irish Times recipes?

Current status is:

Code:
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'

'''
Fetch sueddeutsche.de
'''
from calibre.web.feeds.news import BasicNewsRecipe, classes

class Sueddeutsche(BasicNewsRecipe):
    """Calibre news recipe for sueddeutsche.de, driven by its news RSS feed.

    ``get_browser`` submits the subscriber login form at
    id.sueddeutsche.de using the recipe's username and password.
    """

    # --- Publication metadata -------------------------------------------
    title = u'SZ'
    publisher = u'Süddeutsche Zeitung'
    description = 'News from Germany, Access to online content'
    category = 'news, politics, Germany'
    language = 'de'
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y]'

    # --- Download settings ----------------------------------------------
    encoding = 'utf-8'
    oldest_article = 1
    max_articles_per_feed = 100
    needs_subscription = True  # get_browser reads self.username / self.password
    use_embedded_content = False
    no_stylesheets = True

    feeds = [
        (u'SZ', u'https://www.sueddeutsche.de/news/rss'),
    ]

    # --- Markup clean-up rules ------------------------------------------
    remove_attributes = ['style', 'height', 'width']

    keep_only_tags = [
        classes('lp_is_start custom-1qvpywd'),
    ]

    remove_tags = [
        {'name': ['button', 'aside', 'nav']},
        classes('teaserable-layout teaserable-layout--teaser'),
    ]

    def get_browser(self):
        """Return a browser on which the SZ login form has been submitted."""

        def _is_login_form(candidate):
            # Match only the form whose id attribute is exactly "login-form".
            attributes = candidate.attrs
            if "id" not in attributes:
                return False
            return attributes['id'] == "login-form"

        br = BasicNewsRecipe.get_browser(self)
        br.open('https://id.sueddeutsche.de/login')

        br.select_form(predicate=_is_login_form)
        br['login'] = self.username
        br['password'] = self.password
        br.submit()

        return br

    def preprocess_html(self, soup):
        """Rewrite <picture> fallbacks and drop inline ``data:`` images.

        Returns the same ``soup`` object after modifying it in place.
        """

        def _is_data_uri(src):
            # A missing src is passed through as-is (falsy), so it never matches.
            return src and src.startswith('data:')

        # Rename the first <noscript> found inside each <picture> to <div>.
        # NOTE(review): presumably this keeps the fallback image markup
        # from being discarded later; confirm.
        for picture in soup.findAll('picture'):
            fallback = picture.find('noscript')
            if fallback:
                fallback.name = 'div'

        # Remove <img> tags whose src is an inline data: URI.
        for image in soup.findAll('img', attrs={'src': _is_data_uri}):
            image.extract()

        return soup

    def print_version(self, url):
        """Return ``url`` with everything from the first '?' onwards removed."""
        base, _, _ = url.partition('?')
        return base
Sushi5675 is offline   Reply With Quote