Hi,
I still can't get it to work... Thanks @unkn0wn for all your input.
The initial login procedure works, but the session is probably not staying logged in (perhaps because JavaScript is required?). Maybe we need something similar to the WSJ or Irish Times recipes?
Current status is:
Code:
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
'''
Fetch sueddeutsche.de
'''
from calibre.web.feeds.news import BasicNewsRecipe, classes
class Sueddeutsche(BasicNewsRecipe):
    """Calibre recipe that fetches sueddeutsche.de articles from its RSS feed.

    The recipe requires a subscription: ``get_browser`` submits the
    login form at id.sueddeutsche.de with the configured username and
    password before any article is downloaded.
    """

    # --- Publication metadata -------------------------------------------
    title = u'SZ'
    description = 'News from Germany, Access to online content'
    publisher = u'Süddeutsche Zeitung'
    category = 'news, politics, Germany'
    timefmt = ' [%a, %d %b %Y]'
    language = 'de'
    encoding = 'utf-8'
    publication_type = 'newspaper'

    # --- Download limits ------------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 100

    # --- Content handling -----------------------------------------------
    remove_attributes = ['style', 'height', 'width']
    needs_subscription = True
    use_embedded_content = False
    no_stylesheets = True

    # Elements matching these CSS classes are the only ones kept.
    # NOTE(review): 'custom-1qvpywd' looks like a generated class name and
    # may change when the site is redeployed -- confirm it is stable.
    keep_only_tags = [
        classes('lp_is_start custom-1qvpywd'),
    ]

    # Elements dropped from whatever keep_only_tags retained.
    remove_tags = [
        dict(name=['button', 'aside', 'nav']),
        classes('teaserable-layout teaserable-layout--teaser'),
    ]

    feeds = [
        (u'SZ', u'https://www.sueddeutsche.de/news/rss'),
    ]

    def get_browser(self, *args, **kwargs):
        """Return a browser on which the SZ login form has been submitted.

        Any positional or keyword arguments are forwarded unchanged to
        ``BasicNewsRecipe.get_browser``.

        NOTE(review): only the form submission itself is performed here;
        whether the resulting session stays authenticated for article
        downloads is not verified by this code.
        """
        def is_form_login(form):
            # Select the form by its id rather than by position on the page.
            return form.attrs.get('id') == 'login-form'

        browser = BasicNewsRecipe.get_browser(self, *args, **kwargs)
        browser.open('https://id.sueddeutsche.de/login')
        browser.select_form(predicate=is_form_login)
        browser['login'] = self.username
        browser['password'] = self.password
        browser.submit()
        return browser

    def preprocess_html(self, soup):
        """Expose <noscript> image fallbacks and drop inline ``data:`` images.

        Every <noscript> found directly via ``find`` inside a <picture> is
        renamed to <div> so its contents become regular markup; afterwards
        every <img> whose ``src`` starts with ``data:`` is removed.
        Returns the same ``soup`` object, modified in place.
        """
        for pic in soup.findAll('picture'):
            if nos := pic.find('noscript'):
                nos.name = 'div'

        # `n and ...` guards against <img> tags without a src attribute.
        inline_images = soup.findAll(
            'img', attrs={'src': lambda n: n and n.startswith('data:')}
        )
        for img in inline_images:
            img.extract()
        return soup

    def print_version(self, url):
        """Return ``url`` with everything from the first ``?`` onwards removed."""
        return url.split('?')[0]