View Single Post
Old 04-11-2023, 01:05 PM   #8
Sushi5675
Junior Member
Sushi5675 began at the beginning.
 
Posts: 8
Karma: 10
Join Date: Mar 2023
Device: kindle paperwhite
Quote:
Originally Posted by unkn0wn View Post
try
Code:
    def get_browser(self):
        """Return a browser that has submitted the SZ login form.

        Opens the login page, selects the form whose ``id`` attribute is
        ``login-form``, fills it with ``self.username`` and
        ``self.password`` and submits it.
        """
        login_url = 'https://id.sueddeutsche.de/login'

        def is_form_login(form):
            # Pick the form by its id attribute instead of by position.
            return form.attrs.get('id') == "login-form"

        br = BasicNewsRecipe.get_browser(self)
        br.open(login_url)
        br.select_form(predicate=is_form_login)
        br['login'] = self.username
        br['password'] = self.password
        br.submit()
        return br
Thanks unkn0wn. The notification of your post didn't work, so please excuse my late reply.

I debugged the output and the login works.
In the console output I can read my profile ID, which is only visible after successful login.

But unfortunately only two or three articles are readable.

The strange thing is that some articles behind the paywall are readable and others are not. The remaining articles are truncated.

Any ideas?


Code:
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'

#import
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre import strftime
import time


##SZ
class Sueddeutsche(BasicNewsRecipe):
    """Calibre recipe for Süddeutsche Zeitung that logs in before fetching.

    The recipe reads a single RSS feed, signs in through the login form at
    ``id.sueddeutsche.de`` and requests every article with ``print=true``
    appended to its URL.
    """

    title = u'SZ8'
    description = 'News from Germany'
    publisher = u'Süddeutsche Zeitung'
    category = 'news, politics'
    timefmt = ' [%a, %d %b %Y]'
    oldest_article = 1
    max_articles_per_feed = 10
    language = 'de'
    encoding = 'utf-8'
    publication_type = 'newspaper'
    remove_empty_feeds = True
    needs_subscription = True

    simultaneous_downloads = 1
    recursions = 0

    # NOTE: the query string must not contain whitespace; the stray tab
    # characters that previously followed "rss?" have been removed.
    # Alternative feed: http://rss.sueddeutsche.de/rss/Politik
    feeds = [
        (
            u'SZ',
            u'https://www.sueddeutsche.de/news/rss'
            u'?search=&sort=date&dep%5B%5D=politik&typ%5B%5D=article'
            u'&all%5B%5D=sys&all%5B%5D=time&sys%5B%5D=sz'
            u'&catsz%5B%5D=szTopThemes',
        ),
    ]

    def get_browser(self):
        """Return a browser that has submitted the SZ login form.

        Opens the login page, selects the form whose ``id`` attribute is
        ``login-form``, fills it with ``self.username`` and
        ``self.password`` and submits it.
        """
        def is_form_login(form):
            # Select the form by id rather than by position on the page.
            return form.attrs.get('id') == 'login-form'

        browser = BasicNewsRecipe.get_browser(self)
        browser.open('https://id.sueddeutsche.de/login')
        browser.select_form(predicate=is_form_login)
        browser['login'] = self.username
        browser['password'] = self.password
        browser.submit()
        return browser

    def print_version(self, url):
        """Return the URL reported after requesting ``url`` with print=true.

        The parameter is appended with ``&`` when ``url`` already has a
        query string and with ``?`` otherwise. The page is opened with
        ``self.browser`` and the response's ``geturl()`` value is returned.
        """
        separator = '&' if '?' in url else '?'
        return self.browser.open(url + separator + 'print=true').geturl()
Sushi5675 is offline   Reply With Quote