MobileRead Forums - View Single Post - Custom recipes (archive, read-only)

exdream · 02-05-2010, 06:41 PM

Please help!
I'm trying to figure out a recipe for http://szmobil.sueddeutsche.de/. I've been working on it for quite a while now, and after a brief success with parsing one section, I can't get the login working with calibre's browser instance.

Every downloaded article page is just the login form. Does anybody have an idea? Thanks for your help!

from calibre.web.feeds.recipes import BasicNewsRecipe

class SzMobilRecipe(BasicNewsRecipe):
    """Recipe for the mobile edition served at szmobil.sueddeutsche.de.

    The recipe is marked as requiring a subscription, logs in through the
    site's login form, and builds its feeds by scraping section pages for
    article links.
    """

    title = u'S\xfcddeutsche Zeitung'
    oldest_article = 7
    max_articles_per_feed = 100
    description = 'Sueddeutsche Zeitung Mobile Ausgabe'
    language = 'de'

    needs_subscription = True

    def get_browser(self):
        """Return a browser, logged in when credentials are available.

        The login form is only submitted when both ``self.username`` and
        ``self.password`` are set; otherwise the plain browser is returned.
        """
        # The base implementation is reached through the class, so the
        # instance has to be passed explicitly.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open('http://szmobil.sueddeutsche.de/login.php')
            # NOTE(review): assumes the login form is the first form on the
            # page and that its fields are named 'username' and 'password'
            # -- confirm against the site's HTML.
            br.select_form(nr=0)
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    # feeds = [(u'Streiflicht', u'http://szmobil.sueddeutsche.de/show.php?id=streif')]

    def parse_index(self):
        """Build the feed list from the configured section pages.

        Returns a list of ``(section title, articles)`` tuples; sections
        for which no articles were found are left out.
        """
        # Every entry ends with a comma so that re-enabling one of the
        # commented-out sections keeps the list syntactically valid.
        sections = [
            ('Politik', 'http://szmobil.sueddeutsche.de/show.php?section=Politik'),
            # ('Seite Drei', 'http://szmobil.sueddeutsche.de/show.php?section=Seite+drei'),
            # ('Meinungsseite', 'http://szmobil.sueddeutsche.de/show.php?section=Meinungsseite'),
            # ('Panorama', 'http://szmobil.sueddeutsche.de/show.php?section=Panorama'),
            # ('Feuilleton', 'http://szmobil.sueddeutsche.de/show.php?section=Feuilleton'),
            # ('Medien', 'http://szmobil.sueddeutsche.de/show.php?section=Medien'),
            # ('Wissen', 'http://szmobil.sueddeutsche.de/show.php?section=Wissen'),
            # ('Wirtschaft', u'http://szmobil.sueddeutsche.de/show.php?section=Wirtschaft'),
            # ('Sport', u'http://szmobil.sueddeutsche.de/show.php?section=Sport'),
            # ('Muenchen-Bayern', u'http://szmobil.sueddeutsche.de/show.php?section=M%FCnchen%2FBayern'),
        ]
        feeds = []
        for title, url in sections:
            articles = self.nz_parse_section(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def nz_parse_section(self, url):
        """Collect the article links found on one section page.

        ``url`` is the address of the section page. Every ``<li>`` element
        that contains a link with an ``href`` contributes one article dict
        with the keys ``title``, ``url``, ``description`` and ``date``; the
        last two are always empty strings.
        """
        soup = self.index_to_soup(url)
        current_articles = []
        for li in soup.findAll('li'):
            a = li.find('a', href=True)
            if a is None:
                continue
            title = self.tag_to_string(a)
            # Use a separate name so the ``url`` parameter is not shadowed.
            href = a.get('href', False)
            if not href or not title:
                continue
            current_articles.append(
                {'title': title, 'url': href, 'description': '', 'date': ''}
            )
        return current_articles

02-05-2010, 06:41 PM	#1373
exdream Junior Member Posts: 9 Karma: 10 Join Date: Jan 2010 Device: Sony PRS-505	Please help! I'm trying to figure out a recipe for http://szmobil.sueddeutsche.de/. I'm working on it pretty long now and after a short success with parsing one section I can't get the login with calibres browser-instance going Every downloaded article page is the login form. Has anybody an idea? Thanks for your help! from calibre.web.feeds.recipes import BasicNewsRecipe class SzMobilRecipe(BasicNewsRecipe): title = u'S\xfcddeutsche Zeitung' oldest_article = 7 max_articles_per_feed = 100 description = 'Sueddeutsche Zeitung Mobile Ausgabe' language = 'de' needs_subscription = True def get_browser(self): br = BasicNewsRecipe.get_browser() if self.username is not None and self.password is not None: br.open('http://szmobil.sueddeutsche.de/login.php') br.select_form(nr=0) br['username'] = self.username br['password'] = self.password br.submit() return br # feeds = [(u'Streiflicht', u'http://szmobil.sueddeutsche.de/show.php?id=streif')] def parse_index(self): feeds = [] for title, url in [('Politik', 'http://szmobil.sueddeutsche.de/show.php?section=Politik') # ('Seite Drei', 'http://szmobil.sueddeutsche.de/show.php?section=Seite+drei'), # ('Meinungsseite', 'http://szmobil.sueddeutsche.de/show.php?section=Meinungsseite'), # ('Panorama', 'http://szmobil.sueddeutsche.de/show.php?section=Panorama'), # ('Feuilleton', 'http://szmobil.sueddeutsche.de/show.php?section=Feuilleton'), # ('Medien', 'http://szmobil.sueddeutsche.de/show.php?section=Medien'), # ('Wissen', 'http://szmobil.sueddeutsche.de/show.php?section=Wissen'), # ('Wirtschaft', u'http://szmobil.sueddeutsche.de/show.php?section=Wirtschaft'), # ('Sport', u'http://szmobil.sueddeutsche.de/show.php?section=Sport'), # ('Muenchen-Bayern', u'http://szmobil.sueddeutsche.de/show.php?section=M%FCnchen%2FBayern') ]: articles = self.nz_parse_section(url) if articles: feeds.append((title, articles)) return feeds def nz_parse_section(self, url): soup 
= self.index_to_soup(url) current_articles = [] for li in soup.findAll('li'): a = li.find('a', href = True) if a is None: continue title = self.tag_to_string(a) url = a.get('href', False) if not url or not title: continue current_articles.append({'title': title, 'url': url, 'description':'', 'date':''}) return current_articles