View Single Post
Old 02-05-2010, 05:41 PM   #1373
exdream
Junior Member
exdream began at the beginning.
 
Posts: 9
Karma: 10
Join Date: Jan 2010
Device: Sony PRS-505
Please help!
I'm trying to figure out a recipe for http://szmobil.sueddeutsche.de/. I've been working on it for quite a while now, and after a brief success with parsing one section, I can't get the login working with calibre's browser instance: every downloaded article page is the login form. Does anybody have an idea? Thanks for your help!

from calibre.web.feeds.recipes import BasicNewsRecipe

class SzMobilRecipe(BasicNewsRecipe):
    """Recipe for the mobile edition of the Sueddeutsche Zeitung.

    Logs in at szmobil.sueddeutsche.de when credentials are configured and
    builds one feed per enabled section from the links found inside the
    ``<li>`` elements of that section's page.
    """

    title = u'S\xfcddeutsche Zeitung'
    oldest_article = 7
    max_articles_per_feed = 100
    description = 'Sueddeutsche Zeitung Mobile Ausgabe'
    language = 'de'

    # get_browser() below logs in with self.username / self.password.
    needs_subscription = True

    def get_browser(self):
        """Return the recipe's browser, logged in if credentials are set.

        The login is skipped when either ``self.username`` or
        ``self.password`` is None.
        """
        # get_browser is looked up on the base class, so the instance has to
        # be passed explicitly; calling it without `self` raises a TypeError.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open('http://szmobil.sueddeutsche.de/login.php')
            # NOTE(review): assumes the first form on login.php is the login
            # form and that its fields are named 'username' and 'password' --
            # confirm against the page's HTML if articles still come back as
            # the login form.
            br.select_form(nr=0)
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    # feeds = [(u'Streiflicht', u'http://szmobil.sueddeutsche.de/show.php?id=streif')]

    def parse_index(self):
        """Build the feed list from the enabled section pages.

        Returns:
            A list of ``(section title, articles)`` tuples, where
            ``articles`` is the list produced by ``nz_parse_section``.
            Sections that yield no articles are left out.
        """
        sections = [
            ('Politik', 'http://szmobil.sueddeutsche.de/show.php?section=Politik'),
            # Remaining sections are disabled for now; uncomment to enable.
            # ('Seite Drei', 'http://szmobil.sueddeutsche.de/show.php?section=Seite+drei'),
            # ('Meinungsseite', 'http://szmobil.sueddeutsche.de/show.php?section=Meinungsseite'),
            # ('Panorama', 'http://szmobil.sueddeutsche.de/show.php?section=Panorama'),
            # ('Feuilleton', 'http://szmobil.sueddeutsche.de/show.php?section=Feuilleton'),
            # ('Medien', 'http://szmobil.sueddeutsche.de/show.php?section=Medien'),
            # ('Wissen', 'http://szmobil.sueddeutsche.de/show.php?section=Wissen'),
            # ('Wirtschaft', u'http://szmobil.sueddeutsche.de/show.php?section=Wirtschaft'),
            # ('Sport', u'http://szmobil.sueddeutsche.de/show.php?section=Sport'),
            # ('Muenchen-Bayern', u'http://szmobil.sueddeutsche.de/show.php?section=M%FCnchen%2FBayern')
        ]
        feeds = []
        for title, url in sections:
            articles = self.nz_parse_section(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def nz_parse_section(self, url):
        """Collect the articles linked from one section page.

        Args:
            url: Address of the section page to scan.

        Returns:
            A list of dicts with the keys 'title', 'url', 'description' and
            'date'. One dict is produced per ``<li>`` whose first link has
            both an href and non-empty text; 'description' and 'date' are
            always empty strings.
        """
        # Local import so the recipe runs on both Python 3 and Python 2
        # builds of calibre without touching the file-level imports.
        try:
            from urllib.parse import urljoin
        except ImportError:
            from urlparse import urljoin

        soup = self.index_to_soup(url)
        current_articles = []
        for li in soup.findAll('li'):
            a = li.find('a', href=True)
            if a is None:
                continue
            title = self.tag_to_string(a)
            href = a.get('href', False)
            if not href or not title:
                continue
            current_articles.append({
                'title': title,
                # Resolve against the section page so site-relative links
                # become fetchable; absolute hrefs pass through unchanged.
                'url': urljoin(url, href),
                'description': '',
                'date': '',
            })
        return current_articles
exdream is offline