View Single Post
Old 02-03-2010, 04:50 PM   #1345
exdream
Junior Member
exdream began at the beginning.
 
Posts: 9
Karma: 10
Join Date: Jan 2010
Device: Sony PRS-505
Hi

I am trying to write a recipe for http://szmobil.sueddeutsche.de/. This is the code so far (running it gives me "IndexError: list index out of range", Error Code: 1). Am I on the right track with this? Can somebody please tell me what is wrong?
...

def parse_index(self):
    """Build the feed index from the fixed list of SZ mobile sections.

    Each section page is handed to ``self.nz_parse_section``; sections for
    which it returns no articles are left out of the result.

    Returns:
        A list of ``(section_title, articles)`` tuples, in the order the
        sections are listed below. ``articles`` is whatever
        ``nz_parse_section`` returned for that section.

    NOTE(review): presumably this is the ``parse_index`` hook of a calibre
    news recipe -- the enclosing class is not visible here, confirm.
    """
    # (title shown in the feed list, URL of the section's overview page).
    # The last URL carries "München/Bayern" percent-encoded as Latin-1.
    sections = (
        ('Politik', 'http://szmobil.sueddeutsche.de/show.php?section=Politik'),
        ('Seite Drei', 'http://szmobil.sueddeutsche.de/show.php?section=Seite+drei'),
        ('Meinungsseite', 'http://szmobil.sueddeutsche.de/show.php?section=Meinungsseite'),
        ('Panorama', 'http://szmobil.sueddeutsche.de/show.php?section=Panorama'),
        ('Feuilleton', 'http://szmobil.sueddeutsche.de/show.php?section=Feuilleton'),
        ('Medien', 'http://szmobil.sueddeutsche.de/show.php?section=Medien'),
        ('Wissen', 'http://szmobil.sueddeutsche.de/show.php?section=Wissen'),
        ('Wirtschaft', 'http://szmobil.sueddeutsche.de/show.php?section=Wirtschaft'),
        ('Sport', 'http://szmobil.sueddeutsche.de/show.php?section=Sport'),
        ('Muenchen-Bayern', 'http://szmobil.sueddeutsche.de/show.php?section=M%FCnchen%2FBayern'),
    )

    feeds = []
    for title, url in sections:
        articles = self.nz_parse_section(url)
        # Skip sections that yielded nothing so no empty feed is emitted.
        if articles:
            feeds.append((title, articles))
    return feeds

def nz_parse_section(self, url):
    """Collect the article links found on one section overview page.

    The page is fetched with ``self.index_to_soup(url)`` and every ``<li>``
    element is inspected; the first ``<a href=...>`` inside it is taken as
    an article link. Items without such a link, without an href value, or
    without link text are skipped.

    Args:
        url: Address of the section page. It also serves as the base
            against which relative article links are resolved.

    Returns:
        A list of dicts with the keys ``'title'``, ``'url'``,
        ``'description'`` and ``'date'`` (the last two always empty
        strings), in page order. Empty if no usable link was found.
    """
    # Function-scope import: the file's import block is not visible here.
    try:
        from urllib.parse import urljoin
    except ImportError:  # Python 2 interpreter
        from urlparse import urljoin

    soup = self.index_to_soup(url)

    current_articles = []
    for li in soup.findAll('li'):
        a = li.find('a', href=True)
        if a is None:
            continue
        title = self.tag_to_string(a)
        href = a.get('href')
        if not href or not title:
            continue
        # Keep the section URL intact (the original reused the name ``url``
        # for the link) and resolve relative hrefs against it, so callers
        # always receive an absolute address.
        article_url = urljoin(url, href)
        self.log('\t\tFound article:', title)
        self.log('\t\t\t', article_url)
        current_articles.append(
            {'title': title, 'url': article_url, 'description': '', 'date': ''}
        )

    return current_articles
exdream is offline