MobileRead Forums - View Single Post - Custom recipes (archive, read-only)

exdream · 02-03-2010, 04:50 PM

Hi

I am trying to write a recipe for http://szmobil.sueddeutsche.de/. This is the code so far (with which I get "IndexError: list index out of range", Error Code: 1). Am I on the right track with this? Can somebody please tell me what is wrong?

...

def parse_index(self):
    """Build the feed index from the fixed list of newspaper sections.

    Each section URL is passed to ``self.nz_parse_section`` in the order
    listed below. Sections for which that call returns an empty (falsy)
    result are left out.

    Returns:
        A list of ``(section_title, articles)`` tuples, where ``articles``
        is whatever ``nz_parse_section`` returned for that section.
    """
    sections = (
        ('Politik', 'http://szmobil.sueddeutsche.de/show.php?section=Politik'),
        ('Seite Drei', 'http://szmobil.sueddeutsche.de/show.php?section=Seite+drei'),
        ('Meinungsseite', 'http://szmobil.sueddeutsche.de/show.php?section=Meinungsseite'),
        ('Panorama', 'http://szmobil.sueddeutsche.de/show.php?section=Panorama'),
        ('Feuilleton', 'http://szmobil.sueddeutsche.de/show.php?section=Feuilleton'),
        ('Medien', 'http://szmobil.sueddeutsche.de/show.php?section=Medien'),
        ('Wissen', 'http://szmobil.sueddeutsche.de/show.php?section=Wissen'),
        ('Wirtschaft', u'http://szmobil.sueddeutsche.de/show.php?section=Wirtschaft'),
        ('Sport', u'http://szmobil.sueddeutsche.de/show.php?section=Sport'),
        ('Muenchen-Bayern', u'http://szmobil.sueddeutsche.de/show.php?section=M%FCnchen%2FBayern'),
    )

    # Lazily parse one section at a time, in listing order.
    parsed_sections = (
        (section_title, self.nz_parse_section(section_url))
        for section_title, section_url in sections
    )
    # Keep only the sections that actually yielded articles.
    return [
        (section_title, section_articles)
        for section_title, section_articles in parsed_sections
        if section_articles
    ]

def nz_parse_section(self, url):
    """Collect the article links found on one section page.

    The page at ``url`` is loaded with ``self.index_to_soup`` and every
    ``<li>`` element that contains an ``<a href=...>`` is treated as an
    article entry. Entries with an empty link text or empty href are
    skipped.

    Each href is resolved against ``url``, so relative links on the section
    page become absolute article URLs; hrefs that are already absolute are
    left unchanged.

    Args:
        url: Address of the section page to scan.

    Returns:
        A list of dicts with the keys ``title``, ``url``, ``description``
        and ``date`` (the last two are always empty strings), in page order.
    """
    # Function-scope import so this method stands on its own; the fallback
    # covers interpreters that only ship the Python 2 module name.
    try:
        from urllib.parse import urljoin
    except ImportError:
        from urlparse import urljoin

    soup = self.index_to_soup(url)

    current_articles = []
    for li in soup.findAll('li'):
        a = li.find('a', href=True)
        if a is None:
            continue
        title = self.tag_to_string(a)
        href = a.get('href', False)
        if not href or not title:
            continue
        # Use a separate name so the section URL parameter is not
        # overwritten; it is needed as the base for resolving every link.
        article_url = urljoin(url, href)
        self.log('\t\tFound article:', title)
        self.log('\t\t\t', article_url)
        current_articles.append(
            {'title': title, 'url': article_url, 'description': '', 'date': ''}
        )

    return current_articles

02-03-2010, 04:50 PM	#1345
exdream Junior Member Posts: 9 Karma: 10 Join Date: Jan 2010 Device: Sony PRS-505	Hi I try to make a recipe for http://szmobil.sueddeutsche.de/ This ist the code up to now (with which I get - IndexError: list index out of range -Error Code: 1). Am I on the right way with that? Can somebody please tell me what is wrong. ... def parse_index(self): feeds = [] for title, url in [('Politik', 'http://szmobil.sueddeutsche.de/show.php?section=Politik'), ('Seite Drei', 'http://szmobil.sueddeutsche.de/show.php?section=Seite+drei'), ('Meinungsseite', 'http://szmobil.sueddeutsche.de/show.php?section=Meinungsseite'), ('Panorama', 'http://szmobil.sueddeutsche.de/show.php?section=Panorama'), ('Feuilleton', 'http://szmobil.sueddeutsche.de/show.php?section=Feuilleton'), ('Medien', 'http://szmobil.sueddeutsche.de/show.php?section=Medien'), ('Wissen', 'http://szmobil.sueddeutsche.de/show.php?section=Wissen'), ('Wirtschaft', u'http://szmobil.sueddeutsche.de/show.php?section=Wirtschaft'), ('Sport', u'http://szmobil.sueddeutsche.de/show.php?section=Sport'), ('Muenchen-Bayern', u'http://szmobil.sueddeutsche.de/show.php?section=M%FCnchen%2FBayern'), ]: articles = self.nz_parse_section(url) if articles: feeds.append((title, articles)) return feeds def nz_parse_section(self, url): soup = self.index_to_soup(url) # div = soup.find(attrs={'class': 'col-300 categoryList'}) # date = div.find(attrs={'class': 'link-list-heading'}) current_articles = [] # for tag in date.findAllNext(attrs = {'class': ['linkList', 'link-list-heading']}): # if tag.get('class') == 'link-list-heading': # break for li in soup.findAll('li'): a = li.find('a', href = True) if a is None: continue title = self.tag_to_string(a) url = a.get('href', False) if not url or not title: continue # if url.startswith('/'): # url = 'http://www.nzherald.co.nz'+url self.log('\t\tFound article:', title) self.log('\t\t\t', url) current_articles.append({'title': title, 'url': url, 'description':'', 'date':''}) return current_articles