Ok, I understand I can use a try syntax or similar, but my knowledge of Python is too poor and I can't find out how to do it. Here's the code that produces the error:
Code:
def parse_index(self):
krant = []
def strip_title(_title):
i = 0
while ((_title[i] <> ":") and (i <= len(_title))):
i = i + 1
return(_title[0:i])
soup = self.index_to_soup(self.INDEX_MAIN)
mainsoup = soup.find('td', attrs={'id': 'select_page_top'})
for option in mainsoup.findAll('option'):
articles = []
_INDEX = 'http://www.volkskrant.nl/vk-online/VK/' + self.RETRIEVEDATE + '___/' + option['value'] + '/#text'
_INDEX_ARTICLE = 'http://www.volkskrant.nl/vk-online/VK/' + self.RETRIEVEDATE + '___/' + option['value'] + '/'
print ''
print '<------- Processing section: ' + _INDEX + ' ------------------------->'
soup = self.index_to_soup(_INDEX)
for item in soup.findAll('area'):
art_nr = item['class']
attrname = art_nr[0:12] + '_section' + option['value'][0:5] + '_' + art_nr[26:len(art_nr)]
print '==> Found: ' + attrname;
index_title = soup.find('div', attrs={'class': attrname})
get_title = index_title['title'];
_ARTICLE = _INDEX_ARTICLE + attrname + '.html#text'
title = get_title;
print '--> Title: ' + title;
print '--> URL: ' + _ARTICLE;
souparticle = self.index_to_soup(_ARTICLE);
headerurl = souparticle.findAll('frame')[0]['src'];
print '--> Read frame name for header: ' + headerurl;
url = _INDEX_ARTICLE + headerurl[0:len(headerurl)-12] + '_text.html';
print '--> Corrected URL: ' + url;
if (get_title <> ''):
title = strip_title(get_title)
date = strftime(' %B %Y')
if (title <> ''):
articles.append({
'title' :title
,'date' :date
,'url' :url
,'description':''
})
krant.append( (option.string, articles))
return krant