View Single Post
Old 04-06-2011, 03:46 PM   #2
Selcal
Member
Selcal began at the beginning.
 
Posts: 16
Karma: 10
Join Date: Jul 2010
Device: PRS600 / Cybook Opus
Ok, I understand I can use a try syntax or similar, but my knowledge of Python is too poor and I can't find out how to do it. Here's the code that produces the error:
Code:
def parse_index(self):
        krant = []
        def strip_title(_title):
            i = 0 
            while ((_title[i] <> ":") and (i <= len(_title))): 
               i = i + 1
            return(_title[0:i])		     
        soup = self.index_to_soup(self.INDEX_MAIN)
        mainsoup = soup.find('td', attrs={'id': 'select_page_top'})
        for option in mainsoup.findAll('option'):
           articles = []
           _INDEX = 'http://www.volkskrant.nl/vk-online/VK/' + self.RETRIEVEDATE + '___/' + option['value'] + '/#text'
           _INDEX_ARTICLE = 'http://www.volkskrant.nl/vk-online/VK/' + self.RETRIEVEDATE + '___/' + option['value'] + '/'
           print ''
           print '<-------    Processing section: ' + _INDEX + ' ------------------------->'
           soup = self.index_to_soup(_INDEX)
           for item in soup.findAll('area'):
              art_nr = item['class']
              attrname = art_nr[0:12] + '_section' + option['value'][0:5] + '_' + art_nr[26:len(art_nr)]
              print '==> Found: ' + attrname;
              index_title = soup.find('div', attrs={'class': attrname})
              get_title = index_title['title'];
              _ARTICLE   = _INDEX_ARTICLE + attrname + '.html#text'
              title = get_title;
              print '--> Title: ' + title;
              print '--> URL: ' + _ARTICLE;
              souparticle =  self.index_to_soup(_ARTICLE);
              headerurl = souparticle.findAll('frame')[0]['src'];
              print '--> Read frame name for header: ' + headerurl;
              url = _INDEX_ARTICLE + headerurl[0:len(headerurl)-12] + '_text.html';
              print '--> Corrected URL: ' + url;
              if (get_title <> ''):
                 title = strip_title(get_title)
                 date  = strftime(' %B %Y')
              if (title <> ''):
                 articles.append({
                                         'title'      :title
                                        ,'date'       :date
                                        ,'url'        :url
                                        ,'description':''
                                        })
           krant.append( (option.string, articles))
        return krant
Selcal is offline   Reply With Quote