I'm trying to write a recipe for an online weekly magazine. The front page contains the links, embedded in span tags with a specific class. The code works - sort of.
Even though the page has 10-13 links, the loop I created retrieves 2 and then stops. Can anybody help me with this, please?
Code:
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
vreme.com
'''
import string
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
class Vreme(BasicNewsRecipe):
    '''Calibre news recipe for the weekly magazine at vreme.com.

    The article list is scraped from the front page: every
    ``<span class="toc2">`` that wraps a link with a non-empty ``href``
    becomes one article in a single feed named 'Latest edition'.
    '''

    title = 'Vreme'
    __author__ = 'Darko Miletic'
    description = 'Politicki Nedeljnik Srbije'
    timefmt = ' [%a, %d %b, %Y]'
    no_stylesheets = True
    # NOTE(review): presumably these throttle fetching to one request at a
    # time with a one-second pause -- confirm against BasicNewsRecipe docs.
    simultaneous_downloads = 1
    delay = 1
    # Site root; article hrefs found on the front page are appended to it.
    INDEX = 'http://www.vreme.com'

    def parse_index(self):
        '''Build the feed list from the links on the front page.

        Returns:
            A one-element list ``[('Latest edition', articles)]`` in which
            every article is a dict with the keys ``title``, ``date``,
            ``url`` and ``description``.
        '''
        soup = self.index_to_soup(self.INDEX)
        # Every article carries the same date stamp, so format it once.
        date = strftime('%a, %d %b')
        articles = []
        for item in soup.findAll('span', attrs={'class': 'toc2'}):
            feed_link = item.find('a')
            if feed_link is None:
                continue
            # Tag.get() is used instead of has_key(), which is deprecated
            # in newer BeautifulSoup releases; it also lets us skip
            # anchors whose href is present but empty.
            href = feed_link.get('href')
            if not href:
                continue
            # Request the printer-friendly page. Pick the separator so the
            # URL stays well-formed when the href has no query string yet.
            # Assumes hrefs are site-relative and start with '/'.
            separator = '&' if '?' in href else '?'
            articles.append({
                'title': self.tag_to_string(feed_link),
                'date': date,
                'url': self.INDEX + href + separator + 'print=yes',
                'description': '',
            })
        return [('Latest edition', articles)]