#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Brendan Sleight <bms.calibre at barwap.com>'
'''
impreso.milenio.com
'''
import datetime
import string

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup


class Milenio(BasicNewsRecipe):
    """Recipe that builds a single feed from the daily "Nacional" index
    page of impreso.milenio.com."""

    title = u'Milenio-diario'
    __author__ = 'Bmsleight'
    description = 'Milenio-diario'
    oldest_article = 10
    max_articles_per_feed = 100
    no_stylesheets = False
    # Site root; index-page and article URLs are built relative to it.
    index = 'http://impreso.milenio.com'

    keep_only_tags = [
        dict(name='div', attrs={'class': 'content'})
    ]

    def _absolute_url(self, href):
        """Return *href* as an absolute URL on the site.

        Hrefs that already carry an http(s) scheme are returned unchanged;
        anything else is appended to ``self.index`` (the site root).
        """
        if href.startswith(('http://', 'https://')):
            return href
        return self.index + href

    def parse_index(self):
        """Collect today's articles from the "Nacional" index page.

        Fetches ``<index>/Nacional/YYYY/MM/DD`` for the current local date
        and gathers every link inside the ``div`` with class ``content``
        whose href contains ``/node/``.

        Returns a list holding one ``('Articles', articles)`` tuple, where
        each article is a dict with the keys ``title``, ``date``, ``url``
        and ``description``. The article list is empty when the content
        ``div`` is not found.
        """
        # Example index URL: http://impreso.milenio.com/Nacional/2010/09/01/
        today_path = datetime.date.today().strftime("%Y/%m/%d")
        soup = self.index_to_soup(self.index + "/Nacional/" + today_path)
        maincontent = soup.find('div', attrs={'class': 'content'})

        articles = []
        by_url = {}  # url -> article dict, used to drop repeated links
        if maincontent:
            # Same timestamp for every article, so compute it once.
            date = strftime(self.timefmt)
            for link in maincontent.findAll('a', href=True):
                href = link['href']
                if "/node/" not in href:
                    continue

                url = self._absolute_url(href)
                title = self.tag_to_string(link) or ''

                known = by_url.get(url)
                if known is not None:
                    # Repeated link to an article already listed: keep a
                    # single entry, but adopt this link's text if the
                    # first occurrence had no usable title.
                    if not known['title'].strip() and title.strip():
                        known['title'] = title
                    continue

                article = {
                    'title': title,
                    'date': date,
                    'url': url,
                    'description': '',
                }
                by_url[url] = article
                articles.append(article)

        return [('Articles', articles)]