Hy Starson17
this is the entire recipe.
my previous post is incorrect.
I obtain the entire page, like you, and it's equal than the page obtained with "VIEW SOURCE".
My problem is:
i am not able to find anything in the page.
i have tried various combinatio of attribute but with no results.
PHP Code:
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version'
'''
http://rassegnastampa.mef.gov.it/mefnazionale/Default.aspx
'''
from calibre.web.feeds.news import BasicNewsRecipe
class RassegnaMefParseIndex(BasicNewsRecipe):
author = 'Marini Gabriele'
description = 'Rassegna Stampa MEV'
cover_url = 'http://rassegnastampa.mef.gov.it/Mef/sorg_n/nazionale.jpg'
title = u'Rassegna MEF'
publisher = 'Ministero Economia e Finanze'
category = 'News, politics, culture, economy, general interest'
language = 'it'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 7
max_articles_per_feed = 100
use_embedded_content = False
recursion = 10
remove_javascript = True
def parse_index(self):
feeds = []
for title, url in [
("Rassegna Nazionale", "http://rassegnastampa.mef.gov.it/mefnazionale/Default.aspx"),
("Rassegna Nazionale 2", "http://rassegnastampa.mef.gov.it/mefnazionale/")
]:
soup = self.index_to_soup(url)
articles = []
#Main Aperture
soup = soup.find(name='div', attr={'id':'results'})
if soup:
article = soup.find('tbody')
for article in soup.findAllNext('tr'):
article_first = article
tupla = article.find(attrs={'class':'TopicCellShort'})
title_url = self.tag_to_string(tupla)
tupla = article.find(attrs={'class':'PublicationCellShort'})
title_url += self.tag_to_string(tupla)
tupla = article.find(attrs={'class':'TitleCellShort'})
title_url += self.tag_to_string(tupla)
tupla = article.find(attrs={'class':'OcrLinkCellShort'})
link = tupla.get('href', False)
date = ''
description = ''
if title_url:
articles.append({'title': title_url, 'url': link,'description':description, 'date':date})
if articles:
feeds.append((title, articles))
return feeds