Hi Kovid,
The RSS feed addresses in the Il Sole 24 Ore recipe are out of date. I have suggested the updated feeds on your GitHub, but in case you prefer, I have also copied them below. In addition, the recipe still fails.
Thanks
Jamie
Code:
# Module-level recipe metadata: author, copyright notice and a short
# description carrying the recipe's version/date note.
__author__ = "Marco Saraceno"
__copyright__ = "2010, Marco Saraceno <marcosaraceno at gmail.com>"
description = "Italian daily newspaper - v 1.1 (Mar14,2011)"

# Bare string kept as in the original; it records the site this recipe targets.
'''
http://www.ilsole24ore.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class IlSole24Ore(BasicNewsRecipe):
    """Calibre news recipe for the Italian daily "Il Sole 24 Ore".

    Declares the recipe metadata, the RSS feeds to download, the page
    elements to strip from each article, and two URL hooks
    (``get_article_url`` and ``print_version``).
    """

    # --- Recipe metadata -------------------------------------------------
    __author__ = 'Marco Saraceno'
    description = 'Italian financial daily newspaper'
    cover_url = 'http://www.shopping24.ilsole24ore.com/ProductRelated/rds/img/logo_sole.gif'
    title = u'Il Sole 24 Ore'
    publisher = 'Gruppo editoriale GRUPPO 24ORE'
    category = 'News, politics, culture, economy, financial, Italian'
    language = 'it'
    timefmt = '[%a, %d %b, %Y]'

    # --- Download settings (see calibre's BasicNewsRecipe API for the
    # exact meaning of each option) ---------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False

    # --- Presentation ----------------------------------------------------
    extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'

    # Page elements removed from every downloaded article.
    remove_tags = [
        dict(name='div', attrs={'class': ['header', 'titolo']}),
        dict(name='table', attrs={'class': ['footer1024', 'footerdown']}),
    ]

    # (section title, RSS feed URL) pairs.
    feeds = [
        ('Italia', 'https://www.ilsole24ore.com/rss/italia.xml'),
        ('Mondo', 'https://www.ilsole24ore.com/rss/mondo.xml'),
        ('Economia', 'https://www.ilsole24ore.com/rss/economia.xml'),
        ('Finanza', 'https://www.ilsole24ore.com/rss/finanza.xml'),
        ('Commenti', 'https://www.ilsole24ore.com/rss/commenti.xml'),
        ('Risparmio', 'https://www.ilsole24ore.com/rss/risparmio.xml'),
        ('Norme e Tributi', 'https://www.ilsole24ore.com/rss/norme-e-tributi.xml'),
        ('Management', 'https://www.ilsole24ore.com/rss/management.xml'),
        ('Cultura', 'https://www.ilsole24ore.com/rss/cultura.xml'),
        ('Tecnologia', 'https://www.ilsole24ore.com/rss/tecnologia.xml'),
        ('Food', 'https://www.ilsole24ore.com/rss/food.xml'),
        ('Moda', 'https://www.ilsole24ore.com/rss/moda.xml'),
        ('Motori', 'https://www.ilsole24ore.com/rss/motori.xml'),
        ('Casa', 'https://www.ilsole24ore.com/rss/casa.xml'),
        ('Viaggi', 'https://www.ilsole24ore.com/rss/viaggi.xml'),
        ('Salute', 'https://www.ilsole24ore.com/rss/salute.xml'),
        ('Arteconomy', 'https://www.ilsole24ore.com/rss/arteconomy.xml'),
        ('Sport', 'https://www.ilsole24ore.com/rss/sport24.xml'),
    ]

    def get_article_url(self, article):
        """Return the URL to download for a feed entry.

        :param article: feed entry; only its ``'link'`` value (read via
            ``article.get``) is used.
        :return: ``None`` when the entry has no link. When the link's last
            path segment is ``story01.htm``, the second-to-last segment is
            un-escaped and returned with an ``http://`` prefix. Otherwise
            the link is returned unchanged.
        """
        link = article.get('link', None)
        if link is None:
            # No link available: report "no URL" instead of handing the
            # whole entry object back to the caller.
            return None

        segments = link.split('/')
        if segments[-1] == 'story01.htm':
            # NOTE(review): presumably a redirector-style link that carries
            # the real article address, escaped, in its penultimate path
            # segment - confirm against a live feed.
            link = segments[-2]
            # Applied strictly in this order: every replacement operates on
            # the output of the previous one, so reordering changes results.
            escapes = (
                ('0B', '.'),
                ('0C', '/'),
                ('0D', '?'),
                ('0E', '-'),
                ('0F', '='),
                ('0G', '&'),
                ('0N', '.com'),
                ('0L0S', 'www.'),
                ('0A', '0'),
            )
            for token, plain in escapes:
                link = link.replace(token, plain)
            link = 'http://' + link
        return link

    def print_version(self, url):
        """Return ``url`` with every ``'.shtml'`` rewritten to ``'_PRN.shtml'``.

        URLs that do not contain ``'.shtml'`` are returned unchanged.
        """
        return url.replace('.shtml', '_PRN.shtml')