Hello,
I stand to be corrected but I think it's something to do with the encoding:
Code:
encoding = 'ISO-8859-15'
You might try this instead:
Code:
encoding = 'UTF-8'
This is as suggested in the 2nd post; hopefully it solves the problem. With this change, the full recipe would be:
Code:
__license__ = 'GPL v3'
__copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan, 2011 Modified by Phil Burns, 2013 Modified by O. O'H"
'''
irishtimes.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class IrishTimes(BasicNewsRecipe):
    """Calibre news recipe for The Irish Times (irishtimes.com).

    Declares the feed list and clean-up rules as class attributes and
    rewrites article URLs to their ``_pf.html`` variant in
    :meth:`print_version`.
    """

    title = u'The Irish Times'
    encoding = 'UTF-8'
    __author__ = "Derry FitzGerald, Ray Kinsella, David O'Callaghan, Phil Burns & O. O'H"
    language = 'en_IE'
    timefmt = ' (%A, %B %d, %Y)'

    oldest_article = 1.0
    max_articles_per_feed = 100
    no_stylesheets = True
    simultaneous_downloads = 5

    # Raw string so that ``\/`` and ``\.`` reach the regex engine without
    # triggering Python's invalid-escape warnings; the pattern itself is
    # unchanged.
    # NOTE(review): nothing in this class uses ``r``, and the ``|`` binds more
    # loosely than the ``http://`` prefix, so the two alternatives are probably
    # not what was intended -- confirm before relying on it.
    r = re.compile(r'.*(?P<url>http:\/\/(www.irishtimes.com)|(rss.feedsportal.com\/c)\/.*\.html?).*')

    # A list of tag specs, in the same form as ``remove_tags`` below.
    keep_only_tags = [dict(name='article', attrs={'class': 'article row'})]
    remove_tags = [dict(name='div', attrs={'class': 'footer'})]
    extra_css = 'p, div { margin: 0pt; border: 0pt; text-indent: 0.5em } .headline {font-size: large;} \n .fact { padding-top: 10pt }'

    feeds = [
        ('News', 'http://www.irishtimes.com/cmlink/the-irish-times-news-1.1319192'),
        ('Business', 'http://www.irishtimes.com/cmlink/the-irish-times-business-1.1319195'),
        # NOTE(review): this is the same URL as 'News' above, which looks like
        # a copy-paste slip -- confirm the correct Debate feed URL.
        ('Debate', 'http://www.irishtimes.com/cmlink/the-irish-times-news-1.1319192'),
        ('Life Style', 'http://www.irishtimes.com/cmlink/the-irish-times-life-style-1.1319214'),
        ('Culture', 'http://www.irishtimes.com/cmlink/the-irish-times-culture-1.1319213'),
        ('Sport', 'http://www.irishtimes.com/cmlink/the-irish-times-sport-1.1319194'),
    ]

    def print_version(self, url):
        """Return the ``_pf.html`` form of an article URL.

        For ``rss.feedsportal.com`` links the escaped article path that
        follows ``irishtimes`` is unescaped and re-rooted on
        ``http://www.irishtimes.com``. Any other URL simply has ``.html``
        replaced by ``_pf.html``.

        If a feedsportal link does not contain ``irishtimes`` it cannot be
        rewritten, so it is returned unchanged.
        """
        if 'rss.feedsportal.com' not in url:
            return url.replace('.html', '_pf.html')

        start = url.find('irishtimes')
        if start == -1:
            # Nothing to anchor on; slicing from -1 + 12 would yield garbage.
            return url

        # Skip 'irishtimes' (10 chars) plus the 2-char token that follows it.
        path = url[start + 12:]
        path = path.replace('0C', '/')
        # Only collapse the '0A' escape to '0'; stripping every 'A' would also
        # remove genuine capital A characters from the path.
        path = path.replace('0A', '0')
        path = path.replace('0Bhtml/story01.htm', '_pf.html')
        return 'http://www.irishtimes.com' + path

    def get_article_url(self, article):
        """Return the ``link`` attribute of the parsed feed entry."""
        return article.link