Here is the personal profile for the Globe and Mail that I use with my PRS-505. I'm not a coder, so there is probably plenty of room for improvement. The only problem I have is that I cannot change the text size while viewing it on the Reader. When the e-book file is opened, the Reader defaults to S-sized text. Attempting to change the size to M or L causes my Reader to crash and restart. My firmware is ver. 1.0.00.08130.
Code:
import re
from calibre.web.feeds.news import BasicNewsRecipe
class GlobeMail(BasicNewsRecipe):
    """News recipe for The Globe and Mail.

    The recipe lists the newspaper's RSS feeds in ``get_feeds``, rewrites
    every article URL to a globeinvestor.com print URL in
    ``print_version`` and declares a list of regular-expression
    substitutions in ``preprocess_regexps``.
    """

    title = 'The Globe and Mail'

    # Settings read by the recipe framework; see the BasicNewsRecipe
    # documentation for the exact meaning of each attribute.
    html_description = False
    use_pubdate = True
    oldest_article = 7
    use_embedded_content = False
    max_articles_per_feed = 10
    simultaneous_downloads = 1
    no_stylesheets = True
    summary_length = 300
    html2lrf_options = ['--base-font-size', '9']

    # (compiled pattern, replacement callable) pairs.
    preprocess_regexps = [
        # Drop scripts entirely and blank out the content of style blocks.
        (re.compile(r'<script.*?</script>', re.IGNORECASE | re.DOTALL),
         lambda match: ' '),
        (re.compile(r'<style.*?</style>', re.IGNORECASE | re.DOTALL),
         lambda match: '<style> </style>'),
        # Replace everything from the "subscribe" body tag up to the
        # article abstract with a bare body/div opening.
        (re.compile(r'<body class="subscribe.*?<div id="articleAbstract">',
                    re.IGNORECASE | re.DOTALL),
         lambda match: '<body><div>'),
        (re.compile(r'<ul class="columnistInfo">.*?</ul>',
                    re.IGNORECASE | re.DOTALL),
         lambda match: ''),
        # Everything from the "note" paragraph to the end of the body is
        # replaced by a short notice.
        (re.compile(r'<p class="note".*?</body>', re.IGNORECASE | re.DOTALL),
         lambda match: '<br><br>Subscription required to read full story</body>'),
        # Remove empty deck / byline / date paragraphs.
        (re.compile(r'<p class="deck"></p>', re.IGNORECASE | re.DOTALL),
         lambda match: ' '),
        (re.compile(r'<p class="byline"></p>', re.IGNORECASE | re.DOTALL),
         lambda match: ' '),
        (re.compile(r'<p class="date"></p>', re.IGNORECASE | re.DOTALL),
         lambda match: ' '),
        # Strip the block between the globeinvestor.com link paragraph and
        # the next <h2.
        (re.compile(r'<p><a href="http://www.globeinvestor.com/">.*?<h2',
                    re.IGNORECASE | re.DOTALL),
         lambda match: '<h2'),
        (re.compile(r'<h1 class="keyline">.*?</h1>', re.IGNORECASE | re.DOTALL),
         lambda match: ' '),
        # Turn the first whitespace-free tag after a date paragraph into
        # "</p><br>".  Only the captured tag at the end of the match is
        # rewritten: a plain str.replace() on the whole match would also
        # rewrite any earlier occurrence of the captured text (e.g. the
        # "br" inside "February" when the tag is <br>).
        (re.compile(r'<p class="date">.*?<(\S+)>', re.IGNORECASE | re.DOTALL),
         lambda match: (match.group()[:match.start(1) - match.start()]
                        + '/p><br' + '>')),
        # Offsite links become plain named anchors.
        (re.compile(r'<a href.*? target="offsite">', re.IGNORECASE | re.DOTALL),
         lambda match: '<a name="#">'),
        # Flatten table markup: each row starts on a new line, cells and
        # closing tags become spaces.
        (re.compile(r'<tr>', re.IGNORECASE | re.DOTALL),
         lambda match: '<br>'),
        (re.compile(r'<td>', re.IGNORECASE | re.DOTALL),
         lambda match: ' '),
        (re.compile(r'</tr>', re.IGNORECASE | re.DOTALL),
         lambda match: ' '),
        (re.compile(r'</td>', re.IGNORECASE | re.DOTALL),
         lambda match: ' '),
        (re.compile(r'<hr>', re.IGNORECASE | re.DOTALL),
         lambda match: ' '),
        # Surround the copyright marker comment with line breaks.
        # NOTE(review): the three dots in the pattern are regex wildcards,
        # so any three characters match there - confirm this is intended.
        (re.compile(r'<!-- /frag.../copyright begins -->',
                    re.IGNORECASE | re.DOTALL),
         lambda match: '<br><!-- /frag.../copyright begins --><br>'),
    ]

    def get_article_url(self, article):
        """Return the article's ``feedburner_origlink`` value, or its
        ``link`` attribute when that key is absent."""
        return article.get('feedburner_origlink', article.link)

    def print_version(self, url):
        """Build the globeinvestor.com print URL for an article URL.

        The result is the print prefix followed by three '/'-joined parts
        of ``url``: the text between '/story/' and the next '.', and the
        last two '.'-separated pieces of the whole URL.

        Raises IndexError when ``url`` has no '/story/' segment or fewer
        than three dots.
        """
        story_prefix = url.split('/story/', 1)[1].split('.', 1)[0]
        dotted = url.rsplit('.', 3)
        return ('http://www.globeinvestor.com/servlet/ArticleNews/print/'
                + story_prefix + '/' + dotted[2] + '/' + dotted[3])

    def get_feeds(self):
        """Return the (title, RSS URL) pairs of the feeds to download.

        The letter prefixes in the titles keep the sections in a fixed
        alphabetical order.
        """
        return [
            (' A. Front Page', 'http://www.theglobeandmail.com/generated/rss/BN/Front.xml'),
            (' B. British Columbia', 'http://www.theglobeandmail.com/generated/rss/BN/HYBritishColumbia.xml'),
            (' C. National', 'http://www.theglobeandmail.com/generated/rss/BN/National.xml'),
            (' D. World', 'http://www.theglobeandmail.com/generated/rss/BN/International.xml'),
            (' E. Americas', 'http://www.theglobeandmail.com/generated/rss/BN/HYAmerica.xml'),
            (' F. Report on Business', 'http://www.theglobeandmail.com/generated/rss/BN/Business.xml'),
            (' G. Energy News', 'http://www.theglobeandmail.com/generated/rss/BN/energy.xml'),
            (' H. Your Money', 'http://www.theglobeandmail.com/generated/rss/BN/SpecialEvents2.xml'),
            (' I. Sports', 'http://www.theglobeandmail.com/generated/rss/BN/Sports.xml'),
            (' J. The Arts', 'http://www.theglobeandmail.com/generated/rss/BN/Entertainment.xml'),
            (' K. Movies', 'http://www.theglobeandmail.com/generated/rss/BN/HYMovies.xml'),
            (' L. Music', 'http://www.theglobeandmail.com/generated/rss/BN/HYMusic.xml'),
            (' M. Technology', 'http://www.theglobeandmail.com/generated/rss/BN/Technology.xml'),
            (' N. Science', 'http://www.theglobeandmail.com/generated/rss/BN/Science.xml'),
            (' O. Life', 'http://www.theglobeandmail.com/generated/rss/BN/lifeMain.xml'),
            (' P. Food & Wine', 'http://www.theglobeandmail.com/generated/rss/BN/lifeFoodWine.xml'),
            (' Q. Travel', 'http://www.theglobeandmail.com/generated/rss/BN/specialTravel.xml'),
            (' R. Health', 'http://www.theglobeandmail.com/generated/rss/BN/specialScienceandHealth.xml'),
        ]