Figured out the issue. The recipe was reading the title attribute of the link instead of the text within it. As a result, characters such as the registered trademark symbol showed up in the feed names as control codes instead of the actual symbols.
I split out those parts and ended up with the following code:
Code:
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds import Feed
import re
class PaizoWebFictionRecipe(BasicNewsRecipe):
    """Calibre recipe for the Pathfinder Tales web fiction serials on paizo.com.

    The list of feeds is not fixed: it is scraped from the serial index page
    each time the recipe runs (see ``get_feeds``), producing one Atom feed
    per story linked from that page.
    """

    title = 'Pathfinder Web Fiction v2.0.1'
    # Deliberately huge so that age-based filtering effectively never drops
    # a chapter (NOTE(review): calibre documents this value as days).
    oldest_article = 1000000
    max_articles_per_feed = 10
    reverse_article_order = True
    cover_url = 'http://paizo.com/image/content/Logos/PathfinderTales_500.jpeg'
    remove_tags_after = [dict(name='a', text='Tags')]
    # Prefix site-relative ``src`` attributes with the paizo.com host.
    # The negative lookahead leaves already-absolute, protocol-relative and
    # ``data:`` sources alone; prefixing those would yield broken URLs such
    # as ``http://www.paizo.comhttp://...``.
    preprocess_regexps = [
        (
            re.compile(r'src="(?!https?://|//|data:)', re.DOTALL | re.IGNORECASE),
            lambda match: 'src="http://www.paizo.com',
        ),
    ]

    def get_feeds(self):
        """Build the feed list by scraping the serial index page.

        Each ``<span class="productCategory">`` on the index page is
        inspected for a link. The feed name is the link's text (not its
        ``title`` attribute), reduced to the part after the last ``>``;
        the feed URL is the link target with ``&xml=atom`` appended.

        Returns:
            A list of ``(name, url)`` tuples, one per story link found.

        Raises:
            NotImplementedError: if no usable story link was found.
        """
        feeds = []
        soup = self.index_to_soup('http://paizo.com/pathfinder/tales/serial')
        for span in soup.findAll('span', {'class': 'productCategory'}):
            link = span.a
            # Skip spans that hold no anchor, and anchors whose content is
            # not a single text node (``.string`` is None in that case).
            if link is None or link.string is None:
                continue
            href = link.get('href')
            if not href:
                # An anchor without a target cannot be turned into a feed.
                continue
            # Use the text directly; no byte-string formatting round-trip,
            # so non-ASCII characters in the name are kept intact.
            name = link.string.split('>').pop()
            # Single-argument print() behaves the same on Python 2 and 3.
            print('Story name is :  %s' % name)
            feeds.append((name, 'http://paizo.com' + href + '&xml=atom'))
        if not feeds:
            raise NotImplementedError(
                'No story feeds found on the Pathfinder Tales index page')
        print('Feeds are:  %s' % (feeds,))
        return feeds