Can anybody help why the Multipage part doesn't work on following recipe:
Spoiler:
Code:
class AdvancedUserRecipe1275708473(BasicNewsRecipe):
title = u'My Psychology Today'
# oldest_article = 7
max_articles_per_feed = 100
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en'
keep_only_tags = [dict(name='div', attrs={'id':['contentColumn','content-content']})]
remove_tags = [
dict(name='div', attrs={'id':'advertisement advertisement-zone-51'}),
dict(name='div', attrs={'id':'block-td_search_160'}),
dict(name='div', attrs={'id':'block-cam_search_160'}),
dict(name='div', attrs={'class':'article-sub-meta'}),
dict(name='div', attrs={'class':'article-terms meta'}),
]
# remove_tags_after = dict(id=['rightColumn'])
feeds = [(u'Contents', u'http://www.psychologytoday.com/articles/index.rss')]
def append_page(self, soup, appendtag, position):
pager = soup.find('div',attrs={'class':'pager-next'})
if pager:
nexturl = self.INDEX + pager.a['href']
soup2 = self.index_to_soup(nexturl)
texttag = soup2.find('div', attrs={'id':'contentColumn'})
for it in texttag.findAll(style=True):
del it['style']
newpos = len(texttag.contents)
self.append_page(soup2,texttag,newpos)
texttag.extract()
appendtag.insert(position,texttag)
def postprocess_html(self, soup, first):
for tag in soup.findAll(name=['ul', 'li']):
tag.name = 'div'
return soup
Thank you in advance.