View Single Post
Old 05-28-2011, 03:19 PM   #1
Shuichiro
Junior Member
Shuichiro began at the beginning.
 
Posts: 4
Karma: 10
Join Date: May 2011
Device: Kindle
Psychology Today website changed

Can someone please help with altering the default installed "Psychology Today" recipe? I miss reading my articles and whenever I download the feed the articles fail to download.


from calibre.ptempfile import PersistentTemporaryFile
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1275708473(BasicNewsRecipe):
title = u'Psychology Today'
_author__ = 'rty'
publisher = u'www.psychologytoday.com'
category = u'Psychology'
max_articles_per_feed = 100
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en'
temp_files = []
articles_are_obfuscated = True
remove_tags = [
dict(name='div', attrs={'class':['print-source_url','field-items','print-footer']}),
dict(name='span', attrs={'class':'print-footnote'}),
]
remove_tags_before = dict(name='h1', attrs={'class':'print-title'})
remove_tags_after = dict(name='div', attrs={'class':['field-items','print-footer']})

feeds = [(u'Contents', u'http://www.psychologytoday.com/articles/index.rss')]

def get_article_url(self, article):
return article.get('link', None)

def get_obfuscated_article(self, url):
br = self.get_browser()
br.open(url)
response = br.follow_link(url_regex = r'/print/[0-9]+', nr = 0)
html = response.read()
self.temp_files.append(PersistentTemporaryFile('_f a.html'))
self.temp_files[-1].write(html)
self.temp_files[-1].close()
return self.temp_files[-1].name

def get_cover_url(self):
index = 'http://www.psychologytoday.com/magazine/'
soup = self.index_to_soup(index)
for image in soup.findAll('img',{ "class" : "imagefield imagefield-field_magazine_cover" }):
return image['src'] + '.jpg'
return None
Shuichiro is offline   Reply With Quote