|
|
#1 |
|
Connoisseur
![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() Posts: 55
Karma: 13316
Join Date: Jul 2012
Device: iPad
|
Psychology Today Recipe
I wrote this recipe a long while ago, but somehow I never posted here. Here it is:
Code:
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class PsychologyToday(BasicNewsRecipe):
title = 'Psychology Today'
__author__ = 'Rick Shang'
description = 'This magazine takes information from the latest research in the field of psychology and makes it useful to people in their everyday lives. Its coverage encompasses self-improvement, relationships, the mind-body connection, health, family, the workplace and culture.'
language = 'en'
category = 'news'
encoding = 'UTF-8'
keep_only_tags = [dict(attrs={'class':['print-title', 'print-submitted', 'print-content', 'print-footer', 'print-source_url', 'print-links']})]
no_javascript = True
no_stylesheets = True
def parse_index(self):
articles = []
soup = self.index_to_soup('http://www.psychologytoday.com/magazine')
#Go to the main body
div = soup.find('div',attrs={'id':'content-content'})
#Find cover & date
cover_item = div.find('div', attrs={'class':'collections-header-image'})
cover = cover_item.find('img',src=True)
self.cover_url = cover['src']
date = self.tag_to_string(cover['title'])
self.timefmt = u' [%s]'%date
articles = []
for post in div.findAll('div', attrs={'class':'collections-node-feature-info'}):
title = self.tag_to_string(post.find('h2'))
author_item=post.find('div', attrs={'class':'collection-node-byline'})
author = re.sub(r'.*by\s',"",self.tag_to_string(author_item).strip())
title = title + u' (%s)'%author
article_page= self.index_to_soup('http://www.psychologytoday.com'+post.find('a', href=True)['href'])
print_page=article_page.find('li', attrs={'class':'print_html first'})
url='http://www.psychologytoday.com'+print_page.find('a',href=True)['href']
desc = self.tag_to_string(post.find('div', attrs={'class':'collection-node-description'})).strip()
self.log('Found article:', title)
self.log('\t', url)
self.log('\t', desc)
articles.append({'title':title, 'url':url, 'date':'','description':desc})
for post in div.findAll('div', attrs={'class':'collections-node-thumbnail-info'}):
title = self.tag_to_string(post.find('h2'))
author_item=post.find('div', attrs={'class':'collection-node-byline'})
article_page= self.index_to_soup('http://www.psychologytoday.com'+post.find('a', href=True)['href'])
print_page=article_page.find('li', attrs={'class':'print_html first'})
description = post.find('div', attrs={'class':'collection-node-description'})
author = re.sub(r'.*by\s',"",self.tag_to_string(description.nextSibling).strip())
desc = self.tag_to_string(description).strip()
url='http://www.psychologytoday.com'+print_page.find('a',href=True)['href']
title = title + u' (%s)'%author
self.log('Found article:', title)
self.log('\t', url)
self.log('\t', desc)
articles.append({'title':title, 'url':url, 'date':'','description':desc})
for post in div.findAll('li', attrs={'class':['collection-item-list-odd','collection-item-list-even']}):
title = self.tag_to_string(post.find('h2'))
author_item=post.find('div', attrs={'class':'collection-node-byline'})
author = re.sub(r'.*by\s',"",self.tag_to_string(author_item).strip())
title = title + u' (%s)'%author
article_page= self.index_to_soup('http://www.psychologytoday.com'+post.find('a', href=True)['href'])
print_page=article_page.find('li', attrs={'class':'print_html first'})
if print_page is not None:
url='http://www.psychologytoday.com'+print_page.find('a',href=True)['href']
desc = self.tag_to_string(post.find('div', attrs={'class':'collection-node-description'})).strip()
self.log('Found article:', title)
self.log('\t', url)
self.log('\t', desc)
articles.append({'title':title, 'url':url, 'date':'','description':desc})
return [('Current Issue', articles)]
|
|
|
|
![]() |
|
Similar Threads
|
||||
| Thread | Thread Starter | Forum | Replies | Last Post |
| Request: Please update Psychology Today recipe | underwarez | Recipes | 0 | 07-04-2012 01:50 PM |
| Fix for Psychology Today | circa68 | Recipes | 4 | 10-31-2011 05:25 AM |
| Psychology Today website changed | Shuichiro | Recipes | 9 | 08-31-2011 02:06 PM |
| Psychology Today recipe is recently failing to pull articles. | Shuichiro | Recipes | 1 | 08-06-2011 05:23 PM |
| Psychology today news feed failing to download | Shuichiro | Recipes | 1 | 05-14-2011 05:11 AM |