Psychology Today Recipe

rainrdx · 03-25-2013, 07:17 PM

I wrote this recipe a long while ago, but somehow I never posted it here. Here it is:

Code:

import re
from calibre.web.feeds.recipes import BasicNewsRecipe


class PsychologyToday(BasicNewsRecipe):
    """Calibre recipe that collects the articles listed on the Psychology
    Today magazine index page.

    ``parse_index`` scrapes the index page for three kinds of article
    listings (feature entries, thumbnail entries and odd/even list items),
    opens each article page to locate its print-friendly link, and returns
    everything as a single 'Current Issue' section.
    """

    title       = 'Psychology Today'
    __author__  = 'Rick Shang'

    description = 'This magazine takes information from the latest research in the field of psychology and makes it useful to people in their everyday lives. Its coverage encompasses self-improvement, relationships, the mind-body connection, health, family, the workplace and culture.'
    language = 'en'
    category = 'news'
    encoding = 'UTF-8'
    # Only the elements of the print view are kept in the downloaded article.
    keep_only_tags = [dict(attrs={'class':['print-title', 'print-submitted', 'print-content', 'print-footer', 'print-source_url', 'print-links']})]
    no_javascript = True
    no_stylesheets = True

    # Site root; the hrefs scraped from the index are appended to it.
    PT_BASE_URL = 'http://www.psychologytoday.com'

    def _print_url(self, post):
        """Return the print-friendly URL of the article linked from *post*.

        The first link inside *post* is followed to the article page, where
        the ``<li class="print_html first">`` element holds the print link.
        Returns ``None`` when either link cannot be found, so the caller can
        skip the article instead of aborting the whole download.
        """
        link = post.find('a', href=True)
        if link is None:
            return None
        article_page = self.index_to_soup(self.PT_BASE_URL + link['href'])
        print_page = article_page.find('li', attrs={'class': 'print_html first'})
        if print_page is None:
            return None
        print_link = print_page.find('a', href=True)
        if print_link is None:
            return None
        return self.PT_BASE_URL + print_link['href']

    def _collect_articles(self, posts, author_after_description=False):
        """Build the article dicts for a sequence of index entries.

        posts -- the entry elements found on the index page.
        author_after_description -- when True the author text is read from
            the node following the description element (thumbnail entries);
            otherwise from the ``collection-node-byline`` div.

        Returns a list of dicts with the keys 'title', 'url', 'date' and
        'description'. Entries without a print-friendly link are skipped.
        """
        found = []
        for post in posts:
            title = self.tag_to_string(post.find('h2'))
            description = post.find('div', attrs={'class': 'collection-node-description'})

            if author_after_description:
                author_item = description.nextSibling if description is not None else None
            else:
                author_item = post.find('div', attrs={'class': 'collection-node-byline'})
            # Drop everything up to and including the "by " prefix of the byline.
            author = re.sub(r'.*by\s', "", self.tag_to_string(author_item).strip())
            title = title + u' (%s)' % author

            url = self._print_url(post)
            if url is None:
                self.log('Skipping article without print version:', title)
                continue

            desc = self.tag_to_string(description).strip()
            self.log('Found article:', title)
            self.log('\t', url)
            self.log('\t', desc)
            found.append({'title': title, 'url': url, 'date': '', 'description': desc})
        return found

    def parse_index(self):
        """Scrape the magazine index page.

        Sets ``self.cover_url`` and ``self.timefmt`` from the cover image
        (its ``src`` and ``title`` attributes) and returns
        ``[('Current Issue', articles)]`` where *articles* is a list of
        article dicts.
        """
        soup = self.index_to_soup(self.PT_BASE_URL + '/magazine')

        # Go to the main body
        div = soup.find('div', attrs={'id': 'content-content'})

        # Find cover & date
        cover_item = div.find('div', attrs={'class': 'collections-header-image'})
        cover = cover_item.find('img', src=True)
        self.cover_url = cover['src']
        date = self.tag_to_string(cover['title'])
        self.timefmt = u' [%s]' % date

        articles = []
        # Feature entries: author comes from the byline div.
        articles.extend(self._collect_articles(
            div.findAll('div', attrs={'class': 'collections-node-feature-info'})))
        # Thumbnail entries: author text follows the description element.
        articles.extend(self._collect_articles(
            div.findAll('div', attrs={'class': 'collections-node-thumbnail-info'}),
            author_after_description=True))
        # Plain list entries (alternating odd/even classes).
        articles.extend(self._collect_articles(
            div.findAll('li', attrs={'class': ['collection-item-list-odd', 'collection-item-list-even']})))

        return [('Current Issue', articles)]

Similar Threads
Thread	Thread Starter	Forum	Replies	Last Post
Request: Please update Psychology Today recipe	underwarez	Recipes	0	07-04-2012 01:50 PM
Fix for Psychology Today	circa68	Recipes	4	10-31-2011 05:25 AM
Psychology Today website changed	Shuichiro	Recipes	9	08-31-2011 02:06 PM
Psychology Today recipe is recently failing to pull articles.	Shuichiro	Recipes	1	08-06-2011 05:23 PM
Psychology today news feed failing to download	Shuichiro	Recipes	1	05-14-2011 05:11 AM