03-25-2013, 07:17 PM | #1 |
Connoisseur
Posts: 55
Karma: 13316
Join Date: Jul 2012
Device: iPad
|
Psychology Today Recipe
I wrote this recipe a long while ago, but somehow I never posted it here. Here it is:
Code:
import re

from calibre.web.feeds.recipes import BasicNewsRecipe


class PsychologyToday(BasicNewsRecipe):
    """Calibre recipe that builds the current issue of Psychology Today.

    The magazine landing page is scraped for the cover image, the issue
    date and three groups of article teasers. Every article is downloaded
    through the print link found on its own page.
    """

    title = 'Psychology Today'
    __author__ = 'Rick Shang'
    description = (
        'This magazine takes information from the latest research in the '
        'field of psychology and makes it useful to people in their '
        'everyday lives. Its coverage encompasses self-improvement, '
        'relationships, the mind-body connection, health, family, the '
        'workplace and culture.'
    )
    language = 'en'
    category = 'news'
    encoding = 'UTF-8'
    # Only elements carrying one of these print-* classes are kept.
    keep_only_tags = [dict(attrs={'class': [
        'print-title', 'print-submitted', 'print-content',
        'print-footer', 'print-source_url', 'print-links',
    ]})]
    no_javascript = True
    no_stylesheets = True

    # Prefix for the site-relative hrefs found on the scraped pages.
    _site_root = 'http://www.psychologytoday.com'

    def parse_index(self):
        """Scrape the magazine page and return the feed list.

        Sets ``self.cover_url`` and ``self.timefmt`` from the cover image
        as a side effect.

        Returns:
            A one-element list ``[('Current Issue', articles)]``; each
            article is a dict with the keys ``title``, ``url``, ``date``
            and ``description``.
        """
        soup = self.index_to_soup(self._site_root + '/magazine')

        # Go to the main body
        div = soup.find('div', attrs={'id': 'content-content'})

        # Find cover & date
        cover_item = div.find(
            'div', attrs={'class': 'collections-header-image'})
        cover = cover_item.find('img', src=True)
        self.cover_url = cover['src']
        date = self.tag_to_string(cover['title'])
        self.timefmt = u' [%s]' % date

        # The three teaser groups are processed in page order: featured
        # articles, thumbnail articles, then the odd/even list items.
        articles = []
        articles += self._collect_articles(
            div, 'div', 'collections-node-feature-info')
        articles += self._collect_articles(
            div, 'div', 'collections-node-thumbnail-info',
            author_after_description=True)
        articles += self._collect_articles(
            div, 'li',
            ['collection-item-list-odd', 'collection-item-list-even'])

        return [('Current Issue', articles)]

    def _collect_articles(self, container, tag_name, css_class,
                          author_after_description=False):
        """Return article dicts for every matching teaser in *container*.

        Teasers whose article page has no print link are left out.
        """
        collected = []
        for post in container.findAll(tag_name, attrs={'class': css_class}):
            article = self._parse_post(post, author_after_description)
            if article is not None:
                collected.append(article)
        return collected

    def _parse_post(self, post, author_after_description=False):
        """Build the article dict for one teaser, or None without print link.

        Args:
            post: The teaser element from the magazine page.
            author_after_description: When true, the author text is read
                from the node following the description div instead of
                from the byline div.
        """
        description = post.find(
            'div', attrs={'class': 'collection-node-description'})
        if author_after_description:
            # NOTE(review): nextSibling may be a whitespace text node
            # depending on the page markup - confirm against the live site.
            author_source = description.nextSibling
        else:
            author_source = post.find(
                'div', attrs={'class': 'collection-node-byline'})

        # Drop everything up to and including the "by " prefix.
        author = re.sub(
            r'.*by\s', "", self.tag_to_string(author_source).strip())
        title = self.tag_to_string(post.find('h2')) + u' (%s)' % author

        # The print link is only available on the article's own page.
        article_page = self.index_to_soup(
            self._site_root + post.find('a', href=True)['href'])
        print_page = article_page.find(
            'li', attrs={'class': 'print_html first'})
        if print_page is None:
            # Skip this one article instead of failing the whole index.
            self.log('Skipping article without print link:', title)
            return None

        url = self._site_root + print_page.find('a', href=True)['href']
        desc = self.tag_to_string(description).strip()

        self.log('Found article:', title)
        self.log('\t', url)
        self.log('\t', desc)
        return {'title': title, 'url': url, 'date': '', 'description': desc}
|
Similar Threads | ||||
Thread | Thread Starter | Forum | Replies | Last Post |
Request: Please update Psychology Today recipe | underwarez | Recipes | 0 | 07-04-2012 01:50 PM |
Fix for Psychology Today | circa68 | Recipes | 4 | 10-31-2011 05:25 AM |
Psychology Today website changed | Shuichiro | Recipes | 9 | 08-31-2011 02:06 PM |
Psychology Today recipe is recently failing to pull articles. | Shuichiro | Recipes | 1 | 08-06-2011 05:23 PM |
Psychology today news feed failing to download | Shuichiro | Recipes | 1 | 05-14-2011 05:11 AM |