| 
			
			 | 
		#1 | 
| 
			
			
			
			 Enthusiast 
			
			![]() Posts: 36 
				Karma: 10 
				Join Date: Dec 2017 
				Location: Los Angeles, CA 
				
				
				Device: Smart Phone 
				
				
				 | 
	
	
	
		
		
			
			 
				
				Nature Journal Recipe
			 
			
			
			Here is a recipe for the 'Nature' journal. 
		
	
		
		
		
		
		
		
		
		
		
		
	
	Nature Recipe Code: 
	#!/usr/bin/env python2
from collections import defaultdict
from calibre.web.feeds.news import BasicNewsRecipe
BASE = 'https://www.nature.com'


def absurl(url):
    """Make *url* absolute against BASE and force the https scheme.

    Site-relative paths ('/...') are prefixed with BASE; 'http://' URLs are
    rewritten to 'https://'; anything else is returned unchanged.
    """
    if url.startswith('/'):
        url = BASE + url
    elif url.startswith('http://'):
        url = 'https' + url[4:]
    return url
def check_words(words):
    """Return a predicate that is truthy when the tested attribute value
    shares at least one whitespace-separated token with *words*."""
    tokens = frozenset(words.split())

    def _match(value):
        return value and tokens.intersection(value.split())

    return _match
class Nature(BasicNewsRecipe):
    """Calibre news recipe for the weekly journal Nature (nature.com)."""

    title = 'Nature'
    __author__ = 'Jose Ortiz'
    description = ('Nature is a weekly international multidisciplinary scientific journal'
                   ' publishing peer-reviewed research in all fields of science and'
                   ' technology on the basis of its originality, importance,'
                   ' interdisciplinary interest, timeliness, accessibility, elegance and'
                   ' surprising conclusions.  Nature also provides rapid, authoritative,'
                   ' insightful and arresting news and interpretation of topical and coming'
                   ' trends affecting science, scientists and the wider public.')
    language = 'en'
    encoding = 'UTF-8'
    # NOTE(review): calibre's documented option is remove_javascript;
    # confirm no_javascript is actually honoured by BasicNewsRecipe.
    no_javascript = True
    no_stylesheets = True

    keep_only_tags = [
        # The main article body container.
        dict(name='div', attrs={'data-component': check_words('article-container')})
    ]
    remove_tags = [
        # Elements hidden in the print layout (sharing bars etc.).
        dict(attrs={'class': check_words('hide-print')})
    ]

    def parse_index(self):
        """Scrape the current-issue page and return [(section, articles)].

        Section order matches the page; each article dict carries the
        title, url, description, date and author keys calibre expects.
        """
        soup = self.index_to_soup(BASE + '/nature/current-issue')
        self.cover_url = 'https:' + soup.find(
            'img', attrs={'data-test': check_words('issue-cover-image')})['src']
        section_tags = soup.find(
            'div', {'data-container-type': check_words('issue-section-list')})
        section_tags = section_tags.findAll(
            'div', {'class': check_words('article-section')})
        sections = defaultdict(list)
        ordered_sec_titles = []
        for sec in section_tags:
            sec_title = self.tag_to_string(sec.find('h2'))
            ordered_sec_titles.append(sec_title)
            for article in sec.findAll('article'):
                # Entries without a linked URL (adverts, teasers) make
                # article.find() return None; subscripting that raises
                # TypeError, so skip them instead of crashing.
                try:
                    url = absurl(article.find('a', {'itemprop': check_words('url')})['href'])
                except TypeError:
                    continue
                title = self.tag_to_string(
                    article.find('h3', {'itemprop': check_words('name headline')}))
                date = ' [' + self.tag_to_string(
                    article.find('time', {'itemprop': check_words('datePublished')})) + ']'
                author = self.tag_to_string(
                    article.find('li', {'itemprop': check_words('creator')}))
                label = self.tag_to_string(
                    article.find(attrs={'data-test': check_words('article.type')}))
                description = label + ': ' + self.tag_to_string(
                    article.find('div', attrs={'itemprop': check_words('description')}))
                sections[sec_title].append(
                    {'title': title, 'url': url, 'description': description,
                     'date': date, 'author': author})
        # Emit sections in page order.
        return [(sec_title, sections[sec_title]) for sec_title in ordered_sec_titles]

    def preprocess_html(self, soup):
        """Promote lazy-loaded data-src images to plain src attributes."""
        for img in soup.findAll('img', {'data-src': True}):
            if img['data-src'].startswith('//'):
                # Protocol-relative URL: pin it to https.
                img['src'] = 'https:' + img['data-src']
            else:
                img['src'] = img['data-src']
        return soup
 | 
| 
		 | 
	
	
	
		
		
		
		
			 
		
		
		
		
		
		
		
			
		
		
		
	 | 
| 
			
			 | 
		#2 | 
| 
			
			
			
			 creator of calibre 
			
			![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() Posts: 45,609 
				Karma: 28549044 
				Join Date: Oct 2006 
				Location: Mumbai, India 
				
				
				Device: Various 
				
				
				 | 
	
	
	
		
		
		
		
		 
			
			thanks, added.
		 
		
	
		
		
		
		
		
		
		
		
		
		
	
	 | 
| 
		 | 
	
	
	
		
		
		
		
			 
		
		
		
		
		
		
		
			
		
		
		
	 | 
| Advert | |
| 
         | 
    
| 
			
			 | 
		#3 | 
| 
			
			
			
			 Enthusiast 
			
			![]() Posts: 36 
				Karma: 10 
				Join Date: Dec 2017 
				Location: Los Angeles, CA 
				
				
				Device: Smart Phone 
				
				
				 | 
	
	
	
		
		
			
			 
				
				Update to Nature
			 
			
			
			Hello Kovid, thanks for adding my recipe.  Here's an update that fixes an error I found this morning. 
		
	
		
		
		
		
		
		
		
		
		
		
	
	Update to Nature: Code: 
	#!/usr/bin/env python2
from collections import defaultdict
from calibre.web.feeds.news import BasicNewsRecipe
BASE = 'https://www.nature.com'


def absurl(url):
    # Site-relative links get the Nature host; plain-http links are
    # upgraded to https; anything else passes through untouched.
    if url.startswith('/'):
        return BASE + url
    if url.startswith('http://'):
        return 'https://' + url[len('http://'):]
    return url
def check_words(words):
    """Predicate factory: truthy when the attribute value shares any
    whitespace-separated token with *words*."""
    wanted = frozenset(words.split())
    return lambda attr: attr and wanted.intersection(attr.split())
def has_all_of(words):
    """Predicate factory: truthy only when every token of *words* appears
    in the tested attribute value."""
    required = frozenset(words.split())

    def _pred(attr):
        return attr and required.issubset(attr.split())

    return _pred
    
class Nature(BasicNewsRecipe):
    """Calibre news recipe for the weekly journal Nature (nature.com)."""

    title = 'Nature'
    __author__ = 'Jose Ortiz'
    description = ('Nature is a weekly international multidisciplinary scientific journal'
                   ' publishing peer-reviewed research in all fields of science and'
                   ' technology on the basis of its originality, importance,'
                   ' interdisciplinary interest, timeliness, accessibility, elegance and'
                   ' surprising conclusions.  Nature also provides rapid, authoritative,'
                   ' insightful and arresting news and interpretation of topical and coming'
                   ' trends affecting science, scientists and the wider public.')
    language = 'en'
    encoding = 'UTF-8'
    # NOTE(review): calibre's documented option is remove_javascript;
    # confirm no_javascript is actually honoured by BasicNewsRecipe.
    no_javascript = True
    no_stylesheets = True

    keep_only_tags = [
        # The main article body container.
        dict(name='div', attrs={'data-component': check_words('article-container')})
    ]
    remove_tags = [
        # Elements hidden in the print layout (sharing bars etc.).
        dict(attrs={'class': check_words('hide-print')})
    ]

    def parse_index(self):
        """Scrape the current-issue page and return [(section, articles)].

        Section order matches the page; each article dict carries the
        title, url, description, date and author keys calibre expects.
        """
        soup = self.index_to_soup(BASE + '/nature/current-issue')
        # The cover <img> is occasionally absent; guard the subscript so a
        # missing cover does not abort the whole download with
        # "TypeError: 'NoneType' object is not subscriptable".
        cover = soup.find('img', attrs={'data-test': check_words('issue-cover-image')})
        if cover is not None:
            self.cover_url = 'https:' + cover['src']
        section_tags = soup.find(
            'div', {'data-container-type': check_words('issue-section-list')})
        section_tags = section_tags.findAll(
            'div', {'class': check_words('article-section')})
        sections = defaultdict(list)
        ordered_sec_titles = []
        for sec in section_tags:
            sec_title = self.tag_to_string(sec.find('h2'))
            ordered_sec_titles.append(sec_title)
            for article in sec.findAll('article'):
                # Entries without a linked URL (adverts, teasers) make
                # article.find() return None; subscripting that raises
                # TypeError, so skip them instead of crashing.
                try:
                    url = absurl(article.find('a', {'itemprop': check_words('url')})['href'])
                except TypeError:
                    continue
                title = self.tag_to_string(
                    article.find('h3', {'itemprop': has_all_of('name headline')}))
                date = ' [' + self.tag_to_string(
                    article.find('time', {'itemprop': check_words('datePublished')})) + ']'
                author = self.tag_to_string(
                    article.find('li', {'itemprop': check_words('creator')}))
                description = self.tag_to_string(
                    article.find(attrs={'data-test': check_words('article.type')})) + u' • '
                description += self.tag_to_string(
                    article.find('div', attrs={'itemprop': check_words('description')}))
                sections[sec_title].append(
                    {'title': title, 'url': url, 'description': description,
                     'date': date, 'author': author})
        # Emit sections in page order.
        return [(sec_title, sections[sec_title]) for sec_title in ordered_sec_titles]

    def preprocess_html(self, soup):
        """Promote lazy-loaded images and drop duplicated article bodies."""
        for img in soup.findAll('img', {'data-src': True}):
            if img['data-src'].startswith('//'):
                # Protocol-relative URL: pin it to https.
                img['src'] = 'https:' + img['data-src']
            else:
                img['src'] = img['data-src']
        # Pages can repeat the article container; keep only the first copy.
        for div in soup.findAll('div', {'data-component': check_words('article-container')})[1:]:
            div.extract()
        return soup
| 
		 | 
	
	
	
		
		
		
		
			 
		
		
		
		
		
		
		
			
		
		
		
	 | 
| 
			
			 | 
		#4 | 
| 
			
			
			
			 Junior Member 
			
			![]() Posts: 5 
				Karma: 10 
				Join Date: Feb 2015 
				
				
				
				Device: Kindle Paperwhite 
				
				
				 | 
	
	
	
		
		
		
		
		 
			
			Hello! I'm trying to set up this recipe (and tweak it to work with subscription-based access through a library here in France), but I always get the error "TypeError: 'NoneType' object is not subscriptable". Any idea where the error might lie?
		 
		
	
		
		
		
		
		
		
		
		
		
		
	
	 | 
| 
		 | 
	
	
	
		
		
		
		
			 
		
		
		
		
		
		
		
			
		
		
		
	 | 
![]()  | 
            
        
    
            
  | 
    
			 
			Similar Threads
		 | 
	||||
| Thread | Thread Starter | Forum | Replies | Last Post | 
| Nature (journal) special on scientific publishing | jehane | News | 9 | 07-03-2013 12:53 PM | 
| Nature news - updated recipe | Alexis | Recipes | 3 | 10-05-2012 03:36 PM | 
| Nature recipe request | whitecow | Recipes | 0 | 03-13-2012 03:28 PM | 
| bbc nature recipe update | scissors | Recipes | 0 | 01-28-2012 04:58 AM | 
| BBC Nature Recipe | scissors | Recipes | 0 | 12-28-2011 05:44 AM |