I am happily using a very slightly expanded version of Krittika Goyal's code. There are certain sections it does not parse correctly; I will include them once I have debugged the problem. Try using this, which gives most of what is needed:
=============================================
Code:
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class NYTimes(BasicNewsRecipe):
    """Calibre news recipe that builds an e-book from The Spectator website.

    NOTE(review): the class name looks like a leftover from a template
    recipe; it is kept unchanged so anything referring to the class by
    name keeps working.
    """

    title = 'The Spectator'
    __author__ = 'Krittika Goyal'
    description = 'UK magazine'
    timefmt = ' [%d %b, %Y]'
    needs_subscription = False
    no_stylesheets = True
    auto_cleanup = True

    # Site root, prepended to root-relative article links.
    SPECTATOR_BASE_URL = 'http://www.spectator.co.uk'

    # (section title, section index URL) pairs, in output order.
    SPECTATOR_SECTIONS = (
        ('Politics', 'http://www.spectator.co.uk/politics/all/'),
        ('Essays', 'http://www.spectator.co.uk/essays/'),
        ('Wit & Wisdom', 'http://www.spectator.co.uk/wit-and-wisdom/all/'),
        ('Columnists', 'http://www.spectator.co.uk/columnists/all/'),
        ('Arts', 'http://www.spectator.co.uk/arts-and-culture/featured/'),
        # NOTE(review): 'Books' was disabled in the original recipe --
        # presumably it does not parse correctly yet; confirm before enabling.
        # ('Books', 'http://www.spectator.co.uk/books/'),
    )

    def articles_in_spec_section(self, section_url):
        """Return the articles linked from one section index page.

        Every ``<h1>`` inside the element with ``id='centre'`` that
        contains a link is treated as one article.

        :param section_url: URL of the section index page to scan.
        :return: list of dicts with the keys ``title``, ``url``,
            ``description`` and ``date`` (the last two are always empty).
            The list is empty when the page has no ``id='centre'``
            element or no linked headings.
        """
        articles = []
        soup = self.index_to_soup(section_url)
        container = soup.find(id='centre')
        if container is None:
            # Page layout differs from what is expected: skip this section
            # instead of failing the whole download with an AttributeError.
            self.log('\tNo article container found in', section_url)
            return articles

        for heading in container.findAll('h1'):
            link = heading.find('a', href=True)
            if link is None:
                # Heading without a link is not an article entry.
                continue
            title = self.tag_to_string(heading)
            self.log('\tFound article:', title)
            url = link['href']
            if url.startswith('/'):
                # Root-relative link: make it absolute.
                url = self.SPECTATOR_BASE_URL + url
            articles.append({
                'title': title,
                'url': url,
                'description': '',
                'date': '',
            })
        return articles

    def parse_index(self):
        """Build the table of contents for the recipe.

        :return: list of ``(section title, articles)`` tuples, one per
            entry of ``SPECTATOR_SECTIONS`` that yielded at least one
            article; sections with no articles are left out.
        """
        sections = []
        for title, url in self.SPECTATOR_SECTIONS:
            self.log('Processing section:', title)
            articles = self.articles_in_spec_section(url)
            if articles:
                sections.append((title, articles))
        return sections
==========================================