http://skepticblog.org
OK, I tried to get this one working, but without success. Somewhere during processing an HTTP 403 error pops up; I would appreciate it if someone could check it.
Code:
#!/usr/bin/env python
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class SkepticBlog(BasicNewsRecipe):
    """calibre news recipe that builds an e-book from the Skepticblog feed.

    Articles are taken from the single feed listed in ``feeds``; from each
    downloaded page only the ``div.post`` element is kept, and everything
    following the ``div.ratingblock`` element is dropped.
    """

    # Descriptive metadata for the generated publication.
    title = 'Skepticblog'
    __author__ = 'Multiple Authors'
    description = 'A collaboration among some of the most recognized names in promoting science, critical thinking, and skepticism'
    publisher = 'Skeptic Magazine'
    category = 'science, pseudoscience'
    language = 'en'
    # Locale written into the <html lang="..."> attribute by preprocess_html
    # and passed to the converter through conversion_options below.
    lang = 'en-US'

    # Download limits and fetch behaviour; exact semantics are defined by
    # BasicNewsRecipe (see the calibre recipe API documentation).
    oldest_article = 5
    max_articles_per_feed = 15
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Options forwarded to the conversion step; they reuse the metadata above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': lang,
        'pretty_print': True,
    }

    # Page clean-up rules: keep the post body, cut after the rating widget.
    keep_only_tags = [dict(name='div', attrs={'class':'post'})]
    remove_tags_after = [dict(name='div', attrs={'class':'ratingblock'})]

    feeds = [(u'SkepticBlog', u'http://skepticblog.org/feed')]

    def preprocess_html(self, soup):
        """Declare the charset and language on the page, then fix up images.

        Inserts a ``<meta http-equiv="Content-Type">`` tag as the first child
        of ``<head>`` and sets the ``lang`` attribute on ``<html>`` to
        ``self.lang``.

        :param soup: parsed article page (a calibre BeautifulSoup tree).
        :return: whatever ``self.adeify_images(soup)`` returns.
        """
        # The attribute carrying the value of an http-equiv meta tag is
        # "content"; the previous spelling "context" made the tag a no-op.
        mtag = Tag(soup, 'meta', [('http-equiv', 'Content-Type'),
                                  ('content', 'text/html; charset=utf-8')])
        # Guard against pages that lack <head>/<html> instead of raising
        # AttributeError on None.
        if soup.head is not None:
            soup.head.insert(0, mtag)
        if soup.html is not None:
            soup.html['lang'] = self.lang
        return self.adeify_images(soup)