I've put together a recipe for Nuus24 so that I can enjoy some news in Afrikaans, my first language. There may be other Afrikaans-speaking people who would enjoy it too. I used the New York Times recipe at
http://manual.calibre-ebook.com/news.html as a starting point; thanks to its author.
Please feel free to add it to calibre.
Spoiler:
Code:
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class Nuus24(BasicNewsRecipe):
    """Calibre news recipe that collects Afrikaans articles from Nuus24.

    The article list is scraped from the Nuus24 index page by
    ``parse_index``; every article is placed in a single feed named
    'Nuus in Afrikaans'.
    """

    title = 'Nuus24'
    __author__ = 'Nicki de Wet'
    encoding = 'utf-8'
    description = 'Daaglikse Afrikaanse Nuus via Nuus24'
    publisher = 'Media24'
    timefmt = ' [%a, %d %b, %Y]'
    masthead_url = 'http://afrikaans.news24.com/images/nuus.jpg'
    max_articles_per_feed = 25

    # Page clean-up rules: element selectors matched by class, id or tag name.
    remove_tags_before = dict(id='TheFeed')
    remove_tags_after = dict(id='TheFeed')
    remove_tags = [
        dict(attrs={'class': ['personal-bar row-fluid', 'navbar main-menu-fixed', 'breaking-news-wrapper', 'row-fluid comments-bg', 'unstyled actions', 'modal-body', 'modal-header', 'desktop']}),
        dict(id=['weather-forecast', 'topics', 'side-widgets', 'footer-container', 'sb-container', 'myModal']),
        dict(name=['script', 'noscript', 'style']),
    ]
    keep_only_tags = [
        dict(attrs={'class': ['span8 border-right']}),
        dict(name=['article', 'section']),
        dict(id=['img-wrapper']),
    ]

    extra_css = """
    div.carousel-inner{ overflow:hidden;display: block;height:300px;}
    img{display: block}
    """
    no_stylesheets = True

    def parse_index(self):
        """Build the article index from the Nuus24 front page.

        Returns:
            A list with one ``(feed_title, articles)`` tuple, where
            ``articles`` is a list of dicts with the keys ``title``,
            ``url``, ``date``, ``description`` and ``content``. The list
            is empty when no usable article link is found.
        """
        soup = self.index_to_soup('http://afrikaans.news24.com/Index.aspx')

        feed_name = 'Nuus in Afrikaans'
        # All articles share today's date; the index lists no per-article date.
        pubdate = strftime('%a, %d %b')

        articles = []
        for anchor in soup.findAll(True, attrs={'id': ['lnkLink']}):
            href = anchor.get('href')
            if not href:
                # An element without a link target cannot be downloaded.
                continue
            # Drop the query string so only the bare article address remains.
            url = re.sub(r'\?.*', '', href)
            if not url:
                continue
            title = self.tag_to_string(anchor, use_alt=True).strip()
            self.log('Found article:', title)
            articles.append(
                dict(title=title, url=url, date=pubdate,
                     description='',
                     content=''))

        if not articles:
            return []
        return [(feed_name, articles)]