|
|
#1 |
|
Member
![]() Posts: 14
Karma: 10
Join Date: Jan 2012
Device: Sony PRS-T1
|
Updates for Austrian news sources
I have prepared updates for the Austrian news sources "Kleine Zeitung", "Kurier", and "Der Standard". The respective recipes are attached below.
|
|
|
|
|
|
#2 |
|
Member
![]() Posts: 14
Karma: 10
Join Date: Jan 2012
Device: Sony PRS-T1
|
Kleine Zeitung
Update for kleinezeitung.recipe:
Code:
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe
class KleineZeitungRecipe(BasicNewsRecipe):
    '''Recipe settings for the RSS feeds of kleinezeitung.at.

    Articles are fetched through their ``print.do`` URL (see
    ``print_version``) and trimmed to the region between the element with
    class ``hline`` and the ``div`` with class ``articletext``.
    '''

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'de_AT'
    version = 1

    title = u'Kleine Zeitung'
    publisher = u'Kleine Zeitung GmbH & Co KG'
    category = u'News, Newspaper'
    description = u'Nachrichten aus \u00D6sterreich'

    use_embedded_content = False
    remove_empty_feeds = True
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    masthead_url = 'https://cdn-kl.niceshops.com/images/logos/logo_kleine_invoice.jpg'

    feeds = [
        ('Nachrichten', 'http://www.kleinezeitung.at/rss/nachrichten'),
        ('Politik', 'http://www.kleinezeitung.at/rss/politik'),
        ('Wirtschaft', 'http://www.kleinezeitung.at/rss/wirtschaft'),
        ('Österreich und die Welt', 'http://www.kleinezeitung.at/rss/chronik'),
        ('Leben', 'http://www.kleinezeitung.at/rss/leben'),
        ('Sport', 'http://www.kleinezeitung.at/rss/sport'),
        # Regional feeds, left disabled by the recipe author.
        # ('Ennstal', 'http://www.kleinezeitung.at/rss/rss_ennstal'),
        # ('Graz & Umgebung', 'http://www.kleinezeitung.at/rss/rss_graz'),
        # ('Leoben', 'http://www.kleinezeitung.at/rss/rss_leoben'),
        # ('Murtal', 'http://www.kleinezeitung.at/rss/rss_murtal'),
        # ('Mürztal', 'http://www.kleinezeitung.at/rss/rss_muerztal'),
        # ('Oststeier', 'http://www.kleinezeitung.at/rss/rss_oststeier'),
        # ('Süd & Südwest', 'http://www.kleinezeitung.at/rss/rss_suedsuedwest'),
        # ('Südost & Süd', 'http://www.kleinezeitung.at/rss/rss_sueostsued'),
        # ('Weiz', 'http://www.kleinezeitung.at/rss/rss_weiz'),
        # ('Weststeier', 'http://www.kleinezeitung.at/rss/rss_weststeier'),
        # ('Feldkirchen', 'http://www.kleinezeitung.at/rss/rss_feldkirchen'),
        # ('Klagenfurt', 'http://www.kleinezeitung.at/rss/rss_klagenfurt'),
        # ('Lavanttal', 'http://www.kleinezeitung.at/rss/rss_lavanttal'),
        # ('Oberkärnten', 'http://www.kleinezeitung.at/rss/rss_oberkaernten'),
        # ('Osttirol', 'http://www.kleinezeitung.at/rss/rss_osttirol'),
        # ('St. Veit', 'http://www.kleinezeitung.at/rss/rss_stveit'),
        # ('Villach', 'http://www.kleinezeitung.at/rss/rss_villach'),
        # ('Völkermarkt', 'http://www.kleinezeitung.at/rss/rss_voelkermarkt')
    ]

    remove_tags_before = dict(attrs={'class': 'hline'})
    remove_tags_after = [dict(name='div', attrs={'class': 'articletext'})]
    remove_tags = [dict(name='hr')]

    # Same value as the original multi-line literal, written on one line so
    # that class-body indentation cannot leak into the stylesheet text.
    extra_css = '\nh1 {text-align: left;}\n'

    def print_version(self, url):
        '''Return ``url`` with its last path segment replaced by ``print.do``.

        Everything after the final ``/`` is dropped; if ``url`` contains no
        ``/`` at all the result is just ``'/print.do'``.
        '''
        # Only the part before the last '/' is needed; the separator and the
        # trailing segment are discarded.
        main = url.rpartition('/')[0]
        return main + '/print.do'

    def preprocess_html(self, soup):
        '''Return ``soup`` unchanged, aborting pages without an article body.

        When the page has no ``div`` with class ``articletext``,
        ``self.abort_article()`` is called first (inherited from the base
        class; presumably it stops processing of this article -- confirm
        against the calibre API).
        '''
        if soup.find('div', {'class': 'articletext'}) is None:
            self.abort_article()
        return soup
|
|
|
|
| Advert | |
|
|
|
|
#3 |
|
Member
![]() Posts: 14
Karma: 10
Join Date: Jan 2012
Device: Sony PRS-T1
|
Kurier
Update for kurier.recipe:
Code:
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import unicode_literals, division, absolute_import, print_function
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
kurier.at
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Kurier(BasicNewsRecipe):
    '''Recipe settings for the RSS feeds of kurier.at.

    Only the ``article`` element whose class matches ``main-article`` is
    kept; social-media, tag, comment, related-content and slider sections
    as well as ``blockquote`` elements are removed from it.
    '''

    title = 'Kurier'
    __author__ = 'Darko Miletic'
    description = 'News from Austria'
    publisher = 'KURIER'
    category = 'news, politics, Austria'
    oldest_article = 2
    max_articles_per_feed = 100
    timeout = 30
    no_stylesheets = True
    use_embedded_content = False
    language = 'de_AT'
    remove_empty_feeds = True
    publication_type = 'newspaper'

    # Built from the attributes above, so it must stay below them.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    feeds = [
        ('Politik', 'http://kurier.at/politik/xml/rss'),
        ('Wirtschaft', 'http://kurier.at/wirtschaft/xml/rss'),
        ('Chronik', 'http://kurier.at/chronik/xml/rss'),
        ('Kultur', 'http://kurier.at/kultur/xml/rss'),
        ('Leben', 'http://kurier.at/leben/xml/rss'),
        ('Menschen', 'http://kurier.at/menschen/xml/rss'),
        ('Sport', 'http://kurier.at/sport/xml/rss'),
    ]

    keep_only_tags = [
        dict(name='article', attrs={'class': re.compile('main-article')}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': 'social-media-container'}),
        dict(name='section', attrs={'class': 'tags'}),
        dict(name='section', attrs={'class': re.compile('comment-box')}),
        dict(name='section', attrs={'class': re.compile('related-content')}),
        dict(name='section', attrs={'class': re.compile('article-slider')}),
        dict(name='section', attrs={'class': re.compile('commentcontainer')}),
        dict(name='blockquote'),
    ]

    remove_attributes = ['width', 'height']

    def preprocess_html(self, soup):
        '''Strip inline ``style`` attributes and post-process images.

        Every element carrying a ``style`` attribute has it deleted; the
        soup is then passed to ``self.adeify_images`` (inherited from the
        base class) and that call's result is returned.
        '''
        for item in soup.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)
|
|
|
|
|
|
#4 |
|
Member
![]() Posts: 14
Karma: 10
Join Date: Jan 2012
Device: Sony PRS-T1
|
Der Standard
Update for der_standard.recipe:
Code:
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, division, absolute_import, print_function
__license__ = 'GPL v3'
__copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
''' http://www.derstandard.at - Austrian Newspaper '''
import re
import random
from calibre.web.feeds.news import BasicNewsRecipe
class DerStandardRecipe(BasicNewsRecipe):
    '''Recipe settings for the RSS feeds of derStandard.at.

    Only ``div`` elements whose class starts with ``artikel`` are kept.
    Feed entries whose link matches ``/r[1-9]*`` are skipped, and the cover
    image is looked up on the e-paper shelf page.
    '''

    title = u'derStandard'
    __author__ = 'Gerhard Aigner and Sujata Raman and Marcel Jira and Peter Reschenhofer'
    description = u'Nachrichten aus Österreich'
    publisher = 'derStandard.at'
    category = 'news, politics, nachrichten, Austria'
    use_embedded_content = False
    remove_empty_feeds = True
    no_stylesheets = True
    encoding = 'utf-8'
    language = 'de_AT'
    oldest_article = 1
    max_articles_per_feed = 100
    ignore_duplicate_articles = {'title', 'url'}
    masthead_url = 'http://images.derstandard.at/2012/06/19/derStandardat_1417x274.gif'

    feeds = [
        (u'Newsroom', u'http://derStandard.at/?page=rss&ressort=Seite1'),
        (u'Inland', u'http://derstandard.at/?page=rss&ressort=InnenPolitik'),
        (u'International', u'http://derstandard.at/?page=rss&ressort=InternationalPolitik'),
        (u'Wirtschaft', u'http://derStandard.at/?page=rss&ressort=Wirtschaft'),
        (u'Web', u'http://derStandard.at/?page=rss&ressort=Web'),
        (u'Sport', u'http://derStandard.at/?page=rss&ressort=Sport'),
        (u'Panorama', u'http://derStandard.at/?page=rss&ressort=Panorama'),
        (u'Etat', u'http://derStandard.at/?page=rss&ressort=Etat'),
        (u'Kultur', u'http://derStandard.at/?page=rss&ressort=Kultur'),
        (u'Wissenschaft', u'http://derStandard.at/?page=rss&ressort=Wissenschaft'),
        (u'Gesundheit', u'http://derStandard.at/?page=rss&ressort=Gesundheit'),
        (u'Bildung', u'http://derStandard.at/?page=rss&ressort=Bildung'),
        (u'Meinung', u'http://derStandard.at/?page=rss&ressort=Meinung'),
        (u'Lifestyle', u'http://derStandard.at/?page=rss&ressort=Lifestyle'),
        (u'Reisen', u'http://derStandard.at/?page=rss&ressort=Reisen'),
        (u'Familie', u'http://derstandard.at/?page=rss&ressort=Familie'),
        (u'Greenlife', u'http://derStandard.at/?page=rss&ressort=Greenlife'),
        (u'Karriere', u'http://derStandard.at/?page=rss&ressort=Karriere'),
        (u'Immobilien', u'http://derstandard.at/?page=rss&ressort=Immobilien'),
        (u'Automobil', u'http://derstandard.at/?page=rss&ressort=Automobil'),
        (u'dieStandard', u'http://dieStandard.at/?page=rss&ressort=diestandard'),
        (u'daStandard', u'http://daStandard.at/?page=rss&ressort=dastandard'),
    ]

    keep_only_tags = [
        dict(name='div', attrs={'class': re.compile('^artikel')}),
    ]

    remove_tags = [
        dict(name=['link', 'iframe', 'style', 'hr']),
        dict(attrs={'class': ['lookup-links', 'media-list']}),
        dict(name='form', attrs={'name': 'sitesearch'}),
        dict(name='div', attrs={'class': ['socialsharing', 'block video',
                                          'blog-browsing section',
                                          'diashow', 'supplemental']}),
        dict(name='div', attrs={'id': 'highlighted'}),
    ]

    remove_attributes = ['width', 'height']

    preprocess_regexps = [
        # Drop bracketed numbers such as "[12]" (and the empty "[]").
        (re.compile(r'\[[\d]*\]', re.DOTALL | re.IGNORECASE), lambda match: ''),
        # Drop hard-coded bgcolor attributes.
        (re.compile(r'bgcolor="#\w{3,6}"', re.DOTALL | re.IGNORECASE), lambda match: ''),
    ]

    filter_regexps = [r'/r[1-9]*']

    def get_article_url(self, article):
        '''Return ``article.link``, or ``None`` when the link is filtered out.

        A link is filtered out when it contains a match for ``/r[1-9]*``,
        the same pattern used in ``filter_regexps``.
        '''
        # NOTE(review): '[1-9]*' also matches zero digits, so every link
        # containing '/r' is rejected -- confirm this breadth is intended.
        if re.search(r'/r[1-9]*', article.link):
            return None
        return article.link

    def preprocess_html(self, soup):
        '''Return ``soup`` with every ``ul`` and ``li`` renamed to ``div``.

        When the page has no ``div`` whose class starts with ``artikel``,
        ``self.abort_article()`` is called first (inherited from the base
        class; presumably it stops processing of this article -- confirm
        against the calibre API).
        '''
        if soup.find('div', {'class': re.compile('^artikel')}) is None:
            self.abort_article()
        for t in soup.findAll(['ul', 'li']):
            t.name = 'div'
        return soup

    def get_cover_url(self):
        '''Return the cover image URL from the e-paper shelf, or ``None``.

        The shelf page is fetched with ``self.index_to_soup`` and searched
        for an ``img`` whose class starts with ``thumbnailBig`` and which
        has a ``src`` attribute. ``None`` is returned when no such image
        with a non-empty ``src`` exists, instead of reading an unassigned
        local variable.
        '''
        base_url = 'https://epaper.derstandard.at/'
        # A fresh random value is appended on every call; presumably a
        # cache buster -- confirm.
        url = base_url + 'shelf.act?s=' + str(random.random() * 10000)
        soup = self.index_to_soup(url)
        img = soup.find('img', {'class': re.compile('^thumbnailBig'), 'src': True})
        if img and img['src']:
            return base_url + img['src']
        return None
|
|
|
|
![]() |
|
Similar Threads
|
||||
| Thread | Thread Starter | Forum | Replies | Last Post |
| Automatic news fetch: different news sources to different Kindles? | kcp | Calibre | 4 | 01-24-2015 03:43 PM |
| News Sources | sisterphonetica | Recipes | 5 | 06-27-2014 12:30 PM |
| [Enhancement] Add new news sources of ABC NEWS | donnie888 | Recipes | 0 | 12-23-2012 01:39 AM |
| News sources | Lob | Recipes | 2 | 02-17-2011 12:49 PM |
| Best Free News Sources | Gideon | Deals and Resources (No Self-Promotion or Affiliate Links) | 1 | 08-05-2009 02:25 AM |