|
|||||||
![]() |
|
|
Thread Tools | Search this Thread |
|
|
#1 |
|
Junior Member
![]() Posts: 3
Karma: 10
Join Date: Aug 2012
Device: Kindle 4
|
New recipe for "Süddeutsche Zeitung" using "E-Paper mobile" subscription
I coded another recipe for accessing the paid contents of the German newspaper "Süddeutsche Zeitung". This one accesses the subscription "SZ E-Paper Mobil" which costs only 12.50 EUR/month. The present recipe "Süddeutsche Zeitung" uses the "Digital" subscription for 19.99 EUR/month. Hopefully someone finds this helpful.
szmobil.recipe Code:
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2012, Andreas Zeiser <andreas.zeiser@web.de>'
'''
szmobil.sueddeutsche.de/
'''
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
import re
class SZmobil(BasicNewsRecipe):
    """Fetch paid articles from the "SZ E-Paper mobil" subscription of the
    German newspaper Sueddeutsche Zeitung (szmobil.sueddeutsche.de)."""

    title = u'Süddeutsche Zeitung mobil'
    __author__ = u'Andreas Zeiser'
    description = u'Nachrichten aus Deutschland. Zugriff auf kostenpflichtiges Abo SZ mobil.'
    publisher = u'Sueddeutsche Zeitung'
    language = u'de'
    publication_type = u'newspaper'
    category = u'news, politics, Germany'

    no_stylesheets = True
    oldest_article = 2
    # site serves Latin-1, not UTF-8
    encoding = 'iso-8859-1'
    needs_subscription = True
    remove_empty_feeds = True
    delay = 1
    cover_source = 'http://www.sueddeutsche.de/verlag'
    timefmt = ' [%a, %d %b, %Y]'
    root_url = 'http://szmobil.sueddeutsche.de/'
    keep_only_tags = [dict(name='div', attrs={'class': 'article'})]

    def get_cover_url(self):
        """Return the URL of the current cover image from the publisher page."""
        src = self.index_to_soup(self.cover_source)
        image_url = src.find(attrs={'class': 'preview-image'})
        return image_url.div.img['src']

    def get_browser(self):
        """Log in to the subscription site and return the authenticated browser."""
        browser = BasicNewsRecipe.get_browser(self)
        # Login via fetching of Streiflicht -> fill out the login form
        url = self.root_url + 'show.php?id=streif'
        browser.open(url)
        browser.select_form(nr=0)  # select the first (login) form
        browser['username'] = self.username
        browser['password'] = self.password
        browser.submit()
        return browser

    def parse_index(self):
        """Build and return the list of (section_title, articles) tuples."""
        # find all sections on the front page
        src = self.index_to_soup('http://szmobil.sueddeutsche.de')
        feeds = []
        for itt in src.findAll('a', href=True):
            if itt['href'].startswith('show.php?section'):
                # section link text carries a two-character suffix -> strip it
                feeds.append((itt.string[0:-2], itt['href']))

        all_articles = []
        for feed in feeds:
            feed_url = self.root_url + feed[1]
            feed_title = feed[0]

            self.report_progress(0, ('Fetching feed') + ' %s...' % (feed_title if feed_title else feed_url))

            src = self.index_to_soup(feed_url)
            articles = []
            shorttitles = dict()
            for itt in src.findAll('a', href=True):
                if not itt['href'].startswith('show.php?id='):
                    continue
                article_url = itt['href']
                # FIX: raw string -- "\d" is an invalid escape in Python 3
                article_id = int(re.search(r"id=(\d*)&etag=", itt['href']).group(1))

                # first check if link is a special article in section "Meinungsseite"
                if itt.find('strong') is not None:
                    article_name = itt.strong.string
                    # FIX: some links carry no short title after the <strong>;
                    # unguarded itt.contents[1] raised IndexError there
                    if len(itt.contents) > 1:
                        shorttitles[article_id] = itt.contents[1]
                    articles.append((article_name, article_url, article_id))
                    continue

                # candidate for a general article
                article_name = itt.string if itt.string is not None else ''

                # FIX: prefix test -- the old 10-char slice comparison
                # article_name[0:10] == " mehr" only matched the exact
                # 5-char string and let "mehr ..." links through
                if article_name.startswith(" mehr"):
                    # just another link ("mehr") to an article
                    continue

                # FIX: Tag.get() instead of has_key() (removed in Python 3)
                if itt.get('id') is not None:
                    shorttitles[article_id] = article_name
                else:
                    articles.append((article_name, article_url, article_id))

            feed_articles = []
            for article_name, article_url, article_id in articles:
                url = self.root_url + article_url
                title = article_name
                pubdate = strftime('%a, %d %b')
                # short title, if recorded, becomes the description
                description = shorttitles.get(article_id, '')
                # we do not want the flag ("Impressum")
                if "HERAUSGEGEBEN VOM" in description:
                    continue
                d = dict(title=title, url=url, date=pubdate, description=description, content='')
                feed_articles.append(d)
            all_articles.append((feed_title, feed_articles))

        return all_articles
|
|
|
|
|
|
#2 |
|
Junior Member
![]() Posts: 3
Karma: 10
Join Date: Aug 2012
Device: Kindle 4
|
Small update
Hey,
the recipe had some problems parsing the titles of some articles. Here is the updated version. Code:
__license__ = 'GPL v3'
__copyright__ = '2012, 2013 Andreas Zeiser <andreas.zeiser@web.de>'
'''
szmobil.sueddeutsche.de/
'''
# History
# 2013.01.09 Fixed bugs in article titles containing "strong" and
# other small changes
# 2012.08.04 Initial release
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
import re
class SZmobil(BasicNewsRecipe):
    """Fetch paid articles from the "SZ E-Paper mobil" subscription of the
    German newspaper Sueddeutsche Zeitung (szmobil.sueddeutsche.de)."""

    title = u'Süddeutsche Zeitung mobil'
    __author__ = u'Andreas Zeiser'
    description = u'Nachrichten aus Deutschland. Zugriff auf kostenpflichtiges Abo SZ mobil.'
    publisher = u'Sueddeutsche Zeitung'
    language = u'de'
    publication_type = u'newspaper'
    category = u'news, politics, Germany'

    no_stylesheets = True
    oldest_article = 2
    # site serves Latin-1, not UTF-8
    encoding = 'iso-8859-1'
    needs_subscription = True
    remove_empty_feeds = True
    delay = 1
    cover_source = 'http://www.sueddeutsche.de/verlag'
    # if you want to get rid of the date on the title page use
    #   timefmt = ''
    timefmt = ' [%a, %d %b, %Y]'
    root_url = 'http://szmobil.sueddeutsche.de/'
    keep_only_tags = [dict(name='div', attrs={'class': 'article'})]

    def get_cover_url(self):
        """Return the URL of the current cover image from the publisher page."""
        src = self.index_to_soup(self.cover_source)
        image_url = src.find(attrs={'class': 'preview-image'})
        return image_url.div.img['src']

    def get_browser(self):
        """Log in to the subscription site and return the authenticated browser."""
        browser = BasicNewsRecipe.get_browser(self)
        # Login via fetching of Streiflicht -> fill out the login form
        url = self.root_url + 'show.php?id=streif'
        browser.open(url)
        browser.select_form(nr=0)  # select the first (login) form
        browser['username'] = self.username
        browser['password'] = self.password
        browser.submit()
        return browser

    def parse_index(self):
        """Build and return the list of (section_title, articles) tuples."""
        # find all sections on the front page
        src = self.index_to_soup('http://szmobil.sueddeutsche.de')
        feeds = []
        for itt in src.findAll('a', href=True):
            if itt['href'].startswith('show.php?section'):
                # section link text carries a two-character suffix -> strip it
                feeds.append((itt.string[0:-2], itt['href']))

        all_articles = []
        for feed in feeds:
            feed_url = self.root_url + feed[1]
            feed_title = feed[0]

            self.report_progress(0, ('Fetching feed') + ' %s...' % (feed_title if feed_title else feed_url))

            src = self.index_to_soup(feed_url)
            articles = []
            shorttitles = dict()
            for itt in src.findAll('a', href=True):
                if not itt['href'].startswith('show.php?id='):
                    continue
                article_url = itt['href']
                # FIX: raw string -- "\d" is an invalid escape in Python 3
                article_id = int(re.search(r"id=(\d*)&etag=", itt['href']).group(1))

                # first check if link is a special article in section "Meinungsseite"
                if itt.find('strong') is not None:
                    article_name = itt.strong.string
                    # links without a short title have only the <strong> child
                    if len(itt.contents) > 1:
                        shorttitles[article_id] = itt.contents[1]
                    articles.append((article_name, article_url, article_id))
                    continue

                # candidate for a general article
                article_name = itt.string if itt.string is not None else ''

                # idiomatic prefix test (same behavior as find(" mehr") == 0)
                if article_name.startswith(" mehr"):
                    # just another link ("mehr") to an article
                    continue

                # FIX: Tag.get() instead of has_key() (removed in Python 3)
                if itt.get('id') is not None:
                    shorttitles[article_id] = article_name
                else:
                    articles.append((article_name, article_url, article_id))

            feed_articles = []
            for article_name, article_url, article_id in articles:
                url = self.root_url + article_url
                title = article_name
                # if you want to get rid of date for each article use
                #   pubdate = strftime('')
                pubdate = strftime('[%a, %d %b]')
                # short title, if recorded, becomes the description
                description = shorttitles.get(article_id, '')
                # we do not want the flag ("Impressum")
                if "HERAUSGEGEBEN VOM" in description:
                    continue
                d = dict(title=title, url=url, date=pubdate, description=description, content='')
                feed_articles.append(d)
            all_articles.append((feed_title, feed_articles))

        return all_articles
|
|
|
|
| Advert | |
|
|
|
|
#3 |
|
Enthusiast
![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() Posts: 48
Karma: 32961
Join Date: Sep 2011
Device: kindle 3
|
Hi Ernst,
could you also provide an update for the free online version of the Süddeutsche? The articles seem to be cut off, and many pictures do not show properly. |
|
|
|
|
|
#4 | |
|
Wizard
![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() Posts: 1,166
Karma: 1410083
Join Date: Nov 2010
Location: Germany
Device: Sony PRS-650
|
Quote:
I am reading this recipe more or less every day and I can't see any cut-off image. Can you please use this thread to provide an example of your problem? (As soon as SZ is online again) Last edited by Divingduck; 02-17-2013 at 06:43 AM. |
|
|
|
|
![]() |
|
Similar Threads
|
||||
| Thread | Thread Starter | Forum | Replies | Last Post |
| Süddeutsche (paid) gets "old" issue | rogerben | Recipes | 1 | 04-26-2012 06:45 PM |
| Globe subscription -- anyone not get their "paper" this morning? | Mememememe | Kobo Reader | 77 | 01-20-2012 11:41 AM |
| Recipe for german newspaper "Berliner Zeitung" | a.peter | Recipes | 1 | 12-13-2011 04:02 PM |
| (Now claimed) Free (paper) subscription to "TIME" Magazine for one forum visitor | maxlawman | Deals and Resources (No Self-Promotion or Affiliate Links) | 3 | 09-17-2011 03:23 PM |