Hi,
"Die Zeit" provides their current issue as EPUB download for subscribers.
With the following recipe, calibre can be used to download the EPUB file from the protected web page.
Ciao,
Steffen
Code:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2010, Steffen Siebert <calibre at steffensiebert.de>'
__docformat__ = 'restructuredtext de'
"""
Die Zeit EPUB
"""
import os, urllib2, zipfile, cookielib, re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
class ZeitEPUBAbo(BasicNewsRecipe):
    """Recipe that downloads the ready-made EPUB issue of "Die Zeit".

    Instead of building articles from feeds, ``build_index`` logs in to the
    subscriber area with the configured username/password, follows two
    links to the EPUB download, unpacks the archive into ``self.output_dir``
    and returns the path of the ``content.opf`` expected there.
    """

    title = u'Zeit EPUB Abo'
    description = u'Das EPUB Abo der Zeit'
    language = 'de'
    lang = 'de-DE'

    __author__ = 'Steffen Siebert'
    needs_subscription = True

    conversion_options = {
        'no_default_epub_cover' : True
    }

    def _zeit_open(self, url):
        """Open *url* through the installed urllib2 opener.

        Returns the open response object; the caller must close it.
        Raises ValueError (after reporting progress) on any HTTPError.
        """
        try:
            return urllib2.urlopen(url)
        except urllib2.HTTPError:
            self.report_progress(0,_("Can't login to download issue"))
            raise ValueError('Failed to login, check your username and password')

    def _zeit_soup(self, url):
        """Fetch *url* and return its content parsed by ``index_to_soup``."""
        response = self._zeit_open(url)
        try:
            return self.index_to_soup(response.read())
        finally:
            # Always release the connection, even if reading/parsing fails.
            response.close()

    def build_index(self):
        """Download and unpack the current issue.

        Returns the path ``<output_dir>/content.opf``.
        Raises ValueError when an HTTP request fails or when one of the
        two expected links cannot be found on the fetched pages.
        """
        # Cookie handling plus HTTP basic auth for the premium area.
        cookie_jar = cookielib.LWPCookieJar()
        cookie_handler = urllib2.HTTPCookieProcessor(cookie_jar)
        auth_handler = urllib2.HTTPBasicAuthHandler()
        auth_handler.add_password(realm='ZEIT_online Angebote', uri="http://premium.zeit.de", user=self.username, passwd=self.password)
        opener = urllib2.build_opener(cookie_handler, auth_handler)
        # NOTE: installs the opener process-wide; every later
        # urllib2.urlopen() call goes through it.
        urllib2.install_opener(opener)

        domain = "http://premium.zeit.de"
        url = domain + "/abovorteile/cgi-bin/_er_member/p4z.fpl?ER_Do=getUserData&ER_NextTemplate=login_ok"

        # Step 1: login page -> relative link redirecting to the e-paper site.
        soup = self._zeit_soup(url)
        link = soup.find('a', href=re.compile('.*Abo_RedirectTo=epaper.zeit.de/index_abovorteile.php&user=.*'))
        if not link:
            self.report_progress(0,_("Can't find first link."))
            raise ValueError('Failed to find first link. Look for updated recipe.')
        url = domain + link["href"]

        # Step 2: e-paper page -> absolute link to the EPUB download.
        soup = self._zeit_soup(url)
        link = soup.find('a', href=re.compile('^http://contentserver.hgv-online.de/nodrm/fulfillment\\?distributor=zeit-online&orderid=zeit_online.*'))
        if not link:
            self.report_progress(0,_("Can't find second link."))
            raise ValueError('Failed to find second link. Look for updated recipe.')
        url = link["href"]

        # Step 3: store the EPUB in a temporary file.
        response = self._zeit_open(url)
        try:
            tmp = PersistentTemporaryFile(suffix='.epub')
            try:
                self.report_progress(0,_('downloading epub'))
                tmp.write(response.read())
            finally:
                # Must be closed (flushed) before zipfile re-opens it by name.
                tmp.close()
        finally:
            response.close()

        # Step 4: unpack the archive into the recipe's output directory.
        zfile = zipfile.ZipFile(tmp.name, 'r')
        try:
            self.report_progress(0,_('extracting epub'))
            zfile.extractall(self.output_dir)
        finally:
            # The original closed `tmp` a second time here and leaked the
            # ZipFile handle; close the archive instead.
            zfile.close()

        # Assumes the OPF sits at the archive root under this exact name.
        index = os.path.join(self.output_dir, 'content.opf')
        self.report_progress(1,_('epub downloaded and extracted'))
        return index