View Single Post
Old 11-19-2010, 03:22 PM   #1
siebert
Developer
siebert has a complete set of Star Wars action figures.siebert has a complete set of Star Wars action figures.siebert has a complete set of Star Wars action figures.
 
Posts: 155
Karma: 280
Join Date: Nov 2010
Device: Kindle 3 (Keyboard) 3G / iPad 9 WiFi / Google Pixel 6a (Android)
Recipe for Zeit Abo EPUB download

Hi,

"Die Zeit" provides its current issue as an EPUB download for subscribers.

With the following recipe, calibre can be used to download the EPUB file from the protected webpage.

Ciao,
Steffen

Code:
#!/usr/bin/env  python
# -*- coding: utf-8 -*-

# Module-level metadata: license, copyright holder and docstring markup format.
__license__   = 'GPL v3'
__copyright__ = '2010, Steffen Siebert <calibre at steffensiebert.de>'
__docformat__ = 'restructuredtext de'

# NOTE: this string follows the assignments above, so Python evaluates it as a
# plain expression statement; it is not stored as the module docstring.
"""
Die Zeit EPUB
"""

import os, urllib2, zipfile, cookielib, re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile

class ZeitEPUBAbo(BasicNewsRecipe):
    """Recipe that downloads the subscriber EPUB of "Die Zeit".

    Rather than assembling articles, ``build_index`` logs in to
    premium.zeit.de with ``self.username`` / ``self.password``, follows two
    links to the EPUB download, and extracts the archive into
    ``self.output_dir``.
    """

    title = u'Zeit EPUB Abo'
    description = u'Das EPUB Abo der Zeit'
    language = 'de'
    lang = 'de-DE'

    __author__ = 'Steffen Siebert'
    needs_subscription = True

    conversion_options = {
        'no_default_epub_cover' : True
    }

    def _open_url(self, url):
        """Open *url* via urllib2 and return the response object.

        Any ``urllib2.HTTPError`` is reported through ``report_progress`` and
        converted into a ``ValueError``. The caller must close the response.
        """
        try:
            return urllib2.urlopen(url)
        except urllib2.HTTPError:
            self.report_progress(0,_("Can't login to download issue"))
            raise ValueError('Failed to login, check your username and password')

    def _read_url(self, url):
        """Return the full body of *url*, always closing the response."""
        response = self._open_url(url)
        try:
            return response.read()
        finally:
            response.close()

    def build_index(self):
        """Download the EPUB of the current issue and unpack it.

        Returns:
            Path of ``content.opf`` inside ``self.output_dir``.

        Raises:
            ValueError: if a request fails with an HTTP error, or if one of
                the two expected links is not found in the fetched pages.
        """
        domain = "http://premium.zeit.de"

        # Cookie handling plus HTTP basic auth for the subscriber area.
        cookie_jar = cookielib.LWPCookieJar()
        cookie_handler = urllib2.HTTPCookieProcessor(cookie_jar)
        auth_handler = urllib2.HTTPBasicAuthHandler()
        auth_handler.add_password(realm='ZEIT_online Angebote', uri=domain, user=self.username, passwd=self.password)
        opener = urllib2.build_opener(cookie_handler, auth_handler)
        # Installed globally so that urllib2.urlopen() in _open_url uses it.
        urllib2.install_opener(opener)

        # Step 1: login page; it should contain the link to the e-paper area.
        url = domain + "/abovorteile/cgi-bin/_er_member/p4z.fpl?ER_Do=getUserData&ER_NextTemplate=login_ok"
        soup = self.index_to_soup(self._read_url(url))
        link = soup.find('a', href=re.compile('.*Abo_RedirectTo=epaper.zeit.de/index_abovorteile.php&user=.*'))
        if not link:
            self.report_progress(0,_("Can't find first link."))
            raise ValueError('Failed to find first link. Look for updated recipe.')

        # Step 2: the first link is appended to the domain; the page behind it
        # should contain the absolute EPUB download link.
        soup = self.index_to_soup(self._read_url(domain + link["href"]))
        link = soup.find('a', href=re.compile('^http://contentserver.hgv-online.de/nodrm/fulfillment\\?distributor=zeit-online&orderid=zeit_online.*'))
        if not link:
            self.report_progress(0,_("Can't find second link."))
            raise ValueError('Failed to find second link. Look for updated recipe.')

        # Step 3: fetch the EPUB into a temporary file. try/finally is used
        # instead of "with" because urllib2 responses are not context managers.
        response = self._open_url(link["href"])
        try:
            tmp = PersistentTemporaryFile(suffix='.epub')
            try:
                self.report_progress(0,_('downloading epub'))
                tmp.write(response.read())
            finally:
                tmp.close()
        finally:
            response.close()

        # Step 4: unpack the archive; close it even if extraction fails.
        zfile = zipfile.ZipFile(tmp.name, 'r')
        try:
            self.report_progress(0,_('extracting epub'))
            zfile.extractall(self.output_dir)
        finally:
            zfile.close()

        # Assumes the archive keeps content.opf at its top level -- TODO confirm
        # against a current download if the returned path does not exist.
        index = os.path.join(self.output_dir, 'content.opf')

        self.report_progress(1,_('epub downloaded and extracted'))

        return index
siebert is offline   Reply With Quote