MobileRead Forums - View Single Post - New recipe

tzwenn · 11-26-2012, 07:28 PM

Hi,
I wrote a recipe to download news from jungewelt.de.
junge Welt already provides the current newspaper in epub and pdf. So I decided to use the re-useable code from achims https://www.mobileread.com/forums/sho...1&postcount=16

This is what I have:

Spoiler:

Code:

#!/usr/bin/env  python
# -*- coding: utf-8 -*-

__license__   = 'GPL v3'
__copyright__ = '2012, Sven Dziadek sven . dziadek at gmx . de'
__docformat__ = 'restructuredtext de'

GET_MOBI=False
GET_PDF=True

'''
https://www.jungewelt.de/abo/onlineabo.php
'''
import os, urllib2, zipfile, sys
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.ptempfile import PersistentTemporaryFile
from urlparse import urlparse


class TazDigiabo(BasicNewsRecipe):

    title = u'junge Welt Onlineabo'
    description = u'Das ePub Onlineabo der jungen Welt'
    language = 'de'
    lang = 'de-DE'

    __author__ = 'Sven Dziadek'
    needs_subscription = True

    conversion_options = {
        'no_default_epub_cover' : True
    }

    def build_index(self):
        browser = self.get_browser()
        

        # new login process
        # must be done here so that browser is already at a website
        response = browser.open('https://www.jungewelt.de/loginFailed.php')
        browser.select_form(nr=1)
        browser.form['username'] = self.username
        browser.form['password'] = self.password
        browser.submit()
        # now find the correct file, we will still use the ePub file
        epublink = browser.find_link(text_regex=re.compile('.*Downloads*'))
        response = browser.follow_link(epublink)
        epublink = browser.find_link(text_regex=re.compile('.*ePub-Datei*'))
        response = browser.follow_link(epublink)


        # Cheat calibre's recipe method, as in post from Starsom17
        self.report_progress(0,_('downloading epub'))

        dir = PersistentTemporaryDirectory()
        epub_file = PersistentTemporaryFile(suffix='.epub',dir=dir)
        epub_file.write(response.read())
        epub_file.close()
        zfile = zipfile.ZipFile(epub_file.name, 'r')
        self.report_progress(0.1,_('extracting epub'))
        zfile.extractall(self.output_dir)
        epub_file.close()
        #index = []
        index = os.path.join(self.output_dir, 'content.opf')
        self.report_progress(0.2,_('epub downloaded and extracted'))


        #
        # Now, download the remaining files
        #
        if (GET_MOBI):
           self.report_progress(0.3,_('downloading mobi'))
           mobi_file = PersistentTemporaryFile(suffix='.mobi',dir=dir)
           browser.back()
           response = browser.follow_link(mobilink)
           mobi_file.write(response.read())
           mobi_file.close()

        if (GET_PDF):
           self.report_progress(0.4,_('downloading pdf'))
           pdf_file = PersistentTemporaryFile(suffix='.pdf',dir=dir)
           browser.back()
           pdflink = browser.find_link(text_regex=re.compile('.*PDF-Datei*'))
           response = browser.follow_link(pdflink)
           pdf_file.write(response.read())
           pdf_file.close()
           
        # Get all formats into Calibre's database as one single book entry
        self.report_progress(0.6,_('Adding files to Calibre db'))
        cmd = "calibredb add -1 " + dir
        os.system(cmd)
        #sys.exit(0)

        return index

As achims suggested, I am adding the two files to the database myself so that the two books appear as one book in calibre with different formats. (calibredb add -1 does it.)
Additionionally calibre assembles the unzipped epub again.
But when I already added the epub I don't need that calibre adds it again..

So again in short:
At the moment when I use the plugin, I get two books in calibre, one with the assembled epub and another book with the original epub and the pdf in it.

Can I change that somehow?

Except for that it is ready to be used.

Thanks