Hi,
I wrote a recipe to download news from jungewelt.de.
junge Welt already provides the current issue as epub and pdf, so I decided to reuse the code from achims' post:
https://www.mobileread.com/forums/sho...1&postcount=16
This is what I have:
Spoiler:
Code:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Module metadata.
__license__ = 'GPL v3'
__copyright__ = '2012, Sven Dziadek sven . dziadek at gmx . de'
__docformat__ = 'restructuredtext de'
# Download switches read by TazDigiabo.build_index(): when a switch is True,
# that format is downloaded in addition to the ePub, which is always fetched.
GET_MOBI=False
GET_PDF=True
# NOTE: this bare string follows other statements, so it is an expression
# statement with no effect rather than the module docstring.
# Presumably the URL of the online-subscription page -- TODO confirm.
'''
https://www.jungewelt.de/abo/onlineabo.php
'''
import os, urllib2, zipfile, sys
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.ptempfile import PersistentTemporaryFile
from urlparse import urlparse
class TazDigiabo(BasicNewsRecipe):
    """Recipe that fetches the junge Welt online-subscription edition.

    Instead of scraping articles, ``build_index`` logs in with the
    subscriber's credentials, downloads the publisher's ready-made ePub and
    unpacks it into the recipe's output directory. Depending on the
    module-level ``GET_MOBI`` / ``GET_PDF`` switches, further formats are
    downloaded next to the ePub, and all downloaded files are then added to
    the calibre library as a single book via ``calibredb add -1``.
    """

    title = u'junge Welt Onlineabo'
    description = u'Das ePub Onlineabo der jungen Welt'
    language = 'de'
    lang = 'de-DE'
    __author__ = 'Sven Dziadek'
    needs_subscription = True
    conversion_options = {
        'no_default_epub_cover' : True
    }

    def _store_download(self, response, suffix, tmp_dir):
        """Save the body of *response* to a new temporary file.

        :param response: object with a ``read()`` method returning the data.
        :param suffix: file name suffix including the dot, e.g. ``'.pdf'``.
        :param tmp_dir: directory in which the file is created.
        :return: path of the written file.

        The file is closed even when reading the response fails.
        """
        target = PersistentTemporaryFile(suffix=suffix, dir=tmp_dir)
        try:
            target.write(response.read())
        finally:
            target.close()
        return target.name

    def build_index(self):
        """Download the current issue and return the path to its OPF file.

        :return: path to ``content.opf`` inside ``self.output_dir``, i.e.
            the package file of the extracted ePub.

        Side effects: performs the site login, writes the downloaded files
        to a persistent temporary directory, extracts the ePub into
        ``self.output_dir`` and runs ``calibredb add -1`` on the temporary
        directory.
        """
        # Imported locally because the module header imports neither module.
        import re
        import subprocess

        browser = self.get_browser()

        # New login process.
        # Must be done here so that the browser is already at a website.
        browser.open('https://www.jungewelt.de/loginFailed.php')
        browser.select_form(nr=1)
        browser.form['username'] = self.username
        browser.form['password'] = self.password
        browser.submit()

        # Navigate to the downloads page, then fetch the ePub from it.
        downloads_link = browser.find_link(
            text_regex=re.compile('.*Downloads*'))
        browser.follow_link(downloads_link)
        epub_link = browser.find_link(
            text_regex=re.compile('.*ePub-Datei*'))
        response = browser.follow_link(epub_link)

        # Bypass the normal recipe flow: hand calibre the unpacked ePub
        # instead of an index assembled from downloaded articles.
        self.report_progress(0, _('downloading epub'))
        tmp_dir = PersistentTemporaryDirectory()
        epub_path = self._store_download(response, '.epub', tmp_dir)

        self.report_progress(0.1, _('extracting epub'))
        # The context manager closes the archive even if extraction fails.
        with zipfile.ZipFile(epub_path, 'r') as archive:
            archive.extractall(self.output_dir)
        index = os.path.join(self.output_dir, 'content.opf')
        self.report_progress(0.2, _('epub downloaded and extracted'))

        # Now download the remaining formats. Each download first steps the
        # browser back so that the link lookup runs on the previous page.
        if GET_MOBI:
            self.report_progress(0.3, _('downloading mobi'))
            browser.back()
            # NOTE(review): link text assumed by analogy with the ePub and
            # PDF links -- confirm against the actual downloads page.
            mobi_link = browser.find_link(
                text_regex=re.compile('.*Mobi-Datei*'))
            response = browser.follow_link(mobi_link)
            self._store_download(response, '.mobi', tmp_dir)

        if GET_PDF:
            self.report_progress(0.4, _('downloading pdf'))
            browser.back()
            pdf_link = browser.find_link(
                text_regex=re.compile('.*PDF-Datei*'))
            response = browser.follow_link(pdf_link)
            self._store_download(response, '.pdf', tmp_dir)

        # Get all formats into calibre's database as one single book entry.
        # An argument list (no shell) keeps a temporary path containing
        # spaces or shell metacharacters intact.
        self.report_progress(0.6, _('Adding files to Calibre db'))
        subprocess.call(['calibredb', 'add', '-1', tmp_dir])
        return index
As achims suggested, I am adding the two files to the database myself so that the two books appear as one book in calibre with different formats. (calibredb add -1 does it.)
Additionally, calibre assembles the unzipped epub again.
But since I have already added the epub myself, I don't need calibre to add it a second time.
So again in short:
At the moment when I use the plugin, I get two books in calibre, one with the assembled epub and another book with the original epub and the pdf in it.
Can I change that somehow?
Except for that it is ready to be used.
Thanks