Developer
Posts: 155
Karma: 280
Join Date: Nov 2010
Device: Kindle 3 (Keyboard) 3G / iPad 9 WiFi / Google Pixel 6a (Android)
|
Some users have issues with link navigation in the EPUB file created by calibre (see https://www.mobileread.com/forums/showthread.php?t=90005), while the original EPUB works fine. I therefore patched calibre to handle downloaded EPUB files without modifying them.
As I'm new to Bazaar, I couldn't find a way to export this patch on its own, so it also contains my previous patch, posted at https://www.mobileread.com/forums/showthread.php?t=108656
Here is my updated recipe (works only with a patched calibre!):
Code:
#!/usr/bin/env python
# -*- coding: utf-8 mode: python -*-
__license__ = 'GPL v3'
__copyright__ = '2010, Steffen Siebert <calibre at steffensiebert.de>'
__docformat__ = 'restructuredtext de'
__version__ = '1.2'
"""
Die Zeit EPUB
"""
import os, urllib2, zipfile, re
from calibre.web.feeds.news import BasicNewsRecipe
class ZeitEPUBAbo(BasicNewsRecipe):
    """Recipe that downloads the subscriber EPUB edition of "Die Zeit".

    Instead of building an index from feeds, ``build_index`` logs in to the
    premium site, follows the links to the EPUB download and stores the
    file unchanged in the recipe's output directory.
    """

    title = u'Zeit EPUB Abo'
    description = u'Das EPUB Abo der Zeit'
    language = 'de'
    lang = 'de-DE'
    __author__ = 'Steffen Siebert'
    # The download needs the subscriber's username and password.
    needs_subscription = True
    conversion_options = {
        'no_default_epub_cover' : True
    }

    def build_index(self):
        """Download the current issue as EPUB and return the path to it.

        Returns:
            Path of ``result.epub`` inside ``self.output_dir``.

        Raises:
            ValueError: if opening the login URL fails with an HTTP error.
        """
        domain = "http://premium.zeit.de"
        url = domain + "/abovorteile/cgi-bin/_er_member/p4z.fpl?ER_Do=getUserData&ER_NextTemplate=login_ok"
        epubName = os.path.join(self.output_dir, 'result.epub')

        browser = self.get_browser()
        browser.add_password(domain, self.username, self.password)
        try:
            browser.open(url)
        except urllib2.HTTPError:
            self.report_progress(0,_("Can't login to download issue"))
            raise ValueError('1: Failed to login, check your username and password')

        # Navigate from the login result page to the e-paper page; only the
        # browser state matters here, the returned page itself is not used.
        browser.follow_link(text="DIE ZEIT als E-Paper")
        # Follow the link whose URL matches the DRM-free EPUB download.
        response = browser.follow_link(url_regex=re.compile('^http://contentserver.hgv-online.de/nodrm/fulfillment\\?distributor=zeit-online&orderid=zeit_online.*'))

        self.report_progress(0,_('downloading epub'))
        # Use a context manager so the file is closed even if reading the
        # response or writing to disk raises.
        with open(epubName, "wb") as f:
            f.write(response.read())
        return epubName
And this is the necessary calibre patch (plumber.py and input.py are the relevant files):
Code:
# Bazaar merge directive format 2 (Bazaar 0.90)
# revision_id: siebert@steffensiebert.de-20101128162138-\
# lq8k3tkgv4im2f7o
# target_branch: http://bazaar.launchpad.net/~kovid/calibre/trunk/
# testament_sha1: 070aa8a68cee7f89dd88061b913e61ac6490dc42
# timestamp: 2010-11-28 17:23:24 +0100
# base_revision_id: kovid@kovidgoyal.net-20101128023305-\
# 0ew07r4bzia4bb0t
#
# Begin patch
=== modified file 'src/calibre/ebooks/conversion/plumber.py'
--- src/calibre/ebooks/conversion/plumber.py 2010-11-20 04:26:57 +0000
+++ src/calibre/ebooks/conversion/plumber.py 2010-11-28 16:21:38 +0000
@@ -838,6 +838,15 @@
self.dump_input(self.oeb, tdir)
if self.abort_after_input_dump:
return
+ oebExt = os.path.splitext(self.oeb)[1]
+ outExt = os.path.splitext(self.output)[1]
+ if outExt.lower() == oebExt.lower():
+ self.log("Result is already in the correct format, no further processing necessary.")
+ shutil.copyfile(self.oeb, self.output)
+ self.log(self.output_fmt.upper(), 'output written to', self.output)
+ self.flush()
+ return
+
if self.input_fmt in ('recipe', 'downloaded_recipe'):
self.opts_to_mi(self.user_metadata)
if not hasattr(self.oeb, 'manifest'):
=== modified file 'src/calibre/web/feeds/__init__.py'
--- src/calibre/web/feeds/__init__.py 2010-09-13 16:15:35 +0000
+++ src/calibre/web/feeds/__init__.py 2010-11-28 13:24:14 +0000
@@ -14,6 +14,11 @@
from calibre import entity_to_unicode, strftime
from calibre.utils.date import dt_factory, utcnow, local_tz
+FEED_NAME = 'feed%d.html'
+''' Template for the feed index file. '''
+ARTICLE_NAME = 'feed%d_article%d.html'
+''' Template for the article file. '''
+
class Article(object):
def __init__(self, id, title, url, author, summary, published, content):
=== modified file 'src/calibre/web/feeds/input.py'
--- src/calibre/web/feeds/input.py 2010-09-17 18:02:43 +0000
+++ src/calibre/web/feeds/input.py 2010-11-28 16:21:38 +0000
@@ -102,8 +102,11 @@
disabled = getattr(ro, 'recipe_disabled', None)
if disabled is not None:
raise RecipeDisabled(disabled)
- ro.download()
+ index = ro.download()
self.recipe_object = ro
+ if index.endswith('.epub'):
+ # The result is already in EPUB format, no need to search for .opf file.
+ return os.path.abspath(index)
for key, val in self.recipe_object.conversion_options.items():
setattr(opts, key, val)
=== modified file 'src/calibre/web/feeds/news.py'
--- src/calibre/web/feeds/news.py 2010-11-04 22:26:10 +0000
+++ src/calibre/web/feeds/news.py 2010-11-28 13:24:14 +0000
@@ -21,7 +21,7 @@
from calibre.web import Recipe
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
-from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
+from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed, FEED_NAME, ARTICLE_NAME
from calibre.web.fetch.simple import option_parser as web2disk_option_parser
from calibre.web.fetch.simple import RecursiveFetcher
from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
@@ -912,16 +912,10 @@
self.feed_objects = feeds
for f, feed in enumerate(feeds):
- feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
- if not os.path.isdir(feed_dir):
- os.makedirs(feed_dir)
for a, article in enumerate(feed):
if a >= self.max_articles_per_feed:
break
- art_dir = os.path.join(feed_dir, 'article_%d'%a)
- if not os.path.isdir(art_dir):
- os.makedirs(art_dir)
try:
url = self.print_version(article.url)
except NotImplementedError:
@@ -934,12 +928,12 @@
func, arg = (self.fetch_embedded_article, article) if self.use_embedded_content else \
((self.fetch_obfuscated_article if self.articles_are_obfuscated \
else self.fetch_article), url)
- req = WorkRequest(func, (arg, art_dir, f, a, len(feed)),
+ req = WorkRequest(func, (arg, self.output_dir, f, a, len(feed)),
{}, (f, a), self.article_downloaded,
self.error_in_article_download)
req.feed = feed
req.article = article
- req.feed_dir = feed_dir
+ req.feed_dir = self.output_dir
self.jobs.append(req)
@@ -961,8 +955,7 @@
for f, feed in enumerate(feeds):
html = self.feed2index(f,feeds)
- feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
- with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi:
+ with open(os.path.join(self.output_dir, FEED_NAME%f), 'wb') as fi:
fi.write(html)
self.create_opf(feeds)
self.report_progress(1, _('Feeds downloaded to %s')%index)
@@ -1148,9 +1141,7 @@
ref.title = 'Masthead Image'
opf.guide.append(ref)
- manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
- manifest.append(os.path.join(dir, 'index.html'))
- manifest.append(os.path.join(dir, 'index.ncx'))
+ manifest = [dir, os.path.join(dir, 'index.html'), os.path.join(dir, 'index.ncx')]
# Get cover
cpath = getattr(self, 'cover_path', None)
@@ -1183,7 +1174,6 @@
f = feeds[num]
for j, a in enumerate(f):
if getattr(a, 'downloaded', False):
- adir = 'feed_%d/article_%d/'%(num, j)
auth = a.author
if not auth:
auth = None
@@ -1192,14 +1182,15 @@
desc = None
else:
desc = self.description_limiter(desc)
- entries.append('%sindex.html'%adir)
+ indexname = ARTICLE_NAME%(num, j)
+ entries.append(indexname)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
- parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
+ parent.add_item(indexname, None, a.title if a.title else _('Untitled Article'),
play_order=po, author=auth, description=desc)
- last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
+ last = os.path.join(self.output_dir, (indexname).replace('/', os.sep))
for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp])
relp = sp[len(prefix):]
@@ -1226,7 +1217,7 @@
if len(feeds) > 1:
for i, f in enumerate(feeds):
- entries.append('feed_%d/index.html'%i)
+ entries.append(FEED_NAME%i)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
@@ -1237,11 +1228,11 @@
desc = getattr(f, 'description', None)
if not desc:
desc = None
- feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
+ feed_index(i, toc.add_item(FEED_NAME%i, None,
f.title, play_order=po, description=desc, author=auth))
else:
- entries.append('feed_%d/index.html'%0)
+ entries.append(FEED_NAME%0)
feed_index(0, toc)
for i, p in enumerate(entries):
@@ -1253,7 +1244,7 @@
opf.render(opf_file, ncx_file)
def article_downloaded(self, request, result):
- index = os.path.join(os.path.dirname(result[0]), 'index.html')
+ index = os.path.join(os.path.dirname(result[0]), ARTICLE_NAME%request.requestID)
if index != result[0]:
if os.path.exists(index):
os.remove(index)
@@ -1263,7 +1254,7 @@
article = request.article
self.log.debug('Downloaded article:', article.title, 'from', article.url)
article.orig_url = article.url
- article.url = 'article_%d/index.html'%a
+ article.url = ARTICLE_NAME%request.requestID
article.downloaded = True
article.sub_pages = result[1][1:]
self.jobs_done += 1
=== modified file 'src/calibre/web/feeds/templates.py'
--- src/calibre/web/feeds/templates.py 2010-08-29 18:39:20 +0000
+++ src/calibre/web/feeds/templates.py 2010-11-28 13:24:14 +0000
@@ -12,6 +12,7 @@
TABLE, TD, TR
from calibre import preferred_encoding, strftime, isbytestring
+from calibre.web.feeds import FEED_NAME, ARTICLE_NAME
def CLASS(*args, **kwargs): # class is a reserved word in Python
kwargs['class'] = ' '.join(args)
@@ -92,7 +93,7 @@
for i, feed in enumerate(feeds):
if feed:
li = LI(A(feed.title, CLASS('feed', 'calibre_rescale_120',
- href='feed_%d/index.html'%i)), id='feed_%d'%i)
+ href=FEED_NAME%i)), id='feed_%d'%i)
ul.append(li)
div = DIV(
PT(IMG(src=masthead,alt="masthead"),style='text-align:center'),
@@ -115,14 +116,14 @@
hr.tail = '| '
if f+1 < len(feeds):
- link = A('Next section', href='../feed_%d/index.html'%(f+1))
+ link = A('Next section', href=FEED_NAME%(f+1))
link.tail = ' | '
navbar.append(link)
- link = A('Main menu', href="../index.html")
+ link = A('Main menu', href="index.html")
link.tail = ' | '
navbar.append(link)
if f > 0:
- link = A('Previous section', href='../feed_%d/index.html'%(f-1))
+ link = A('Previous section', href=FEED_NAME%(f-1))
link.tail = ' |'
navbar.append(link)
if top:
@@ -203,20 +204,19 @@
navbar.append(BR())
navbar.append(BR())
else:
- next = 'feed_%d'%(feed+1) if art == number_of_articles_in_feed - 1 \
- else 'article_%d'%(art+1)
- up = '../..' if art == number_of_articles_in_feed - 1 else '..'
- href = '%s%s/%s/index.html'%(prefix, up, next)
+ next = FEED_NAME%(feed+1) if art == number_of_articles_in_feed - 1 \
+ else ARTICLE_NAME%(feed, art+1)
+ href = next
navbar.text = '| '
navbar.append(A('Next', href=href))
- href = '%s../index.html#article_%d'%(prefix, art)
+ href = FEED_NAME%feed + '#article_%d'%art
navbar.iterchildren(reversed=True).next().tail = ' | '
navbar.append(A('Section Menu', href=href))
- href = '%s../../index.html#feed_%d'%(prefix, feed)
+ href = 'index.html#feed_%d'%feed
navbar.iterchildren(reversed=True).next().tail = ' | '
navbar.append(A('Main Menu', href=href))
if art > 0 and not bottom:
- href = '%s../article_%d/index.html'%(prefix, art-1)
+ href = ARTICLE_NAME%(feed, art-1)
navbar.iterchildren(reversed=True).next().tail = ' | '
navbar.append(A('Previous', href=href))
navbar.iterchildren(reversed=True).next().tail = ' | '
=== modified file 'src/calibre/web/fetch/simple.py'
--- src/calibre/web/fetch/simple.py 2010-11-04 19:35:23 +0000
+++ src/calibre/web/fetch/simple.py 2010-11-28 13:24:14 +0000
@@ -7,7 +7,7 @@
Fetch a webpage and its links recursively. The webpages are saved to disk in
UTF-8 encoding with any charset declarations removed.
'''
-import sys, socket, os, urlparse, re, time, copy, urllib2, threading, traceback
+import sys, socket, os, urlparse, re, time, copy, urllib2, threading, traceback, hashlib
from urllib import url2pathname, quote
from httplib import responses
from PIL import Image
@@ -334,7 +334,7 @@
self.log.exception('Could not fetch image ', iurl)
continue
c += 1
- fname = ascii_filename('img'+str(c))
+ fname = ascii_filename(hashlib.sha1(data).hexdigest())
if isinstance(fname, unicode):
fname = fname.encode('ascii', 'replace')
imgpath = os.path.join(diskpath, fname+'.jpg')
# Begin bundle
IyBCYXphYXIgcmV2aXNpb24gYnVuZGxlIHY0CiMKQlpoOTFBWSZTWVUyKlEACCX/gARUQABa7//3
f+dWjr////BgDY7oqd3rnd765strJWyvc1dQG+c+g925u73mp7e72wWR1m7bwkomk9GppqeTJTye
gT0aTKDR6RtTJ6j1ABpo0CSQCaaBMpPFE9HqJ6j2qHlP1E9TEAMgAA0AIpknqnlMjI0BoDT0CMmQ
0ZAaZAJESZE0mCaTwSJsNI9SGIB6mjEaAZHqCKRU9NNE9A0mIZqnpNo1PUGhp6Q0AAAAkkE1MARi
aaKeVPaj1TzRTJspppkADIBpGkEv1w2WNxWaTtqODDyMTGLp20v7zjejchsb/1Fd+LvEoZnKHHXr
qIw488qO6ZysLu1yz2OnmZwGA8MMt2VEmctJAmMgC2oQiJCG1AtAQ5u50yO0s6Mhqj0WwKX4Y34Z
HERfeWr0oSSkEHHScltFjwN3YL2izCabSTbYm0m2xeX4CStojy1KHKF0OrNcGuZZOuNjhq/xQXss
x2ijKMuewtdlwgyWi2QOlHd0cxeaWDDsjm8WDzaVmxqthJycQUOqty4tCqPqP0/dP9f40z/cUn/p
T2R83BcC0KOEvc6UdYA8njGpHHdtGJ9YRAuEIP2UYYS+JhPSjLF2yMi5B1IH16fdGZ92cxA8mGeQ
eNxIq1YeYFh1RooGISjCgNXfcLJ0M35RAnXHOhBh40QUbtbOrjMpplOBJh8+Ajkz6dGnWyrnY5gE
pOyRnQWKtjOmq6fBLbtsBEsOyJE6+enoXEUo4UZGMgxiNP3dNhVwQRAReIepyaDTqJn3m7P5nO2u
sUCtiNVU3FYaXap7Wji6FtB3AzKcLXtVjmhNGaDHWv6oP0au8vHkjJWYT18LbcIfYaabPdtXIrdm
SRn8K7C5r2o9kGNxPKFSNjWmjYG+5vYlbv9rFThGjZ+OvZzllHultghd67xxlrTlcEEbmVkkROXU
jF9GEfD1dSt8qLmsDoKynK1s6L96nVeyu7PNRXTTR49/2cDZ03FR4IoZCcQJODAxEefniHDUSfxT
iliNVJwISVsQpFcbspgc5x7PRy2oduHE4lvdGLe0UaES1DSgkQuUXNCpHjELMRUi5DFVvlAAIISv
G5+FtoSjHEJKhRMPJ+tCYtEweSOS86kUxMJzKNi43FVm3+q+xUq9LAghWpEEf7aheclAtKixCnIk
Sa9P+XdJQHLhS+naCWRj0Rka0Ml0b1S5trf4p0otpq1oWIAbbjEYdgcdxyLrtJeQ0hVgxYFi85JK
dJfGDIaUg8mZnC9sRCpKyaa2E+OmW/ZrNZXq90p0lyL4RAm/czTqkpC4pSZRrCvpPEY7UOsxMdr9
tiJE44AQq6gz2mNUU2Z4sUHyxl3EYyRe8TXBsQmaRciHBC1ikbxgpvfdw28mGPakm1aOj0+ziDh8
tuuGVamjAihA22dOZM2YDFAx2GV1kJFlWj8SNr6U1GZkNMXxqSUnWjCIpDHcaFUox12lz9o1DyRx
dAQqTcGSZSV+gD0jFdgJJ5cYlxLB+d5LkBr60lzUjMqyvfbEdUhmoL43xoee4ckmY8aipEDgaygl
iXGqxqvkPmfyHpLCcheQWMgIFpKkhEsYnkHF51sFDIjAiZkDXtSUGAfSmuLLA0dXDguyyktLRLpB
5OLokYIVqpKk5ueA7dKvtYM4YpYB5gYEbIyIV8ukxRqDACwuxhWk3JU6i+BtLWccFoB5l2c99HPh
Ubm2bq3sTQlPFa0LVSIWC3DC/E2ScBnNUv1p7gJKF/QVQuZaN+8eJJchUqkkai2w0JwrAtHUIVpm
UOIzDyA4yfCRGdtfSsPjEHnoJAw0BpogMQlzpxoOCcM66biDU1QaFhriSExdqMVLsic7NoznyfKM
GuWY5CqQzpMvHXDWLpwUULMl4NhV2E8jymg4RmjHzJK4gQHQL874U1kc8QILpQbdMthBtgD4FShE
mEsNopLNgLh1xYtQvveNc84bnaEZspbkJjKUi2bA41wKrjAsMOms+kCzFhqLMGZmxgYwIIeOhNaD
hxmzTVb3DZ0DqTFmPojDo1HAqhZMNBU1layBxPWQLR00CRuXooVG+bSb0ny+u8nzHCqtHWjiJHsN
o6lgJshWLgKkCGSOoEo2hnQr0i/AZ3gbkl3jFiek4bEmMctMh7plj9YNjgdSi4noFnA0mdWAxSFh
gviCP6gzsXgF0jaGk2N9/m/wRCF8gRT4yC+ILAMfkB9GYeXaV9zHDg1DYhK/5eAGBAHwBHu94I1z
OBoCJAnq6fACV4BWLPHkQPrO7d5C7OrzztppiYzBbzNbspDXb6I+2kBlTbZcUoe3KoLkj8rvZQFN
ss0L7BxT5ppERha98PUdA7joarRC9SFchN7CTynLlFuKHknDJxhyMCFfMSKacBYQSOB9PZkSWeIC
tGPKaRLQOgPfQvfTiSDez5TBq8O9tbjWZp0cXFzBSkQPh4mP64/nnzMWQyF+czghAOQ+S+SF6FzO
zV0MDqvA6n0bH+EBi4tKhmT0K4dEvmDIJCj0mjdPGI5jASTZaVvqRSy1HT4MhZRyI3tJHwGJtpmY
klZvIGcBojnXrYN+1FpQVM1VRvnpN+ezYazHwWRn6NVA8nYXfUYljPaQTsGpkbmOqfBPR2IVMThI
u7ELI2OOEaBhomZmbCxBgVCJyUCiRCSSJytjo0xehW0HfcaIWw4yMkkPGMDgOIGsAuA5gfGHUrGX
wsn2b3YIVJVvpPnN/ezHU3jDS6+smWd9ZPFeQzFt0w6UkXJQMUHAZDD5yzLQ0eDzTm65YRbd1XQS
nOOJtZFNsN3WLGLHK9iLRhxklkkdN+4NXeJcRje483ISF2cvdPONYrLpVQhpO5IQyGPD3SaVwHmg
E4TUa67SZfKALShRqKeK4arZFWCQ5Uf7nvnsITP34IcurKo0WIXC8fCZwi2dsGGhgW2zsyLLOaos
t+OgcJCkUKifJ1pqYJOs19HnRQv1Pu0AWV1ADpVFoBa2EHvtAJHALS9j1eni2CF4ZHU1+fv7jQx7
R7HRwDjy1EjcHPArYse2RQCJuOmp9xsHtd2w+Rx3iH1SFxzcG/s4AMKjT/hxyfOMDih3K1z8eyhC
+RSRQTLJ0Gg2y0OHjRuStV5ukDq3AbbpTrmwx4okQh5JKx8EBVdMl463Zd0h9TI9+bNQvLfOjVoC
DoGC82m1Xa7M3W5uUQItKhumsXJ1gEzEKNKmImBq4gamz3y8rhCvAeipUmLGjc0X8kjhfGBmzunV
cYqmrHCdZqf+G1dp2FBRsTIW7OusRpeRBOXMcxciCfvL4o5dvdv2IvasCozTNjlWdJe0BxrkQ00b
6MYFimcDtApyj01AbQMIOYkBu380DbHmzSKIICiiBNindATehZNBjghetC5kceJWP+Y19TI4A/MC
YtN8j2Nad6SqTRCYi6ZJYAOOrOyUgvSkwotuHj2OJvRyqRmTAsKgvTmyklb4l1lQQ2yEhswrS6Vg
KXlvNVFgkJzkww6AGR2x2yDEnqpgJkLzkh6pQgq8fi9BBkjrM8UxfI0xZhyaFTz0rct5NHjxdtMb
KPPOe9FffOHNhjWgPsOo9Weuati2JiGtI5tp/MEo0HQJhUGjAzK+6Yyh67APEAyMToBx2Hq50dp3
uE4RO7lUg9U4GvEGNjVwQSO3xZ6lYu04V2nEQqdA7k0aRLOwfzVKnOBqhGeKvNSU1Iml2p5KJFB+
qFN8MXDfBeWviQAmkGHsPAcVr5ojvdqdB4F2mnNy4b7HBMl5XGrVlPsFc8lSrDxCveMPTIEhYQfB
qSmCEwlliks6i0ECH9VYW2EEZxjiTYxC9n032y9KyydJ6cI1rct5A2xvhv54g9TTa2iyl/UkLsOx
DCoMhMhHrJ05zeCyD6eHKBxQR22bMhgRzpZAHYozUuTZXZ78NMsQyTxP1bilKbIqUIVLGMMCqhIV
xcqnK+8qkKJEh1J6KCoCnIuTjuIDnQSFMtwOHVVigSJArEXkGYRBLC2BEhE5SAjDcEdDYrwAqtaX
UxqU3BvIcFkwyI1yEqcLLEr6A39Nej8i39xBTzpn21oWEzPM1HPUxUkwyNpk9BodQPaNiBAAlshS
TnLp0+ihlxQtCpOHhq4gNe14uuBL3RIfkQwQLz5Q0HKck1ZpsrAxaaPdbkm7Wq1KZWUH/F3JFOFC
QVTIqUQ=
Ciao,
Steffen
|