View Single Post
Old 11-28-2010, 08:46 AM   #8
siebert
Developer
siebert has a complete set of Star Wars action figures.siebert has a complete set of Star Wars action figures.siebert has a complete set of Star Wars action figures.
 
Posts: 155
Karma: 280
Join Date: Nov 2010
Device: Kindle 3 (Keyboard) 3G / iPad 9 WiFi / Google Pixel 6a (Android)
Quote:
Originally Posted by kovidgoyal View Post
So if the epub isn't working in fbreader, sigil or aldiko you need to open bug reports for those programs.
Is this a known issue? I can't believe that I'm the first and only one to use calibre-generated EPUB files with fbreader or sigil.

I agree it would be best to fix the failing programs, but as several are affected (including closed-source programs), it seems more promising and easier to me to make calibre work around the issue.

I first tried to use unique file names, which fixed the pictures, but navigation was still broken. So I changed calibre to use unique filenames without subdirectories, and the result works fine for me.

So it would be great if you could apply the following change to calibre.

Ciao,
Steffen

Code:
# Bazaar merge directive format 2 (Bazaar 0.90)
# revision_id: siebert@steffensiebert.de-20101128132414-\
#   i7ak3h1gflu2m11b
# target_branch: http://bazaar.launchpad.net/~kovid/calibre/trunk/
# testament_sha1: cf755f77a363fc0310b3d12cb2487c4ed531d91f
# timestamp: 2010-11-28 14:25:50 +0100
# base_revision_id: kovid@kovidgoyal.net-20101128023305-\
#   0ew07r4bzia4bb0t
# 
# Begin patch
=== modified file 'src/calibre/web/feeds/__init__.py'
--- src/calibre/web/feeds/__init__.py	2010-09-13 16:15:35 +0000
+++ src/calibre/web/feeds/__init__.py	2010-11-28 13:24:14 +0000
@@ -14,6 +14,11 @@
 from calibre import entity_to_unicode, strftime
 from calibre.utils.date import dt_factory, utcnow, local_tz
 
+FEED_NAME = 'feed%d.html'
+''' Template for the feed index file. '''
+ARTICLE_NAME = 'feed%d_article%d.html'
+''' Template for the article file. '''
+
 class Article(object):
 
     def __init__(self, id, title, url, author, summary, published, content):

=== modified file 'src/calibre/web/feeds/news.py'
--- src/calibre/web/feeds/news.py	2010-11-04 22:26:10 +0000
+++ src/calibre/web/feeds/news.py	2010-11-28 13:24:14 +0000
@@ -21,7 +21,7 @@
 from calibre.web import Recipe
 from calibre.ebooks.metadata.toc import TOC
 from calibre.ebooks.metadata import MetaInformation
-from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
+from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed, FEED_NAME, ARTICLE_NAME
 from calibre.web.fetch.simple import option_parser as web2disk_option_parser
 from calibre.web.fetch.simple import RecursiveFetcher
 from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
@@ -912,16 +912,10 @@
 
         self.feed_objects = feeds
         for f, feed in enumerate(feeds):
-            feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
-            if not os.path.isdir(feed_dir):
-                os.makedirs(feed_dir)
 
             for a, article in enumerate(feed):
                 if a >= self.max_articles_per_feed:
                     break
-                art_dir = os.path.join(feed_dir, 'article_%d'%a)
-                if not os.path.isdir(art_dir):
-                    os.makedirs(art_dir)
                 try:
                     url = self.print_version(article.url)
                 except NotImplementedError:
@@ -934,12 +928,12 @@
                 func, arg = (self.fetch_embedded_article, article) if self.use_embedded_content else \
                             ((self.fetch_obfuscated_article if self.articles_are_obfuscated \
                               else self.fetch_article), url)
-                req = WorkRequest(func, (arg, art_dir, f, a, len(feed)),
+                req = WorkRequest(func, (arg, self.output_dir, f, a, len(feed)),
                                       {}, (f, a), self.article_downloaded,
                                       self.error_in_article_download)
                 req.feed = feed
                 req.article = article
-                req.feed_dir = feed_dir
+                req.feed_dir = self.output_dir
                 self.jobs.append(req)
 
 
@@ -961,8 +955,7 @@
 
         for f, feed in enumerate(feeds):
             html = self.feed2index(f,feeds)
-            feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
-            with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi:
+            with open(os.path.join(self.output_dir, FEED_NAME%f), 'wb') as fi:
                 fi.write(html)
         self.create_opf(feeds)
         self.report_progress(1, _('Feeds downloaded to %s')%index)
@@ -1148,9 +1141,7 @@
             ref.title = 'Masthead Image'
             opf.guide.append(ref)
 
-        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
-        manifest.append(os.path.join(dir, 'index.html'))
-        manifest.append(os.path.join(dir, 'index.ncx'))
+        manifest = [dir, os.path.join(dir, 'index.html'), os.path.join(dir, 'index.ncx')]
 
         # Get cover
         cpath = getattr(self, 'cover_path', None)
@@ -1183,7 +1174,6 @@
             f = feeds[num]
             for j, a in enumerate(f):
                 if getattr(a, 'downloaded', False):
-                    adir = 'feed_%d/article_%d/'%(num, j)
                     auth = a.author
                     if not auth:
                         auth = None
@@ -1192,14 +1182,15 @@
                         desc = None
                     else:
                         desc = self.description_limiter(desc)
-                    entries.append('%sindex.html'%adir)
+                    indexname = ARTICLE_NAME%(num, j)
+                    entries.append(indexname)
                     po = self.play_order_map.get(entries[-1], None)
                     if po is None:
                         self.play_order_counter += 1
                         po = self.play_order_counter
-                    parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
+                    parent.add_item(indexname, None, a.title if a.title else _('Untitled Article'),
                                     play_order=po, author=auth, description=desc)
-                    last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
+                    last = os.path.join(self.output_dir, (indexname).replace('/', os.sep))
                     for sp in a.sub_pages:
                         prefix = os.path.commonprefix([opf_path, sp])
                         relp = sp[len(prefix):]
@@ -1226,7 +1217,7 @@
 
         if len(feeds) > 1:
             for i, f in enumerate(feeds):
-                entries.append('feed_%d/index.html'%i)
+                entries.append(FEED_NAME%i)
                 po = self.play_order_map.get(entries[-1], None)
                 if po is None:
                     self.play_order_counter += 1
@@ -1237,11 +1228,11 @@
                 desc = getattr(f, 'description', None)
                 if not desc:
                     desc = None
-                feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
+                feed_index(i, toc.add_item(FEED_NAME%i, None,
                     f.title, play_order=po, description=desc, author=auth))
 
         else:
-            entries.append('feed_%d/index.html'%0)
+            entries.append(FEED_NAME%0)
             feed_index(0, toc)
 
         for i, p in enumerate(entries):
@@ -1253,7 +1244,7 @@
             opf.render(opf_file, ncx_file)
 
     def article_downloaded(self, request, result):
-        index = os.path.join(os.path.dirname(result[0]), 'index.html')
+        index = os.path.join(os.path.dirname(result[0]), ARTICLE_NAME%request.requestID)
         if index != result[0]:
             if os.path.exists(index):
                 os.remove(index)
@@ -1263,7 +1254,7 @@
         article = request.article
         self.log.debug('Downloaded article:', article.title, 'from', article.url)
         article.orig_url = article.url
-        article.url = 'article_%d/index.html'%a
+        article.url = ARTICLE_NAME%request.requestID
         article.downloaded = True
         article.sub_pages  = result[1][1:]
         self.jobs_done += 1

=== modified file 'src/calibre/web/feeds/templates.py'
--- src/calibre/web/feeds/templates.py	2010-08-29 18:39:20 +0000
+++ src/calibre/web/feeds/templates.py	2010-11-28 13:24:14 +0000
@@ -12,6 +12,7 @@
         TABLE, TD, TR
 
 from calibre import preferred_encoding, strftime, isbytestring
+from calibre.web.feeds import FEED_NAME, ARTICLE_NAME
 
 def CLASS(*args, **kwargs): # class is a reserved word in Python
     kwargs['class'] = ' '.join(args)
@@ -92,7 +93,7 @@
         for i, feed in enumerate(feeds):
             if feed:
                 li = LI(A(feed.title, CLASS('feed', 'calibre_rescale_120',
-                    href='feed_%d/index.html'%i)), id='feed_%d'%i)
+                    href=FEED_NAME%i)), id='feed_%d'%i)
                 ul.append(li)
         div = DIV(
                 PT(IMG(src=masthead,alt="masthead"),style='text-align:center'),
@@ -115,14 +116,14 @@
             hr.tail = '| '
 
         if f+1 < len(feeds):
-            link = A('Next section', href='../feed_%d/index.html'%(f+1))
+            link = A('Next section', href=FEED_NAME%(f+1))
             link.tail = ' | '
             navbar.append(link)
-        link = A('Main menu', href="../index.html")
+        link = A('Main menu', href="index.html")
         link.tail = ' | '
         navbar.append(link)
         if f > 0:
-            link = A('Previous section', href='../feed_%d/index.html'%(f-1))
+            link = A('Previous section', href=FEED_NAME%(f-1))
             link.tail = ' |'
             navbar.append(link)
         if top:
@@ -203,20 +204,19 @@
                 navbar.append(BR())
             navbar.append(BR())
         else:
-            next = 'feed_%d'%(feed+1) if art == number_of_articles_in_feed - 1 \
-                    else 'article_%d'%(art+1)
-            up = '../..' if art == number_of_articles_in_feed - 1 else '..'
-            href = '%s%s/%s/index.html'%(prefix, up, next)
+            next = FEED_NAME%(feed+1) if art == number_of_articles_in_feed - 1 \
+                    else ARTICLE_NAME%(feed, art+1)
+            href = next
             navbar.text = '| '
             navbar.append(A('Next', href=href))
-        href = '%s../index.html#article_%d'%(prefix, art)
+        href = FEED_NAME%feed + '#article_%d'%art
         navbar.iterchildren(reversed=True).next().tail = ' | '
         navbar.append(A('Section Menu', href=href))
-        href = '%s../../index.html#feed_%d'%(prefix, feed)
+        href = 'index.html#feed_%d'%feed
         navbar.iterchildren(reversed=True).next().tail = ' | '
         navbar.append(A('Main Menu', href=href))
         if art > 0 and not bottom:
-            href = '%s../article_%d/index.html'%(prefix, art-1)
+            href = ARTICLE_NAME%(feed, art-1)
             navbar.iterchildren(reversed=True).next().tail = ' | '
             navbar.append(A('Previous', href=href))
         navbar.iterchildren(reversed=True).next().tail = ' | '

=== modified file 'src/calibre/web/fetch/simple.py'
--- src/calibre/web/fetch/simple.py	2010-11-04 19:35:23 +0000
+++ src/calibre/web/fetch/simple.py	2010-11-28 13:24:14 +0000
@@ -7,7 +7,7 @@
 Fetch a webpage and its links recursively. The webpages are saved to disk in
 UTF-8 encoding with any charset declarations removed.
 '''
-import sys, socket, os, urlparse, re, time, copy, urllib2, threading, traceback
+import sys, socket, os, urlparse, re, time, copy, urllib2, threading, traceback, hashlib
 from urllib import url2pathname, quote
 from httplib import responses
 from PIL import Image
@@ -334,7 +334,7 @@
                 self.log.exception('Could not fetch image ', iurl)
                 continue
             c += 1
-            fname = ascii_filename('img'+str(c))
+            fname = ascii_filename(hashlib.sha1(data).hexdigest())
             if isinstance(fname, unicode):
                 fname = fname.encode('ascii', 'replace')
             imgpath = os.path.join(diskpath, fname+'.jpg')

# Begin bundle
IyBCYXphYXIgcmV2aXNpb24gYnVuZGxlIHY0CiMKQlpoOTFBWSZTWSOcC/AABbZ/gARUQABa7//3
f+dWjr////BgCY76Ct7nFApBKDhsmnr6hPb1zu7iwklE9AKntMUyntE9JplPDVHqHpNpk1G1ADIH
qEao1NPUxNDQyaGgGgBiA00aAAAFU/JTYoGQNNNA0aBiAGRpkGgBoAkRE0EE9JtJpqQ8mpvU1NNA
0aDRoAZqNAIqENTKNpqPSaYmhNG0j1M1APFA0AAAJIiYgEyKeJhGmkNpE09NINAA0AaDhKOuuqvJ
v3UnQyZlo0z+RxdcjB6Hxez/j/B5jPR1oIWPhaPVMeRnUNDr1X1ufPeBDEjjWEiAwPLUhyr6mg0Y
SjR4u0HajypZOlGectwgQ4C7vQdFsRhyO4PWGQNJNtobE2L2fsIVz7Kz5oXTpnO5k9MaOGr9IL2W
Y7QpZ12u1YQY4xbErF7xrFWY2vkpMUdMKWwum7BzN5Z2dLrm4cWhVH2n6fwn+v85z4RSf9nm+S5F
0KOREOkB2nOhpauLbI8dfAxrUfkjKf7nY0oz8s1Q8qmBQ7+3w+EPt6CblA9KhGuEbcpVEBogSL4E
WHYhOdnHjybICovbhqDz6efLr1AFzSPfnsSxKgsAUGQzIBW9i01mFGNg8Rq24ngeLSaGCz+YDfvR
GUsEXFT2TmaD0SnAZlBHdihQBjkMBLwgy4sXROGSMeRcpPYKzzpIcdEsag0VZSDmBsgMvyIPLbfO
jRGq7fC9V7scJxqp/lfb6I4Vd8tNwL2q/JkqwXDME1QJgXrBdko9LvXWUp1UwTCVH9vga9xiNJD4
JZhelz2RDhqIvyKY9JVcZDQNN9Q7PPvCO4iyDxOjXU0UaQSwHIC8sVYKkcgFJ6CJSCRB3+jCaUpi
qq4IPpBWPgU31M309Bd/q6PoKD5wBXpXL43gu0nEzMQVCMdC9NS1CFG95MXuaQ9suc8PsTpRaa8w
WSQaVyOw7uBzrxL0BBkwK27wU5loTaVy+htwLJJaqUxtx46bdlfTS4Fq1tzmCqjgi912/LuocTUZ
dFtMypU4V2rZdMOfsdxI/PfLZm83vBPaE7wUpHSPpi7v68MfMCet57pU5QUgm6NJznxJA996hptX
jN2vKamWPCdxXOcU8fgVTZTIExEoGc+FKU1D47ISUvkmW1pQleuYCUCZiUPmBW0ZQwnWPSN67njQ
oMCaCmQcDrLzxF2Twh8oBVJ1GzIz0JMTQvFUyJoajs7gU1u6AzNbo4cFIBbWiZ49rol0Aqgnqzjs
3ksib2OlpNJfB3KjQi0vzol1LVM1jG/zYY24yW+GlXApVgKayGJMahiuJI5JOQNmIK7jqKyF4Kkr
rjVMsSmCtMzQ5jUcIzKr89b0jfOTYUeWghOZVDJnhHZp0ixKywFQppGKy4nLFni90cxgVChKQ04A
sIe1spYb9Dit0cAUBy/s0Z6yO2C7bNm7MlwYKSiFnILiX2Y6cdshOwFjSmfvcTQ3Gfj6zE6Mdrb0
3UmaVoMeMdd0NqhEVZvdujgci4FJYZmsc121YnLdbs3eV5ntcLrme3draU0pgpxRNxOXVE6GsFZK
/B+lHBJelrI8/HeJjJgfCPjCkDuou7mG1LM2qyWQlevaCP4g2sXu1ym2Db9Pk9giELwBFfbIL2gs
PADDqJ/BjhwZjYkLb4e4DcQB+QI+77wREXmyJA7fcBU91YMGevnp4impajvbTTExl64Hbbx0hru+
L0UotSbrSnq1KByXB3bEBC094Exa5J8YLfVsC02FCnBesFCC2m459mgu9t/fDIC4mFRGGFL2dAKv
kldVXSCxxQQTLheBbG+GQOr8/t58ymGYseLrBIK59d9Px5X+s6WQwLkw++Ff9BZVoHZjUazSRkBI
y1gtZO6qBCUoCzZUXvQgu2AtX74EcgXb0ImjIzPSSV8DM6zKlZhoeeuJw2Gw71u081iN7FhtOpWY
HJnNSoR+QKTlXuBbxkXDHJxNDViLbgLrLUxsUsgMDJqpoBd+wFidUToCgNkcB4mAK5LuR8I95cMv
hwr0Aq93M2nTu6OyO3kQh9MnPNQNTM0a+oFITjA5YNJbXiYcOLMfOJ7VkIDAR5TMatoVBHCylZ6h
FCWzSepJbN4JenaVVqu15sBqNjoC828XQeTJBZts2vSIERLf2R4Nay0B8c6H2jBp72GW3QJRwF2L
38VjEG1kRsQImE6s9g6RiICWdOQ2EAuiaYu4CMdxYCOIAdSgrwOTA5+F6CvqC8+T1b2yDDjzOvvH
LA6es75M2VpqLTsxHsLg2R1UaYmpCVgvFJwxTgFp/oMeQMF7cwXhxgj3SJD5c5W7AKPvBcOSXjyX
WEB8QO9LUpZhqbctJJ3XwhN19yacQrzhvVmb1A8uTW8OQVhmwDPOhd4xZYwWbVSTzJOHHhAKnAzO
CHJpTD1rmpkbelz8bZiqcbR0ctVzct+OwSKxbDnanVMN+7wcyUVuXJmjRlBjnqBpootXhXctNopJ
dYWJYuHjtYWd3YDuOQTuDMENLkJBT2FeALoCuXX5x9+Xi1o9fMFtIkfeCgZW0VViWTBV7JNOlH24
2tZGoV4vjygsmUbZAm79lKysAmG/Zooa1CtjJbzpwY5nLXZIYA1olnX9JDTBdMj6vVGKnq8UTPhZ
EI9x6X9zpmfmUyWNtPO4s2cHKOlghDFiq0B2C865LdnJZ0dUQiOmwZphuYWSBlW81NgCzCsFXWZM
ixyuKcUV1ppxSpDiChhEbTSDgVgrfceI5TX+WjwuRXjj2uqzSmW3KO4Ja6TcBYKEyQSZ+rUTYE1M
/OZRCB/TWF44FHaMZ5/zsAuHCKGXHM/kNUd9VjYq2YNbYAkNol5GxDBS6yWhZhtr4Rv3W2A3lsF8
4slolGPlvqjkJMeL6Yj0ZkoDiUCTiTmzJTCQUAnKic9RF3iJTLNRqryoFQF0ZkRRV0HhwwShiR9h
tK6LiiZilKpWhHKAaAVzirbAF9Hlbs/sXohMe6wFHRAvNIXxDqrYiyzAQg7dfprxg7xTLZAmmWC9
2ABtnChkDF1KOrlVRWjsUv/F3JFOFCQI5wL8AA==
siebert is offline   Reply With Quote