# author: Joel Ricker
# date: 2001-06-28
# filename: fixepub.py
# version: 0.1
# description: fixes epubs on a B&N Nook Color so that the covers display properly
# requirements: Requires unzip and zip which can be found at http://www.info-zip.org/

import os, fnmatch
import os.path
import glob
import shutil
import subprocess
import sys
import codecs

if len(sys.argv) > 1 and os.path.isdir(sys.argv[1]):
    # top-level directory that contains books on the nook device,
    # e.g. G:\My files\Books or C:\MyNookColor\My Files\Books
    nook_directory = sys.argv[1]
else:
    # Missing argument, or the argument is not an existing directory.
    print('Usage: python.exe %s <path to books on nook color>' % sys.argv[0])
    sys.exit(1)

# path to do our work in -- a directory called temp. The name is relative, so
# it is resolved against the current working directory.
temp_directory = 'temp'

# Clean up a work directory left over from an earlier run. Only an existing
# directory is removed; a removal failure is allowed to surface here instead
# of being silently swallowed, because the per-book os.mkdir() of this same
# directory could not succeed afterwards anyway.
if os.path.isdir(temp_directory):
    shutil.rmtree(temp_directory)

def find_files(ext, path):
    """Return the files below *path* whose names match the pattern *ext*.

    *ext* is an fnmatch-style pattern (for example '*.epub') that is applied
    to bare file names. *path* is walked recursively with os.walk, and each
    match is returned joined to the directory it was found in, in the order
    the walk produced it. An empty list is returned when nothing matches.
    """
    found = []
    for folder, _subfolders, names in os.walk(path):
        found.extend(os.path.join(folder, name)
                     for name in fnmatch.filter(names, ext))
    return found

# minidom is only needed for the per-book OPF editing below; import it once
# here rather than on every loop iteration.
import xml.dom.minidom

# Name endings accepted as a "valid looking" cover image already inside the
# book (compared against the lower-cased file name).
_COVER_IMAGE_SUFFIXES = ('jpg', 'jpeg', 'png')

# Replacement covers looked for next to the original epub, in order of
# preference, with the media-type recorded in the manifest for each.
_EXTERNAL_COVER_TYPES = (('.png', 'image/png'), ('.jpg', 'image/jpeg'))


def _update_cover_metadata(dom, opf_path, original_epub, book_title):
    """Edit the parsed OPF *dom* in place so that it declares a cover image.

    dom           -- minidom document parsed from the book's .opf file
    opf_path      -- path of that .opf file inside the extraction directory
    original_epub -- path of the epub on the device; a replacement cover is
                     looked for next to it (same name, .png or .jpg)
    book_title    -- epub file name without extension, used in messages
    """
    # First, look for an item that looks similar to this:
    # <item href="xhtml/cover.html" id="cover" media-type="application/xhtml+xml"/>
    # if there is one, change id=cover to id=cover-page and update
    # <spine toc="ncx">
    #    <itemref idref="cover"/>   <-- change to cover-page
    for item_element in dom.getElementsByTagName('item'):
        if item_element.getAttribute('id') != 'cover':
            continue
        if "xhtml+xml" not in item_element.getAttribute('media-type'):
            continue
        spine_element = dom.getElementsByTagName('spine')[0]
        for itemref_element in spine_element.getElementsByTagName('itemref'):
            if itemref_element.getAttribute('idref') == 'cover':
                item_element.setAttribute('id', 'cover-page')
                itemref_element.setAttribute('idref', 'cover-page')

    # Second, check that there is a meta tag that looks like this:
    #       <meta name="cover" content="cover"/>
    # and an item tag that looks like this:
    #       <item id="cover" href="images/cover.png" media-type="image/png"/>
    # where the content value == id value (cover)
    # if (content/id) != cover, change it.
    found_meta = False
    for meta_element in dom.getElementsByTagName('meta'):
        if meta_element.getAttribute('name') == 'cover':
            for item_element in dom.getElementsByTagName('item'):
                if item_element.getAttribute('id') == meta_element.getAttribute('content'):
                    meta_element.setAttribute('content', 'cover')
                    item_element.setAttribute('id', 'cover')
                    found_meta = True

    # Next, if we failed to find meta data from above, create new entries in
    # the dom. They are remembered so they can be taken out again if no cover
    # image turns up to fill them in.
    placeholder_nodes = []
    if not found_meta:
        metadata_element = dom.getElementsByTagName('metadata')[0]
        meta_element = dom.createElement('meta')
        meta_element.setAttribute('name', 'cover')
        meta_element.setAttribute('content', 'cover')
        metadata_element.insertBefore(meta_element, metadata_element.firstChild)

        manifest_element = dom.getElementsByTagName('manifest')[0]
        item_element = dom.createElement('item')

        # these will get set in the next step when we verify
        # the location of the cover image
        item_element.setAttribute('id', 'cover')
        item_element.setAttribute('href', '')
        item_element.setAttribute('media-type', '')
        manifest_element.insertBefore(item_element, manifest_element.firstChild)

        placeholder_nodes = [meta_element, item_element]

    # Finally, check that
    #       <item id="cover" href="images/cover.png" media-type="image/png"/>
    # points to a valid image. If not, copy over a new image from the same
    # directory as the original epub and update href accordingly.
    #
    # The href is resolved against the directory holding the .opf file: that
    # is where the replacement cover is copied to and what its bare-filename
    # href is relative to, so the validity check has to use the same base.
    opf_directory = os.path.dirname(opf_path)

    for item_element in dom.getElementsByTagName('item'):
        if item_element.getAttribute('id') != 'cover':
            continue

        # make sure path exists and points to a valid looking image file
        item_filename = os.path.join(opf_directory, item_element.getAttribute('href'))
        if (os.path.isfile(item_filename) and
                item_filename.lower().endswith(_COVER_IMAGE_SUFFIXES)):
            # Everything should be a-ok, nothing more to change.
            break

        # Look for <epub name>.png, then <epub name>.jpg, beside the epub.
        cover_base = os.path.splitext(original_epub)[0]
        original_cover = None
        for extension, media_type in _EXTERNAL_COVER_TYPES:
            if os.path.isfile(cover_base + extension):
                original_cover = cover_base + extension
                break

        if original_cover is None:
            print('Could not find any covers for %s' % book_title)
            # Do not leave the empty href/media-type placeholder entries
            # created above in the manifest and metadata.
            for node in placeholder_nodes:
                node.parentNode.removeChild(node)
            break

        # we want the cover file to go into the same directory as the .opf
        # file, named cover_xyzzy plus the extension of the replacement image,
        # and the item tag updated to point at it.
        cover_name = 'cover_xyzzy' + extension
        shutil.copyfile(original_cover, os.path.join(opf_directory, cover_name))

        item_element.setAttribute('href', cover_name)
        item_element.setAttribute('media-type', media_type)
        break


def _repack_epub(book_extract_path, new_epub):
    """Zip the extracted tree at *book_extract_path* into *new_epub*.

    Returns True only if every zip.exe invocation exited with status 0.
    """
    # zip.exe runs with the extracted tree as its working directory, so the
    # executable (expected in the current directory), the tree and the
    # archive are all passed as absolute paths.
    zip_exe = os.path.join(os.getcwd(), 'zip.exe')
    extract_root = os.path.abspath(book_extract_path)
    archive = os.path.abspath(new_epub)

    # The EPUB container format expects "mimetype" to be the first entry in
    # the archive, stored uncompressed and without extra fields, so it is
    # added on its own before everything else.
    if os.path.isfile(os.path.join(extract_root, 'mimetype')):
        status = subprocess.call([zip_exe, '-X', '-0', archive, 'mimetype'],
                                 cwd=extract_root)
        if status != 0:
            return False

    # Then add the rest of the tree. No shell is involved, so '*' reaches
    # zip.exe unexpanded, as it did before.
    status = subprocess.call([zip_exe, '-r', '-X', archive, '*', '-x', 'mimetype'],
                             cwd=extract_root)
    return status == 0


def _fix_epub(original_epub, book_title, book_extract_path):
    """Extract one epub, fix its cover metadata, repack it and replace it.

    The original file is only overwritten after both the extraction and the
    repacking succeeded; otherwise a message is printed and it is left alone.
    """
    os.mkdir(book_extract_path)

    # Arguments are passed as a list so paths containing spaces need no
    # hand-written quoting.
    unzip_status = subprocess.call(
        ['unzip.exe', '-q', '-o', original_epub, '-d', book_extract_path])
    # Info-ZIP unzip documents exit status 1 as "warnings, but processing
    # completed successfully"; anything higher is treated as a failure.
    if unzip_status not in (0, 1):
        print('Could not extract %s (unzip exit status %d)'
              % (original_epub, unzip_status))
        return

    opf_files = find_files('*.opf', book_extract_path)
    if not opf_files:
        print('Could not find an .opf file in %s' % book_title)
        return
    opf_path = opf_files[0]
    print('\t\tFound %s' % opf_path)

    dom = xml.dom.minidom.parse(opf_path)
    _update_cover_metadata(dom, opf_path, original_epub, book_title)

    # Now that everything is fixed, all that's left to do is update the xml,
    # zip the file up, and replace the epub in the original location. The
    # file is closed before zip.exe reads it.
    with codecs.open(opf_path, 'w', encoding='utf-8') as opf_file:
        dom.writexml(opf_file, encoding='utf-8')

    new_epub = os.path.join(temp_directory, book_title + '.epub')
    if not _repack_epub(book_extract_path, new_epub):
        print('Could not repack %s; leaving the original untouched' % book_title)
        return

    shutil.copy(new_epub, original_epub)


for original_epub in find_files('*.epub', nook_directory):

    # book_filename is the epub archive name with extension,
    # book_title is the same name minus the extension
    book_filename = os.path.basename(original_epub)
    book_title = os.path.splitext(book_filename)[0]

    # the path the epub will be extracted to
    book_extract_path = os.path.join(temp_directory, book_title)

    print('-' * 30)
    print('Fixing %s' % book_filename)

    os.mkdir(temp_directory)
    try:
        _fix_epub(original_epub, book_title, book_extract_path)
    finally:
        # Always clear the work directory, even when a book fails, so the
        # next book (or the next run) starts from a clean slate.
        shutil.rmtree(temp_directory)

    


