#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import io
import re
import sys
import tempfile
import shutil
import zipfile
import xml.etree.ElementTree as etree
import xml.sax.saxutils as sax

# Fixed content written to META-INF/container.xml; its <rootfile> entry names
# OEBPS/content.opf as the package document of the archive.
__CONTAINER__ = '''<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
    <rootfiles>
        <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
    </rootfiles>
</container>
'''
# Archive directory used as prefix for content.opf, toc.ncx and the text/ files.
__OEBPS__ = 'OEBPS'

#=======================================================================================
class EpubStorage:
    """Write-only wrapper around the ZIP archive that becomes the EPUB file."""

    def __init__(self, epub_name):
        """Open `epub_name` for writing (an existing file is replaced)."""
        self.zip = zipfile.ZipFile(epub_name, mode='w')

    def close(self):
        """Finish the archive and close the underlying file."""
        archive = self.zip
        archive.close()

    def addFile(self, sourceName, destName):
        """Store the file `sourceName` under `destName`, deflate-compressed."""
        archive = self.zip
        archive.write(
            filename=sourceName,
            arcname=destName,
            compress_type=zipfile.ZIP_DEFLATED,
        )

    def addString(self, destName, content):
        """Store `content` under `destName` with the archive's default compression."""
        archive = self.zip
        archive.writestr(destName, data=content)
#=======================================================================================
class TOC:
    """Collects navigation entries and writes them to the archive as toc.ncx.

    `storage` must provide ``addFile(sourceName, destName)``; every item passed
    to `addTocItem` must provide ``ToString()`` returning its <navPoint> markup.
    """

    def __init__(self, storage):
        self.storage = storage
        self.tocs = []

    def addTocItem(self, item):
        """Append one top-level navigation entry."""
        self.tocs.append(item)

    def close(self):
        """Serialize the NCX document and add it to the storage as OEBPS/toc.ncx."""
        # A unique temporary file avoids overwriting an unrelated file in the
        # working directory; it is still handed to addFile (rather than passed
        # as a string) so the entry is stored exactly as before.
        fd, tmpfile = tempfile.mkstemp(suffix='.ncx')
        try:
            with os.fdopen(fd, 'wt', encoding='utf-8') as f:
                self._writeHeader(f)
                self._writeHead(f)
                self._writeTitle(f)
                self._writeNavMap(f)
                self._writeFooter(f)

            self.storage.addFile(tmpfile, '%s/%s' % (__OEBPS__, 'toc.ncx'))
        finally:
            # Remove the temporary file even when writing or archiving failed.
            os.unlink(tmpfile)

    def _writeHeader(self, file):
        """Write the XML declaration, the NCX doctype and the opening <ncx> tag."""
        file.write('<?xml version="1.0"?>\n')
        file.write('<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">\n')
        file.write('<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">\n')

    def _writeFooter(self, file):
        """Close the <ncx> root element."""
        file.write('</ncx>\n')

    def _writeHead(self, file):
        """Write an empty <head> element."""
        # NOTE(review): no <meta> entries (e.g. a book identifier) are emitted;
        # confirm whether readers/validators in use require them.
        file.write('<head>\n')
        file.write('</head>\n')

    def _writeTitle(self, file):
        """Write an empty <docTitle> element."""
        # NOTE(review): emitted without a <text> child; confirm whether a
        # title should be supplied here.
        file.write('<docTitle>\n')
        file.write('</docTitle>\n')

    def _writeNavMap(self, file):
        """Write the <navMap> containing every collected entry in insertion order."""
        file.write('<navMap>\n')
        file.writelines(toc.ToString() for toc in self.tocs)
        file.write('</navMap>\n')

#=======================================================================================
class OpfFile:
    """Builds the OPF package document (manifest and spine) as content.opf.

    `storage` must provide ``addFile(sourceName, destName)``. The manifest
    always starts with the entry for toc.ncx.
    """

    def __init__(self, storage):
        self.manifest = []
        self.spline = []

        self.storage = storage
        self.manifest.append('<item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>\n')

    def close(self):
        """Serialize the package document and add it to the storage as OEBPS/content.opf."""
        # A unique temporary file avoids overwriting an unrelated file in the
        # working directory; it is still handed to addFile (rather than passed
        # as a string) so the entry is stored exactly as before.
        fd, tmpfile = tempfile.mkstemp(suffix='.opf')
        try:
            with os.fdopen(fd, 'wt', encoding='utf-8') as f:
                self._writeHeader(f)
                self._writeMetaData(f)
                self._writeManifest(f)
                self._writeSpline(f)
                self._writeFooter(f)

            self.storage.addFile(tmpfile, '%s/%s' % (__OEBPS__, 'content.opf'))
        finally:
            # Remove the temporary file even when writing or archiving failed.
            os.unlink(tmpfile)

    def _writeManifest(self, file):
        """Write the <manifest> element with all registered items."""
        file.write('<manifest>\n')
        file.writelines(self.manifest)
        file.write('</manifest>\n')

    def _writeHeader(self, file):
        """Write the XML declaration and the opening <package> tag."""
        file.write('<?xml version="1.0"?>\n')
        file.write('<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="BookID" version="2.0">\n')

    def _writeFooter(self, file):
        """Close the <package> root element."""
        file.write('</package>\n')

    def _writeMetaData(self, file):
        """Write an empty <metadata> element."""
        # NOTE(review): the package declares unique-identifier="BookID" but no
        # element with that id is written here; confirm whether metadata
        # (identifier, title, language) should be emitted.
        file.write('<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n')
        file.write('</metadata>\n')

    def _writeSpline(self, file):
        """Write the <spine> element listing the documents in reading order."""
        file.write('<spine toc="ncx">\n')
        file.writelines(self.spline)
        file.write('</spine>\n')

    def addHtml(self, basename):
        """Register text/<basename> as XHTML content and append it to the spine."""
        # quoteattr escapes XML-special characters and adds the quotes, so an
        # unusual file name cannot break the document's well-formedness.
        item_id = sax.quoteattr(basename)
        href = sax.quoteattr('text/%s' % basename)
        self.manifest.append('<item id=%s href=%s media-type="application/xhtml+xml"/>\n' % (item_id, href))
        self.spline.append('<itemref idref=%s/>\n' % item_id)

    def addCss(self, basename):
        """Register text/<basename> as a stylesheet (manifest only, not in the spine)."""
        item_id = sax.quoteattr(basename)
        href = sax.quoteattr('text/%s' % basename)
        # The media-type attribute used to be emitted twice in a row, which
        # made the whole package document unparseable.
        self.manifest.append('<item id=%s href=%s media-type="text/css"/>\n' % (item_id, href))

#=======================================================================================
class TOCEntry:
    """One <navPoint> of the NCX table of contents, with optional children.

    basename -- file name of the target document below text/
    label    -- display text; None is stored as ''
    anchor   -- id inside the target document; when empty the entry links to
                the document itself
    number   -- value written as playOrder
    """

    def __init__(self, basename, label, anchor, number=1):
        self.label = '' if label is None else label
        self.basename = basename
        self.anchor = anchor
        self.number = number
        self.childs = []

    def AddChild(self, child):
        """Append a nested entry; it is rendered inside this entry's <navPoint>."""
        self.childs.append(child)

    def ToString(self):
        """Return the <navPoint> markup for this entry and all of its children."""
        # The navPoint id is built from the file name without its extension.
        stem = re.sub(r'\.[^\.]+$', '', self.basename)
        if self.anchor:
            nav_id = '%s_%s' % (stem, self.anchor)
            target = 'text/%s#%s' % (self.basename, self.anchor)
        else:
            # No anchor: link to the document as a whole instead of emitting
            # an empty fragment.
            nav_id = stem
            target = 'text/%s' % self.basename

        # quoteattr escapes XML-special characters and supplies the quotes.
        parts = [
            '<navPoint id=%s playOrder="%d">\n' % (sax.quoteattr(nav_id), self.number),
            '<navLabel><text>%s</text></navLabel>\n' % sax.escape(self.label),
            '<content src=%s/>\n' % sax.quoteattr(target),
        ]
        parts.extend(child.ToString() for child in self.childs)
        parts.append('</navPoint>\n')
        return ''.join(parts)

#=======================================================================================
class HTMLConverter:
    """Converts one source HTML file into a chapter document for the archive.

    The converted copy is written to ``doc_<doc_nr>.html`` in the current
    working directory (`out_filename`). After `Run`, `toc` holds the TOCEntry
    tree for the document and `play_nr` the next free play-order number.
    """

    def __init__(self, filename, doc_nr, play_nr=1):
        print('konvertiere: ', filename)
        self.doc_nr = doc_nr
        self.play_nr = play_nr
        self.src_filename = filename
        self.out_filename = 'doc_%03d.html' % (self.doc_nr)
        # The source path is the title until _ReadMeta finds a <title>.
        self.meta = {'title': filename}
        self.toc = None

    def Title(self):
        """Return the document title as plain (unescaped) text."""
        return self.meta['title']

    @staticmethod
    def _PlainText(element):
        """Return all text below `element` with whitespace runs collapsed."""
        return ' '.join(''.join(element.itertext()).split())

    def _CleanNS(self):
        """Copy the source to `out_filename` with a bare ``<html>`` start tag.

        Dropping the attributes (notably xmlns) lets the un-namespaced paths
        used by _MakeChapter and _ReadMeta match.
        """
        # Only the start tag itself is replaced, so markup following it on the
        # same line survives and a tag spanning several lines is handled too.
        rxHtmlTag = re.compile(rb'<html\b[^>]*>', re.I)
        # Copy as bytes: the XML parser decides the encoding later, so no
        # locale-dependent decoding happens here.
        with open(self.src_filename, 'rb') as infile:
            data = infile.read()
        with open(self.out_filename, 'wb') as outfile:
            outfile.write(rxHtmlTag.sub(b'<html>', data, count=1))

    def _MakeChapter(self):
        """Give every body-level <h2> an id and build the TOC entries for it."""
        xmlTree = etree.ElementTree()
        root = xmlTree.parse(self.out_filename)
        self._ReadMeta(root)

        # The document entry has no anchor: no element carries the file name
        # as id, so a fragment here would point nowhere.
        self.toc = TOCEntry(self.out_filename, self.meta['title'], '', self.play_nr)
        self.play_nr += 1

        for nChapter, heading in enumerate(root.findall('body/h2'), start=1):
            sID = 'CHAPTER_%03d' % (nChapter)
            heading.set('id', sID)
            # itertext also picks up text wrapped in inline markup (<b>, <a>, ...).
            label = self._PlainText(heading)
            self.toc.AddChild(TOCEntry(self.out_filename, label, sID, self.play_nr))
            self.play_nr += 1

        xmlTree.write(self.out_filename, 'UTF-8')

    def _ReadMeta(self, root):
        """Fill `meta` from <head>: the title and every named <meta> element."""
        oTitle = root.find('head/title')
        if oTitle is not None:
            # Stored unescaped; escaping is left to whoever writes the markup,
            # otherwise the text ends up escaped twice.
            sTitle = self._PlainText(oTitle)
            if sTitle:
                self.meta['title'] = sTitle

        for m in root.findall('head/meta'):
            aName = m.get('name')
            if aName:
                self.meta[aName] = m.get('content', '')

    def Run(self):
        """Perform the conversion: normalize the root tag, then build the chapters."""
        self._CleanNS()
        self._MakeChapter()

    def Debug(self):
        """Print all collected meta data."""
        print('META:')
        for key, value in self.meta.items():
            print('Key: %s -> Val: %s' % (key, value))

    def CleanUpFile(self):
        """Delete the converted file from the working directory."""
        os.unlink(self.out_filename)
        
#=======================================================================================
class Epub:
    """Assembles an EPUB archive from HTML and CSS files.

    Files are added with `addFile` (or `addHtml` / `addCss` directly); `close`
    writes the container, package and navigation documents and finishes the
    archive.
    """

    def __init__(self, epub_name):
        """Create the archive `epub_name` and the helpers that fill it."""
        self.doc_nr = 1
        self.storage = EpubStorage(epub_name)
        # The EPUB container format expects 'mimetype' to be the first entry
        # of the archive, so it is written before anything else is added.
        self.storage.addString('mimetype', 'application/epub+zip')
        self.opf = OpfFile(self.storage)
        self.toc = TOC(self.storage)
        self.play_nr = 1

    def close(self):
        """Write container.xml, content.opf and toc.ncx, then close the archive."""
        self.storage.addString('META-INF/container.xml', __CONTAINER__)
        self.opf.close()
        self.toc.close()
        self.storage.close()

    def addHtml(self, filename):
        """Convert `filename` and add it as the next document of the book."""
        aConv = HTMLConverter(filename, self.doc_nr, self.play_nr)
        aConv.Run()
        try:
            # The converter hands back the next free play-order number.
            self.play_nr = aConv.play_nr
            self.opf.addHtml(aConv.out_filename)
            self.toc.addTocItem(aConv.toc)
            self.storage.addFile(aConv.out_filename, '%s/text/%s' % (__OEBPS__, aConv.out_filename))
        finally:
            # Do not leave the converted copy behind, even if archiving failed.
            aConv.CleanUpFile()
        self.doc_nr = self.doc_nr + 1

    def addCss(self, filename):
        """Add the stylesheet `filename` to the archive under its base name."""
        sBaseName = os.path.basename(filename)
        self.opf.addCss(sBaseName)
        self.storage.addFile(filename, '%s/text/%s' % (__OEBPS__, sBaseName))

    def addFile(self, filename):
        """Add `filename` according to its extension; other file types are ignored."""
        # Match on the parameter itself; the extension check is case-insensitive.
        if re.match(r'^.+\.x?html?$', filename, re.I):
            self.addHtml(filename)
        elif re.match(r'^.+\.css$', filename, re.I):
            self.addCss(filename)


#=======================================================================================
# Command-line entry point: build an EPUB either from every file below --dir
# or from the files named on the command line.
if __name__ == '__main__':
    import optparse
    sUsage = '''usage: %prog [options] files
    TODO
    '''
    parser = optparse.OptionParser( usage=sUsage, version='%prog: Version 0.1' )
    parser.add_option("-e", "--epub", help='EPub-name to create')
    parser.add_option("-d", "--dir", help='all from dir')

    (options, args) = parser.parse_args()

    # Validate everything before the archive is created, so a usage error
    # does not leave an empty output file behind.
    if not options.epub:
        parser.print_help()
        sys.exit(1)

    if not options.dir and len(args) == 0:
        parser.print_help()
        sys.exit(1)

    if options.dir and not os.path.isdir( options.dir ):
        # os.walk would silently yield nothing for a missing directory.
        parser.error( 'not a directory: %s' % options.dir )

    epub = Epub( options.epub )

    if options.dir:
        # --dir takes precedence over positional files. The whole tree is
        # walked before the archive is closed; names are sorted so the
        # document order does not depend on the file system.
        for root, dirs, files in os.walk( options.dir ):
            dirs.sort()
            for file in sorted( files ):
                epub.addFile( os.path.join(root,file) )
    else:
        for file in args:
            epub.addFile( file )

    epub.close()
