#!/Python35/python
# -*- coding: utf-8 -*-

from __future__ import unicode_literals, division, absolute_import, print_function

__all__=["convert2Epub", "prettifyXHTMLFile2", "show_msgbox", "fileNotLoadedError", "split2Files", "removeNonAsciiChars", "educateHeader", "addHTMLTail", "addHTMLHeaders1", "addMainHeaders", "removeHTMLHeaders", "sanitizeFile", "createTOCFile", "createTOCStyles", "addLinkRel2", "addSpineItems", "createNCXFile2", "removeDocTOC", "createNCXFile", "createCoverImageFile", "addOPFMetadata", "storeHTMLNSHeader", "addTopHeaders", "getLinkRel", "addLinkRel", "prettifyCSS", "cleanExit", "transformInternalLinks", "removeWrap", "moveStyles2CSS", "removeDocTOC2", "fileSaveWarning", "fixPunctuation", "getODTImages", "getOtherImages", "createMIMETYPEFile", "createContainerXMLFile", "buildOPFFile", "createOPFHTMLHeader", "addOPFMetadata", "addManifestData", "addSpineData", "addGuideSectionData", "isTablePresent", "getImageSize", "formatImages", "reformatBookImages", "removeNakedSpans"]


from doc_tidy import *
from cutils import *
from cleaner import repairHTMLIDs, checkFileEncoding, convertFile2UTF8
from zutils import *
import options
import os, os.path, sys, codecs, inspect, chardet, re, time, shutil
from tempfile import mkdtemp      
from decimal import *
import textwrap
from PIL import Image
import time

import tkinter as tk
import tkinter.messagebox as mbox
from string import capwords
from locale import getdefaultlocale

try:
    from sigil_bs4 import BeautifulSoup, SoupStrainer, Comment
except:
    from bs4 import BeautifulSoup, SoupStrainer, Comment 
  
def convert2Epub(bk, wdir, filepath):
    """ Drives the whole html-to-epub conversion for one source file.

        bk       -- plugin book object, handed on to the helper steps
        wdir     -- work directory that holds the file being converted
        filepath -- path of the html file; only its base name is used

        Returns the result of ZipArc(), or 0 when options.SYS_EXIT is
        set after one of the image handling steps.
    """

    user_dir = options.LDIR
    metadata = options.META_OPTIONS

    # the image source depends on the word processor that made the html
    if options.DOCTYPE in ('OpenOffice', 'LibreOffice'):
        images_dir, images = getODTImages(wdir, user_dir)
    else:
        images_dir, images = getOtherImages(wdir, user_dir)

    if options.SYS_EXIT == True:
        return(0)

    # from here on the file is addressed inside the work dir
    html_path = os.path.join(wdir, os.path.basename(filepath))

    # the wrap is taken off first to prevent parsing errors
    docTidyNoWrap(wdir, html_path)

    # html styles and inline styling both end up in the stylesheet
    moveStyles2CSS(bk, wdir, html_path)

    prettifyXHTMLFile(wdir, html_path)

    # remember the stylesheet link so it can be re-added after the split
    link_rel = getLinkRel(wdir, html_path)

    reformatBookImages(wdir, images_dir, html_path)
    if options.SYS_EXIT == True:
        return(0)

    # one xhtml file per chapter
    headings, basename_list, file_path_list = split2Files(
        bk, wdir, os.path.join(wdir, html_path))

    # keep all links and bookmarks working across the split files
    transformInternalLinks(bk, wdir, basename_list)

    # epub scaffolding: OPF, NCX, cover page and container.xml
    buildOPFFile(wdir, file_path_list, metadata, images)
    createNCXFile(bk, wdir, file_path_list, headings)
    createCoverImageFile(bk, wdir)
    createContainerXMLFile(wdir)

    if options.TOC_SELECTED  == True:
        createTOCFile(bk, wdir, basename_list, headings)

    removeUnusedBookmarks(bk, wdir, file_path_list)
    repositionAnchorEndTags(wdir, file_path_list)
    setDefaultFont(wdir)

    # final per-file clean up, including the css links
    for chapter_path in file_path_list:
        addLinkRel(wdir, chapter_path, link_rel)
        repairHTMLIDs(wdir, chapter_path)
        prettifyXHTMLFile(wdir, chapter_path)
        removeNakedSpans(wdir, chapter_path)

    return(ZipArc(html_path, user_dir, wdir, file_path_list, images, metadata['cover']))
        
def show_msgbox(title, msg, msgtype='info'):
    """ Shows a message box for general information, warnings and errors.

        title   -- caption of the dialog
        msg     -- text shown in the dialog
        msgtype -- 'info', 'warning' or 'error'

        Returns the value of the matching tkinter.messagebox call, or
        None when msgtype is not one of the three known values.
    """
    dialogs = {
        'info': mbox.showinfo,
        'warning': mbox.showwarning,
        'error': mbox.showerror,
    }
    dialog = dialogs.get(msgtype)
    if dialog is None:
        return(None)

    localRoot = tk.Tk()
    try:
        # keep the helper root window off screen
        localRoot.withdraw()
        localRoot.option_add('*font', 'Helvetica -12')
        return(dialog(title, msg))
    finally:
        # every call makes its own root; destroy it so roots do not pile up
        localRoot.destroy()
        
def fileNotLoadedError(title, msgtype):
    """ Reports, on the console and in a message box, that no html
        file has been loaded into Sigil. Always returns 0.
    """
    console_lines = (
        '\n >>> Warning!! You have not loaded an html file into Sigil.',
        ' >>> You must first load an html file into Sigil and then run this plugin. '
        'This plugin can only be used with html files imported into Sigil. Please try again.',
    )
    for text in console_lines:
        print(text)

    dialog_text = ''.join([
        'You have not loaded an html file into Sigil. \n\n',
        'You must first load an html file into Sigil and then run this plugin. Please try again.',
    ])
    show_msgbox(title, dialog_text, msgtype)

    return(0)
    
def split2Files(bk, wdir, file):
    """ Splits the html file into separate xhtml chapter files.

        bk   -- plugin book object (not used here)
        wdir -- work directory; the section files are created in it
        file -- html file to split; it is deleted once the split is done

        A new section file is started at every line that contains the
        chosen heading tag ('<h2' when only options.H2_SELECTED is set,
        otherwise '<h1') and at the first line that has visible text,
        which becomes the title section. The first section file is
        renamed to 'title.xhtml'.

        Returns (headings, base_file_names, file_path_names).
        Raises ValueError when the file yields no section at all.
    """

    print('In split2file()...')
    if options.H2_SELECTED and not options.H1_SELECTED:
        chosen_heading_style = '<h2'
    else:
        chosen_heading_style = '<h1'

    file_path_names = []
    headings = []
    file_name = os.path.basename(file)
    showHTMLBodyOnly(wdir, file)
    section_no = 0
    outfp = None            # section file currently being written
    first_time = True       # True until the title section has been started

    print(' -- Split all chapters/headers into separate xhtml files')
    print(' -- chosen_heading_style...' + chosen_heading_style)

    print('\n >>> splits file name...' + file + '\n')
    file = os.path.join(wdir, file)

    try:
        with open(file, 'rt', encoding='utf-8') as infp:
            for line in infp:

                if chosen_heading_style in line or first_time:
                    first_time = False

                    # lines without visible text cannot start a section;
                    # keep looking for the title
                    if BeautifulSoup(line, 'html.parser').text.strip() == '':
                        first_time = True
                        continue

                    if chosen_heading_style in line:
                        headings.append(BeautifulSoup(line, 'html.parser').text)

                    # fixed-width numbering keeps the names uniform
                    # past section 99 as well
                    header = 'section-%05d.xhtml' % section_no
                    out_path = os.path.join(wdir, header)

                    # finish the previous section before starting the next
                    if outfp is not None:
                        outfp.close()
                    outfp = open(out_path, 'wt', encoding='utf-8')
                    file_path_names.append(out_path)

                    # add the html namespace headers
                    addMainHeaders(outfp, header)
                    outfp.write(line)
                    section_no += 1

                elif outfp is not None:
                    # body text: drop lines without visible text unless
                    # they carry an image or a div tag
                    has_markup = '<img' in line or \
                                 '<div' in line or \
                                 '</div>' in line
                    if not has_markup and \
                        BeautifulSoup(line, 'html.parser').text.strip() == '':
                        continue
                    outfp.write(line.strip() + '\n')
    finally:
        if outfp is not None:
            outfp.close()

    if not file_path_names:
        raise ValueError('No text found to split in ' + file)

    print('\n >>> title page name...' + file_path_names[0])
    # change the title file name to 'title.xhtml'
    old_name = file_path_names[0]
    file_path_names[0] = wdir + os.sep + 'title.xhtml'
    os.rename(old_name, file_path_names[0])
    # remove the original html file from the work dir
    os.remove(os.path.join(wdir, file_name))

    # add </body> and </html> to the end of the files
    # and prettify the html
    for section_path in file_path_names:
        addHTMLTail(wdir, section_path)
        prettifyXHTMLFile(wdir, section_path)

    for section_path in file_path_names:
        print(' >>> file path list after split...' + section_path)

    base_file_names = [os.path.basename(p) for p in file_path_names]

    for base_name in base_file_names:
        print(' >>> basename list...' + base_name)

    return(headings, base_file_names, file_path_names)
    
def removeNonAsciiChars(line):
    """ Swaps curly double quotes, the right single quote and the
        ellipsis character for their plain ascii look-alikes and
        returns the result. All other characters are left alone.
    """
    substitutions = (
        ('“', '"'),
        ('”', '"'),
        ("’", "'"),
        ('…', '...'),
    )
    for fancy, plain in substitutions:
        line = line.replace(fancy, plain)

    return(line)
    
def educateHeader(header):
    """ Makes a header safe to use as a file name.

        Plain quotes and the numeric entities for curly quotes are
        removed; dashes and the characters * < > ? : / \\ | are turned
        into underscores. A trailing or a leading full stop is replaced
        by an underscore as well.

        Returns the cleaned header.
    """

    for quote in ('"', "'"):
        header = header.replace(quote, '')

    for char in ('-', '—', '*', '<', '>', '?', ':', '/', '\\', '|'):
        header = header.replace(char, '_')

    # remove all non-essential unicode header formatting
    # (single quote apostrophes and double quotes)
    for entity in ("&#8217;", "&#8216;", "&#8220;", "&#8221;"):
        header = header.replace(entity, "")

    # The full stop checks run last so that a stop uncovered by the
    # entity removal above is handled too.

    # Windows doesn't like path components that end with a period
    if header.endswith('.'):
        header = header[:-1] + '_'

    # Mac and Unix don't like file names that begin with a full stop
    if header.startswith('.'):
        header = '_' + header[1:]

    return(header)
    
def addHTMLTail(wdir, file):
    """ Appends the closing </body> and </html> tags to an html file.

        wdir -- work directory; holds the temporary copy 'tails.html'
        file -- path of the file to extend; it is rewritten in place

        Always returns 0.
    """
    output = os.path.join(wdir, "tails.html")

    # both handles are closed even when reading or writing fails
    with open(file, 'rt', encoding='utf-8') as infp, \
         open(output, 'wt', encoding='utf-8') as outfp:
        for line in infp:
            outfp.write(line)
        outfp.write('\n</body>\n</html>\n\n')

    # swap the finished copy in for the original in a single step
    os.replace(output, file)
    return(0)
    
def addHTMLHeaders1(outfp, fname):
    """ Writes the xml declaration, the XHTML 1.1 doctype, a <head>
        whose title is fname and the opening <body> tag to outfp.

        Returns outfp so that the caller can keep writing to it.
    """
    preamble = (
        '<?xml version="1.0" encoding="utf-8"?>\n',
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n\n',
        '<html xmlns="http://www.w3.org/1999/xhtml">\n',
        '<head>\n',
        '<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8"/>\n',
        '<title>' + fname + '</title>\n',
        '</head>\n',
        '<body>\n',
    )
    for chunk in preamble:
        outfp.write(chunk)

    return(outfp)
    
def addMainHeaders(outfp, fname):
    """ Hands outfp and fname to addHTMLHeaders1() and returns
        whatever that call returns.
    """
    return(addHTMLHeaders1(outfp, fname))
   
def removeHTMLHeaders(wdir, file):
    """ Strips everything up to and including the <body> line.

        wdir -- work directory that contains the file
        file -- name of the file, relative to wdir; rewritten in place

        Lines before the first line containing '<body' are dropped, and
        so is every line that contains '<body' itself. When no such
        line exists the file ends up empty.

        Always returns 0.
    """
    file = os.path.join(wdir, file)
    output = os.path.join(wdir, 'remove_heads.html')
    body_seen = False

    with open(file, 'rt', encoding='utf-8') as infp, \
         open(output, 'wt', encoding='utf-8') as outfp:
        for line in infp:
            if '<body' in line:
                # the body tag line itself is never copied
                body_seen = True
            elif body_seen:
                outfp.write(line)

    os.replace(output, file)
    return(0)
    
def sanitizeFile(wdir, file):
    """ Drops every line that contains an opening or closing div tag.

        wdir -- work directory; holds the temporary copy
        file -- path of the file to clean; it is rewritten in place

        Always returns 0.
    """
    output = os.path.join(wdir, 'remove_heads.html')

    with open(file, 'rt', encoding='utf-8') as infp, \
         open(output, 'wt', encoding='utf-8') as outfp:
        for line in infp:
            if '<div' in line or '</div>' in line:
                continue
            outfp.write(line)

    os.replace(output, file)
    return(0)
    
def createTOCFile(bk, pathdir, filenames, headings):
    """ Writes the html table of contents to 'contents.xhtml'.

        bk        -- plugin book object (not used here)
        pathdir   -- directory in which the file is created
        filenames -- base names of the text files; the first entry is
                     skipped and gets no TOC line
        headings  -- heading texts; headings[0] belongs to filenames[1]

        Each heading is upper-cased and xml-escaped before it is
        written. When there are fewer headings than remaining file
        names, the surplus file names get no entry.

        Returns the base name of the created file.
    """
    from xml.sax.saxutils import escape

    outfile = os.path.join(pathdir, 'contents.xhtml')
    with open(outfile, 'wt', encoding='utf-8') as outfp:
        outfp.write('<?xml version="1.0" encoding="utf-8"?>\n')
        outfp.write('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n\n')
        outfp.write('<html xmlns="http://www.w3.org/1999/xhtml">\n')
        outfp.write('<head>\n')
        outfp.write(' <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>\n')
        outfp.write(' <link href="../Styles/stylesheet.css" type="text/css" rel="stylesheet"/>\n')
        outfp.write(' <title>Table of Contents</title>\n')
        outfp.write('</head>\n')
        outfp.write('\n<body>\n\n')
        outfp.write('<h4 style="font-family: serif;font-size: 1.33em;text-align: center;margin-top: 0;margin-bottom: 1.25em;"><a id="toc"></a>' + 'Table of Contents' + '</h4>\n')

        # zip() stops at the shorter list, so a missing heading can no
        # longer abort the run half way through the file
        for filename, heading in zip(filenames[1:], headings):
            # the heading is plain text; escape it to keep the xhtml valid
            line = '\n<p class="level-1-toc-item">' + '<a href="../Text/' + filename + '">' + escape(heading.upper()) + '</a></p>'
            outfp.write(line + '\n')

        outfp.write('  </body>\n</html>')

    return(os.path.basename(outfile))
    
def createTOCStyles(bk, wdir):
    """ Appends the TOC paragraph and link rules to the stylesheet.

        bk   -- plugin book object (not used here)
        wdir -- directory that holds the file named by
                options.CSS_FILE_NAME

        Always returns 0.
    """
    css_path = os.path.join(wdir, options.CSS_FILE_NAME)

    data = ''.join([
        'p.level-1-toc-item  {\n',
        'font-family: serif;\n',
        # without the closing semicolon this declaration and the
        # following one are both invalid
        'font-size: 1.0em;\n',
        'font-weight: bold;\n',
        'margin-top: 0em;\n',
        'margin-bottom: 0.4em;\n',
        'margin-right: 0em;\n',
        'margin-left: 0em;\n',
        'text-indent: 1.0em;\n',
        '}\n',
        'a:link  {\n',
        'color: #0000FF;\n',
        'text-decoration: underline;\n',
        '}\n',
    ])

    with open(css_path, 'at', encoding='utf-8') as outfp:
        outfp.write(data)

    return(0)
    
def _insertBeforeHeadEnd(fname, output, replacement):
    """ Rewrites fname via the temp file output, swapping each bare '</head>' line for replacement. """
    with open(fname, 'rt', encoding='utf8') as infp, \
         open(output, 'wt', encoding='utf-8') as outfp:
        for line in infp:
            if line.strip() == '</head>':
                outfp.write(replacement)
            else:
                outfp.write(line)
    os.replace(output, fname)


def addLinkRel2(wdir, fnames):
    """ Adds stylesheet links to the head of each html file.

        wdir   -- work directory that holds the css and html files
        fnames -- html file names, relative to wdir

        A link to 'stylesheet.css' and/or 'inline_styles.css' is added
        for whichever of the two files exists in wdir. The link is put
        in front of a line that consists of '</head>' only.

        Always returns 0.
    """
    stylesheets = (
        ('stylesheet.css', 'link_rel.html',
         '  <link rel="stylesheet" href="../Styles/stylesheet.css" type="text/css"/>\n</head>\n'),
        ('inline_styles.css', 'link_rel2.html',
         '  <link rel="stylesheet" href="../Styles/inline_styles.css" type="text/css"/>\n</head>\n\n'),
    )

    for css_name, temp_name, replacement in stylesheets:
        if not os.path.isfile(os.path.join(wdir, css_name)):
            continue
        for file in fnames:
            _insertBeforeHeadEnd(os.path.join(wdir, file),
                                 os.path.join(wdir, temp_name),
                                 replacement)

    return(0)
    
def addSpineItems(bk, basename_list):
    """ Builds the spine and passes it to bk.setspine().

        The spine starts with 'cover' and 'toc', followed by one
        'body<N>' entry (N counting from 1) for each item of
        basename_list. Always returns 0.
    """
    spine = [('cover', None), ('toc', None)]
    spine.extend(('body' + str(number), None)
                 for number, _ in enumerate(basename_list, 1))
    bk.setspine(spine)
    return(0)
    

def createNCXFile2(wdir, fp_list, base_list, meta):
    """ Writes 'toc.ncx' with a cover entry plus one entry per file.

        wdir      -- directory in which toc.ncx is created
        fp_list   -- paths of the text files, in reading order
        base_list -- not used
        meta      -- dict that provides 'book_id' and 'title'

        The label of each entry is derived from the file's base name:
        underscores become spaces, the words are capitalised and the
        extension is dropped. The same capitalised name, with spaces
        turned back into underscores, is used in the entry's src.

        Always returns 0.
    """
    # NOTE(review): src uses the capitalised file name, so it only
    # matches files that are really stored under that name - confirm.
    outf = os.path.join(wdir, 'toc.ncx')
    with open(outf, 'wt', encoding='utf-8') as outfp:
        outfp.write('<?xml version="1.0" encoding="utf-8" ?>\n')
        outfp.write('<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN"\
         "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd"><ncx version="2005-1" \
        xmlns="http://www.daisy.org/z3986/2005/ncx/">\n')

        outfp.write('  <head>\n')
        outfp.write('    <meta content="urn:uuid:' + meta['book_id'] + '" name="dtb:uid"/>\n')
        outfp.write('    <meta content="1" name="dtb:depth"/>\n')
        outfp.write('    <meta content="0" name="dtb:totalPageCount"/>\n')
        outfp.write('    <meta content="0" name="dtb:maxPageNumber"/>\n')
        outfp.write('  </head>\n')
        outfp.write('  <docTitle>\n')
        outfp.write('    <text>' + meta['title'] + '</text>\n')
        outfp.write('  </docTitle>\n')
        outfp.write('  <navMap>\n')
        outfp.write('    <navPoint id="navPoint-1" playOrder="1">\n')
        outfp.write('      <navLabel>\n')
        outfp.write('        <text>Cover</text>\n')
        outfp.write('      </navLabel>\n')
        outfp.write('      <content src="Text/cover.xhtml"/>\n')
        outfp.write('    </navPoint>\n')

        # the cover took play order 1, the text files follow from 2
        for order, path in enumerate(fp_list, 2):
            fp = os.path.basename(path)
            fp = capwords(fp.replace('_', ' ')).replace(' ', '_')
            label = fp.split('.')[0].replace('_', ' ')

            outfp.write('    <navPoint id="navPoint-' + str(order) + '" playOrder="' + str(order) + '">\n')
            outfp.write('      <navLabel>\n')
            outfp.write('        <text>' + label + '</text>\n')
            outfp.write('      </navLabel>\n')
            outfp.write('      <content src="Text/' + fp + '"/>\n')
            outfp.write('    </navPoint>\n')

        outfp.write('  </navMap>\n')
        outfp.write('</ncx>')

    return(0)
   
def removeDocTOC(wdir, file):
    """ Removes the document's own table of contents from the html file.

        wdir -- work directory; holds the temporary copies
        file -- path of the html file; it is rewritten in place

        First pass: after a line that contains 'Contents', the following
        lines are dropped while they are blank or hold an <a> tag whose
        href contains '#' (and no '<h' markup). Second pass, only when
        at least one such link line was dropped: p and h1-h4 tags whose
        text is exactly 'Table of Contents' or 'Contents' are deleted.

        Always returns 0.
    """
    print('\n>>> In removeDoccTOC()...')
    # number of link lines dropped in the first pass
    count = 0
    output = os.path.join(wdir, 'remove_toc.html')
    outfp = open(output, 'wt', encoding='utf-8')
    with open(file, 'rt', encoding='utf8') as infp:

        for line in infp:
            # NOTE(review): 'Table of Contents' already contains 'Contents',
            # so any line with the word 'Contents' gets here - confirm that
            # ordinary text lines are meant to trigger this as well.
            if 'Table of Contents' in line or 'Contents' in line:
                # The inner loop pulls from the same file iterator and
                # reuses the name 'line'. When it ends, 'line' holds the
                # last line it read, so the write below emits that line
                # and not the line that matched 'Contents'.
                for line in infp:
                    # blank lines inside the TOC are skipped, not copied
                    if line.strip() == '':
                        continue
                    soup = BeautifulSoup(line, 'html.parser')
                    atag = soup.find('a')
                    # a TOC entry: a link with '#' in its href and no heading markup
                    if atag != None and atag.has_attr('href') and '#' in atag['href'] and '<h' not in str(soup):
                        line = ''
                        outfp.write(line)
                        count += 1
                        continue
                    else:
                        # first line that is not a TOC entry ends the scan
                        break


            outfp.write(line)

    outfp.close()
    # replace the original with the filtered copy
    os.remove(file)
    os.rename(output, file)

    if count > 0:
        output = os.path.join(wdir, 'remove_TOC.html')
        outfp = open(output, 'wt', encoding='utf-8')
        # NOTE(review): this read handle is never closed explicitly
        html = open(file, 'rt', encoding='utf8').read()

        soup = BeautifulSoup(html, 'html.parser')

        # delete any TOC title that is still in the document
        for tag in soup.find_all(['p', 'h1', 'h2', 'h3', 'h4']):
            if tag.get_text().strip() == 'Table of Contents' or tag.get_text().strip() == 'Contents':
                tag.decompose()

        outfp.writelines(str(soup))
        outfp.close()
        os.remove(file)
        os.rename(output, file)

    #removeUnusedBookmarks(wdir, file)
    return(0)
    
def createNCXFile(bk, work_dir, file_paths, headings):
    """ Writes 'toc.ncx' into work_dir with one navPoint per heading.

        bk         -- plugin book object (not used here)
        work_dir   -- directory in which toc.ncx is created
        file_paths -- paths of the text files; file_paths[n] is paired
                      with headings[n]
        headings   -- heading texts; each one is passed through
                      capwords() for its label

        Book id and title are taken from options.META_OPTIONS.
        Always returns 0.
    """

    print(' -- Create the toc.ncx XML file')
    book_meta = options.META_OPTIONS

    ncx_path = work_dir + os.sep + 'toc.ncx'
    with open(ncx_path, 'wt', encoding='utf-8') as outfp:
        outfp.write('<?xml version="1.0" encoding="utf-8" ?>\n')
        outfp.write('<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN"\
         "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd"><ncx version="2005-1" \
        xmlns="http://www.daisy.org/z3986/2005/ncx/">\n')

        for chunk in (
                '  <head>\n',
                '    <meta content="urn:uuid:' + book_meta['book_id'] + '" name="dtb:uid"/>\n',
                '    <meta content="1" name="dtb:depth"/>\n',
                '    <meta content="0" name="dtb:totalPageCount"/>\n',
                '    <meta content="0" name="dtb:maxPageNumber"/>\n',
                '  </head>\n',
                '  <docTitle>\n',
                '    <text>' + book_meta['title'] + '</text>\n',
                '  </docTitle>\n',
                '  <navMap>\n'):
            outfp.write(chunk)

        # work on private copies of both sequences
        path_items = list(file_paths)
        heading_items = list(headings)

        for index in range(len(heading_items)):
            target = os.path.basename(path_items[index])
            order = str(index + 1)

            outfp.write('    <navPoint id="navPoint-' + order + '" playOrder="' + order + '">\n')
            outfp.write('      <navLabel>\n')
            outfp.write('        <text>' + capwords(heading_items[index]) + '</text>\n')
            outfp.write('      </navLabel>\n')
            outfp.write('      <content src="Text/' + target + '"/>\n')
            outfp.write('    </navPoint>\n')

        outfp.write('  </navMap>\n')
        outfp.write('</ncx>')

    return(0)
    
def createCoverImageFile(bk, wdir):
    """ Copies the cover image into wdir and writes 'cover.xhtml'.

        The image named by options.META_OPTIONS['cover'] is copied into
        wdir and the copy is renamed to 'cover.jpg'. The img tag of the
        cover page refers to the image by its original base name.

        Always returns 0.
    """

    print(' -- Creating the cover image file')
    cover_source = options.META_OPTIONS['cover']
    base = os.path.basename(cover_source)

    copied_image = os.path.join(wdir, base)
    shutil.copy2(cover_source, copied_image)
    os.rename(copied_image, os.path.join(wdir, 'cover.jpg'))

    # the page body that shows the cover image
    page_body = (
        '<div style="padding: 0;margin: 0;">\n',
        '    <p style="text-align: center;"><span><img alt="Cover" id="cover" src="../Images/' + base + '" style="width: 100%;height: auto;"/></span></p>\n',
        '</div>\n',
        '\n</body>\n</html>',
    )

    with open(os.path.join(wdir, 'cover.xhtml'), 'wt', encoding='utf-8') as outfp:
        page = addMainHeaders(outfp, 'Cover')
        for chunk in page_body:
            page.write(chunk)

    return(0)
    
def addOPFMetadata(bk):
    """ Builds the OPF <metadata> section and hands it to
        bk.setmetadataxml().

        The values come from options.META_OPTIONS; the language is the
        part of the default locale name in front of '_', or 'en' when
        no locale name is available. Always returns 0.
    """

    meta = options.META_OPTIONS

    locale_name = getdefaultlocale()[0]
    lang = ('en' if locale_name is None else locale_name).split("_")[0]

    # the metadata template, one element per entry
    data = ''.join([
        '  <metadata xmlns:opf="http://www.idpf.org/2007/opf" xmlns:dc="http://purl.org/dc/elements/1.1/">\n',
        '    <dc:title>' + meta['title'] + '</dc:title>\n',
        '    <dc:date opf:event="creation">' + meta['date'] + '</dc:date>\n',
        '    <dc:identifier id="BookId">urn:uuid:' + meta['book_id'] + '</dc:identifier>\n',
        '    <dc:language>' + lang + '</dc:language>\n',
        '    <dc:creator opf:role="aut">' + meta['author'] + '</dc:creator>\n',
        '    <dc:publisher>' + meta['publisher'].strip() + '</dc:publisher>\n',
        '    <dc:rights>Worldwide Copyright ©' + meta['year'] + ' ' + meta['author'] +
        '. All Rights Reserved.</dc:rights>\n',
        '    <meta name="cover" content="cover-img" />\n',
        '  </metadata>',
    ])
    bk.setmetadataxml(data)

    return(0)
    
def storeHTMLNSHeader(wdir, file):
    """ Cuts the lines in front of <body> out of the file.

        wdir -- work directory that contains the file
        file -- name of the file, relative to wdir; rewritten in place

        Everything before the first line containing '<body' is removed
        from the file; that line and all later lines are kept, each
        written exactly once.

        Returns the removed lines as one string.
    """
    file = os.path.join(wdir, file)
    output = os.path.join(wdir, 'remove_toc.html')
    header_lines = []
    body_seen = False

    with open(file, 'rt', encoding='utf-8') as infp, \
         open(output, 'wt', encoding='utf-8') as outfp:
        for line in infp:
            if not body_seen and '<body' not in line:
                header_lines.append(line)
                continue
            body_seen = True
            outfp.write(line)

    os.replace(output, file)
    return(''.join(header_lines))
    
def addTopHeaders(outfp, headers, line_save, wdir=None, file=None):
    """ Adds xml + doctype headers to the html file.

        outfp     -- open text file that receives the headers
        headers   -- header lines; written to outfp as they are
        line_save -- text that follows '<title>' in the new title line
        wdir      -- optional work directory that contains file
        file      -- optional file name, relative to wdir

        When both wdir and file are given, every line of that file
        containing '<title>' is replaced by '<title>' + line_save and
        the file is rewritten in place. Without them only the headers
        are written.

        Returns headers.
    """
    # NOTE(review): the function used to read wdir and file without
    # receiving them, so it could never run; the two optional
    # parameters are a best guess at the intended inputs - confirm.
    outfp.writelines(headers)
    if wdir is None or file is None:
        return(headers)

    file = os.path.join(wdir, file)
    output = os.path.join(wdir, 'addtitle.html')
    with open(file, 'rt', encoding='utf-8') as infp, \
         open(output, 'wt', encoding='utf-8') as titled:
        for line in infp:
            if '<title>' in line:
                line = '<title>' + line_save
            titled.write(line)

    os.replace(output, file)
    return(headers)
    
def getLinkRel(wdir, file):
    """ Collects the stylesheet link lines of an html file.

        wdir -- not used
        file -- path of the html file to read

        A line counts when it contains '<link' and, once its spaces
        are removed, 'rel="stylesheet"'. Each match is also printed.

        Returns the matching lines joined into one string.
    """
    found = []
    with open(file, 'rt', encoding='utf-8') as infp:
        for line in infp:
            if '<link' not in line:
                continue
            if 'rel="stylesheet"' not in line.replace(' ', ''):
                continue
            found.append(line)
            print('\n >>> getLinkRel...' + line)
    return(''.join(found))
    
def addLinkRel(wdir, file, linkrel):
    """ Adds a css meta link to an html file.

        wdir    -- work directory; holds the temporary copy
        file    -- path of the html file; it is rewritten in place
        linkrel -- link line(s) to put in front of </head>

        Every line containing '</head>' is replaced by linkrel followed
        by '</head>' and an empty line.

        Always returns 0.
    """
    output = os.path.join(wdir, 'add_linkrel.html')

    with open(file, 'rt', encoding='utf-8') as infp, \
         open(output, 'wt', encoding='utf-8') as outfp:
        for line in infp:
            if '</head>' in line:
                line = linkrel + '</head>\n\n'
            outfp.write(line)

    os.replace(output, file)
    return(0)

def prettifyCSS(wdir, css):
    """ Prettifies the css file.

        wdir -- work directory; holds the temporary copy
        css  -- path of the stylesheet; it is rewritten in place

        Line by line: a closing brace that follows ';' is moved to its
        own line; lines with 'font-family:' or
        '-webkit-text-decoration-skip' and blank lines are dropped; the
        opening brace is set off by two spaces; a space is added after
        ':' in declarations. Each line is written stripped.

        Always returns 0.
    """
    output = os.path.join(wdir, 'link_rel.html')

    with open(css, 'rt', encoding='utf8') as infp, \
         open(output, 'wt', encoding='utf-8') as outfp:
        for line in infp:

            if ';}\n' in line:
                line = line.replace(';}\n', ';\n}\n')

            if 'font-family:' in line:
                continue

            if line.strip() == '':
                continue

            if '-webkit-text-decoration-skip' in line:
                continue

            # NOTE(review): this removes every space of the line, which
            # also joins the parts of a selector such as 'div p' - confirm
            if '{' in line and '  {' not in line:
                line = line.replace(' ', '')
                line = line.replace('{', '  {')

            # lines containing 'a:' are left alone
            if ':' in line and ': ' not in line and 'a:' not in line:
                line = line.replace(':', ': ')

            outfp.write(line.strip() + '\n')

    os.replace(output, css)
    return(0)

def cleanExit(wdir):
    """ Deletes the work directory tree wdir, ignoring any errors
        raised while doing so. Always returns 0.
    """
    shutil.rmtree(wdir, True)
    return(0)
    
def transformInternalLinks(bk, wdir, files):
    """ Converts all html style href links to the appropriate epub
        file path for the id.

        bk    -- plugin book object (not used here)
        wdir  -- work directory that contains the files
        files -- names of the xhtml files; only the base names are used

        For Google documents the href of every internal link ('#...')
        is removed first. After that, in every file, 'name' attributes
        are turned into 'id' attributes and each internal href is
        rewritten to '../Text/<file that holds the id>#<id>'.

        Always returns 0.
    """
    print('>>> in transformInternalLinks()...')

    # temporary copy, kept inside the work dir
    output = os.path.join(wdir, 'intLinks.html')

    # handler for internal link problems for Google html only
    if options.DOCTYPE == 'Google':

        for file in files:
            file = os.path.join(wdir, os.path.basename(file))
            with open(file, 'rt', encoding='utf8') as infp:
                soup = BeautifulSoup(infp.read(), 'html.parser')

            # just removes href from any manual internal links
            for atag in soup.body.find_all(href=True):
                if str(atag['href']).strip().startswith('#'):
                    del atag['href']

            with open(output, 'wt', encoding='utf-8') as outfp:
                outfp.write(str(soup))
            os.replace(output, file)

    # NOTE(review): getIDFileName() reads the files from disk, so an id
    # that this loop has converted from 'name' but not yet saved is not
    # found by it - confirm whether a separate first pass is needed.
    for file in files:
        file = os.path.join(wdir, os.path.basename(file))
        with open(file, 'rt', encoding='utf8') as infp:
            soup = BeautifulSoup(infp.read(), 'html.parser')

        # ensure that all "name" attrs are converted to "id" attrs;
        # find_all(name=True) yields every tag, has_attr() does the filtering
        for ntag in soup.body.find_all(name=True):
            if ntag.has_attr('name'):
                ntag['id'] = ntag['name']
                del ntag['name']

        # add the correct epub file path to the original hrefs
        for htag in soup.body.find_all(href=True):
            if str(htag['href']).strip().startswith('#'):
                target_id = str(htag['href']).replace('#', '')
                htag['href'] = '../Text/' + getIDFileName(wdir, files, target_id) + '#' + target_id

        with open(output, 'wt', encoding='utf-8') as outfp:
            outfp.write(str(soup))
        os.replace(output, file)

    return(0)
    
def getIDFileName(wdir, files, ID):
    """ Finds the file whose body holds a tag with the given id.

        wdir  -- work directory that contains the files
        files -- file names, relative to wdir
        ID    -- id value to look for; an empty id never matches

        Every file is searched. When more than one file holds the id,
        the last of them in files wins.

        Returns the base name of the file, or '' when the id is not
        found.
    """

    filename = ''

    for file in files:
        file = os.path.join(wdir, file)
        with open(file, 'rt', encoding='utf8') as infp:
            soup = BeautifulSoup(infp.read(), 'html.parser')

        for tag in soup.body.find_all(id=True):
            tag_id = tag['id']
            if tag_id is not None and tag_id != '' and tag_id == ID:
                filename = os.path.basename(file)
                # done with this file; the remaining files are still checked
                break

    return(filename)

def moveStyles2CSS(bk, wdir, file):
    """ Runs the two styling steps in order: moveHTMLStyles2CSS() for
        the html styles, then moveInlineStyles2CSS() for the inline
        styling. All three arguments are passed on unchanged.

        Always returns 0.
    """
    # html styles first, the inline styling is appended afterwards
    moveHTMLStyles2CSS(bk, wdir, file)
    moveInlineStyles2CSS(bk, wdir, file)

    return(0)
    
def removeDocTOC2(wdir, file):
    """ Removes the doc TOC from the html file.

    Two passes are made over 'file'; each one writes a temporary file in
    'wdir' which then replaces 'file':

    1. every line that contains an <a> tag whose href includes '#' is
       dropped;
    2. every p/h1/h2/h3/h4 element whose stripped text is exactly
       'Table of Contents' or 'Contents' is removed.

    Always returns 0.
    """

    # pass 1: drop the lines that hold links with a '#' in their href
    output = os.path.join(wdir, 'remove_TOC.html')
    with open(file, 'rt', encoding='utf8') as infp, \
            open(output, 'wt', encoding='utf-8') as outfp:
        for line in infp:
            soup = BeautifulSoup(line, 'html.parser')
            has_fragment_link = any(
                atag.has_attr('href') and '#' in atag['href']
                for atag in soup.find_all('a'))
            if not has_fragment_link:
                outfp.write(line)

    # os.replace swaps the new file in with one call, so the original is
    # never deleted before its replacement is in place
    os.replace(output, file)

    # pass 2: remove the TOC heading itself
    output = os.path.join(wdir, 'remove_TOC2.html')
    with open(file, 'rt', encoding='utf8') as infp:
        soup = BeautifulSoup(infp.read(), 'html.parser')

    for tag in soup.find_all(['p', 'h1', 'h2', 'h3', 'h4']):
        if tag.get_text().strip() in ('Table of Contents', 'Contents'):
            tag.decompose()

    with open(output, 'wt', encoding='utf-8') as outfp:
        outfp.write(str(soup))

    os.replace(output, file)
    return(0)
    
def fileSaveWarning():
    """ Asks the user to confirm that the current book may be replaced.

    Returns the value produced by mbox.askquestion() for the user's choice.
    """

    question = 'Your current book will be completely replaced losing any unsaved changes. Are you sure you want to proceed?'

    # a root window is created and withdrawn before the dialog is shown
    hidden_root = tk.Tk()
    hidden_root.withdraw()
    hidden_root.option_add('*font', 'Helvetica -10')
    hidden_root.quit()

    return mbox.askquestion('Warning', question)
    
def fixPunctuation(wdir, file):
    """ Re-wraps every line of the file with sentence-ending fixing enabled.

    Each line is passed through a textwrap.TextWrapper (width 200,
    fix_sentence_endings=True) and written out followed by a newline.
    The rewritten copy, built in 'wdir', then takes the place of 'file'.
    Returns 0.
    """

    temp_path = os.path.join(wdir, 'remove_TOC.html')
    formatter = textwrap.TextWrapper(width=200, fix_sentence_endings=True)

    with open(temp_path, 'wt', encoding='utf-8') as sink, \
            open(file, 'rt', encoding='utf8') as source:
        for raw_line in source:
            sink.write(formatter.fill(raw_line) + '\n')

    # swap the rewritten copy in for the original
    os.remove(file)
    os.rename(temp_path, file)
    return 0
    
def getODTImages(wdir, ldir):
    """ Collects the ebook images that sit in the local user dir.

    Lists 'ldir', keeps the .jpg/.jpeg/.png/.gif entries and copies each
    of them into 'wdir'.

    wdir -- work dir that the images are copied to
    ldir -- dir that is searched for images

    Returns the tuple (images_dir, images): images_dir is 'ldir' and
    images is the list of image file names found. The list is empty when
    the directory cannot be listed or holds no images.
    """

    print('-- In getODTImages()...')

    images_dir = ldir

    # put all images into a list
    try:
        flist = os.listdir(images_dir)
    except (OSError, TypeError, ValueError):
        # callers unpack two values, so the failure path has to return the
        # same (images_dir, images) pair as every other path
        return(images_dir, [])

    # str.endswith accepts a tuple of suffixes
    images = [item for item in flist
              if item.endswith(('.jpg', '.jpeg', '.png', '.gif'))]

    if len(images) == 0:
        print('>>> Warning: No ebook images were found. Your ebook image files + html file ' + \
              'should both be in the same directory on your computer.\n')
        return(images_dir, images)

    # copy all ebook images to the work dir
    for img in images:
        shutil.copy(os.path.join(ldir, img), os.path.join(wdir, img))

    return(images_dir, images)
            
def getOtherImages(wdir, ldir):
    """ Collects the ebook images that accompany a Word or Google html file.

    The images dir depends on options.DOCTYPE:
      'Word'   -- <ldir>/<html file name without its extension>_files
      'Google' -- <ldir>/images
    Every .jpg/.jpeg/.png/.gif entry of that dir is copied into 'wdir'.

    Returns the tuple (images_dir, images), images being the list of image
    file names found. The list is empty when the images dir cannot be
    listed or holds no images.
    """

    print('\n-- In getOtherImages()...')
    fpath = options.META_OPTIONS['filename']

    images_dir = ''
    if options.DOCTYPE == 'Word':
        # strip only the real extension: replacing the text '.htm' would
        # turn 'book.html' into 'bookl' and point at a non-existent dir
        img_dirname = os.path.splitext(os.path.basename(fpath))[0]
        img_dirname = img_dirname + '_files'
        images_dir = os.path.join(ldir, img_dirname)

    elif options.DOCTYPE == 'Google':
        images_dir = os.path.join(ldir, 'images')

    # create image list; a missing/unreadable images dir is treated like
    # an empty one, as getODTImages() does
    try:
        flist = os.listdir(images_dir)
    except OSError:
        flist = []

    images = [img for img in flist
              if img.endswith(('.jpg', '.jpeg', '.png', '.gif'))]

    if len(images) == 0:
        print('>>> Warning: No ebook images were found. Your images dir + html ' + \
              'file should both be in the same directory on your computer. !!\n')
        return(images_dir, images)

    # copy all ebook images to the work dir
    for image in images:
        shutil.copy(os.path.join(images_dir, image), os.path.join(wdir, image))

    return(images_dir, images)
    
def createMIMETYPEFile(work_dir):
    """ Writes the 'MIMETYPE' file holding the epub media type into work_dir.

    The file mode is set to 0o777 once the file has been written.
    Returns 1.
    """
    print(' -- Create mimetype file')

    mime_path = os.sep.join((work_dir, 'MIMETYPE'))
    with open(mime_path, 'wt', encoding='utf-8') as mime_file:
        mime_file.write('application/epub+zip')

    # the permissions are changed after the file has been closed
    os.chmod(mime_path, 0o777)
    return 1
    
    
def createContainerXMLFile(work_dir):
    """ Writes container.xml, which names OEBPS/content.opf as the root
    file, into work_dir. Returns 1.
    """
    print(' -- Create the container XML file')

    container_lines = (
        '<?xml version="1.0" encoding="UTF-8"?>\n',
        '<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">\n',
        '    <rootfiles>\n',
        '        <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml" />\n',
        '    </rootfiles>\n',
        '</container>',
    )

    container_path = os.sep.join((work_dir, 'container.xml'))
    with open(container_path, 'wt', encoding='utf-8') as container_file:
        container_file.writelines(container_lines)

    return 1
                
def buildOPFFile(WORK_DIR_PATH, file_name_paths, meta, image_names):
    """ Builds content.opf by running the section writers one after another.

    WORK_DIR_PATH   -- dir in which content.opf is written
    file_name_paths -- passed to the manifest, spine and guide writers
    meta            -- ebook metadata, passed to the header, metadata and
                       manifest writers
    image_names     -- passed to the manifest writer

    Returns 0.
    """
    print(' -- Build the content.opf file')

    # each entry is (writer, positional args); the order is the order in
    # which the sections are written to the file
    opf_steps = (
        (createOPFHTMLHeader, (WORK_DIR_PATH, meta)),
        (addOPFMetadata, (WORK_DIR_PATH, meta)),
        (addManifestData, (WORK_DIR_PATH, file_name_paths, meta, image_names)),
        (addSpineData, (WORK_DIR_PATH, file_name_paths)),
        (addGuideSectionData, (WORK_DIR_PATH, file_name_paths)),
    )
    for write_section, section_args in opf_steps:
        write_section(*section_args)

    return 0
    
def createOPFHTMLHeader(dir, meta):
    """ Starts a new content.opf in 'dir' with the xml declaration and the
    opening <package> and <metadata> tags.

    An existing content.opf is overwritten. 'meta' is not used by this
    function. Returns 0.
    """

    opf = dir + os.sep + 'content.opf'

    # the with-block closes (and so flushes) the file before the functions
    # that append to content.opf reopen it
    with open(opf, 'wt', encoding='utf-8') as outfp:
        # write the top headers to the opf file
        outfp.write('<?xml version="1.0" encoding="utf-8"?>\n')
        outfp.write('<package version="2.0" unique-identifier="PrimaryID" xmlns="http://www.idpf.org/2007/opf">\n')
        outfp.write('  <metadata xmlns:opf="http://www.idpf.org/2007/opf" xmlns:dc="http://purl.org/dc/elements/1.1/">\n')

    return(0)

def addOPFMetadata(dir, meta):
    """ Appends the <dc:...> metadata entries and the closing </metadata>
    tag to content.opf in 'dir'.

    meta -- dict read for the keys 'date', 'title', 'book_id', 'author',
            'publisher' and 'year'

    The language entry is the language part of the default locale, or
    'en' when the locale is unknown. Returns 0.
    """
    lang = getdefaultlocale()[0]
    if lang is None:
        lang = 'en'
    if '_' in lang:
        lang = lang.split("_")[0]

    author = meta['author'].strip()

    # NOTE(review): the values are written as-is; a title or author that
    # contains '&' or '<' would yield invalid XML unless the caller has
    # escaped it already -- confirm against the code that fills 'meta'
    metadata_lines = [
        '    <dc:date opf:event="creation">' + meta['date'] + '</dc:date>\n',
        '    <dc:title>' + meta['title'].strip() + '</dc:title>\n',
        '    <dc:identifier id="PrimaryID">urn:uuid:' + meta['book_id'] + '</dc:identifier>\n',
        '    <dc:language>' + lang + '</dc:language>\n',
        '    <dc:creator opf:role="aut">' + author + '</dc:creator>\n',
        '    <dc:publisher>' + meta['publisher'].strip() + '</dc:publisher>\n',
        '    <dc:contributor opf:role="bkp">OpenDocHTMLImport</dc:contributor>\n',
        '    <dc:rights>Worldwide Copyright ©' + meta['year'] + ' ' + author + \
                     '. All Rights Reserved.</dc:rights>\n',
        '    <meta name="cover" content="cover-img" />\n',
        '  </metadata>\n',
    ]

    opf_file = dir + os.sep + 'content.opf'

    # the with-block closes the file; the former bare "outfp.close" (no
    # call parentheses) never did
    with open(opf_file, 'at', encoding='utf-8') as outfp:
        outfp.writelines(metadata_lines)

    return(0)
    
def addManifestData(dir, file_paths, meta, images):
    """ Appends the <manifest> section to content.opf in 'dir'.

    dir        -- dir that holds content.opf
    file_paths -- text file paths; entry 0 is skipped and entry i becomes
                  item "body<i>", which matches the idrefs written by
                  addSpineData()
    meta       -- dict; meta['cover'] is the path of the cover image
    images     -- image file names; any name containing the cover file
                  name is skipped, the cover is written last as
                  "cover-img"

    Returns 0.
    """

    opf_file = dir + os.sep + 'content.opf'
    cover_fname = os.path.basename(meta['cover'])

    with open(opf_file, 'at', encoding='utf-8') as outfp:
        outfp.write('  <manifest>\n')
        outfp.write('    <item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>\n')
        outfp.write('    <item id="styles_css" href="Styles/stylesheet.css" media-type="text/css"/>\n')
        outfp.write('    <item id="cover" href="Text/cover.xhtml" media-type="application/xhtml+xml"/>\n')
        outfp.write('    <item id="contents" href="Text/contents.xhtml" media-type="application/xhtml+xml"/>\n')
        outfp.write('    <item id="title" href="Text/title.xhtml" media-type="application/xhtml+xml"/>\n')

        for i in range(1, len(file_paths)):
            outfp.write('    <item id="body' + str(i) + '" href="Text/' + os.path.basename(file_paths[i]) + '" media-type="application/xhtml+xml"/>\n')

        # add the images, if they exist
        for i, image in enumerate(images):
            # exclude the cover image
            if cover_fname in image:
                continue
            # the id keeps the image's position in 'images', skipped or not
            uid = '-00' + str(i)
            outfp.write('    <item id="img' + uid + '" href="Images/' + image + '" media-type="image/' + _imageMediaSubtype(image) + '"/>\n')

        # write the cover image data last
        outfp.write('    <item id="cover-img" href="Images/' + cover_fname + '" media-type="image/' + _imageMediaSubtype(cover_fname) + '"/>\n')

        outfp.write('  </manifest>\n')

    return(0)


def _imageMediaSubtype(image_name):
    """ Returns the "image/..." media subtype for an image file name.

    The subtype is the lower-cased text after the LAST dot, so a name such
    as 'pic.v2.png' yields 'png'; 'jpg' is mapped to 'jpeg'. A name
    without a dot yields ''.
    """
    _, dot, extension = image_name.rpartition('.')
    if not dot:
        return ''
    extension = extension.lower()
    return 'jpeg' if extension == 'jpg' else extension
    
def addSpineData(dir, file_paths):
    """ Appends the <spine> section to content.opf in 'dir'.

    The cover, contents and title itemrefs are written first, followed by
    one "body<i>" itemref for every entry of 'file_paths' except entry 0.
    Returns 0.
    """

    opf_file = dir + os.sep + 'content.opf'

    with open(opf_file, 'at', encoding='utf-8') as outfp:
        outfp.write('  <spine toc="ncx">\n')
        outfp.write('    <itemref idref="cover"/>\n')
        outfp.write('    <itemref idref="contents"/>\n')
        # the trailing newline keeps the first body itemref on its own line
        outfp.write('    <itemref idref="title"/>\n')

        for i in range(1, len(file_paths)):
            outfp.write('    <itemref idref="body' + str(i) + '"/>\n')

        outfp.write('  </spine>\n')

    return(0)
    
def addGuideSectionData(dir, file_paths):
    """ Appends the <guide> section (cover and toc references) and the
    closing </package> tag to content.opf in 'dir'.

    'file_paths' is kept for interface compatibility; nothing that was
    derived from it was ever written to the file, so the searches that
    computed those unused values have been removed. Returns 0.
    """

    print('\n -- Add the Go To guides for toc and cover')
    opf_file = dir + os.sep + 'content.opf'

    with open(opf_file, 'at', encoding='utf-8') as outfp:
        outfp.write('  <guide>\n')
        outfp.write('    <reference  type="cover" title="Cover" href="Text/cover.xhtml"/>\n')
        outfp.write('    <reference  type="toc" title="Contents" href="Text/contents.xhtml"/>\n')
        outfp.write('  </guide>\n')
        outfp.write('</package>')

    return(0)
    
def isTablePresent(wdir, file):
    """ Reports whether the html file contains a '<table' tag.

    The file is first run through checkFileEncoding()/convertFile2UTF8().
    When a table is found an error box is shown, the error is printed and
    True is returned; otherwise False is returned.
    """

    # ensure file is in utf-8 encoding
    base_name = os.path.basename(file)
    encoder = checkFileEncoding(wdir, base_name)
    convertFile2UTF8(wdir, base_name, encoder)

    with open(file, 'rt', encoding='utf-8') as infp:
        table_found = any('<table' in row for row in infp)

    if not table_found:
        return False

    msg = 'Tables have been detected in the html file. Tables are not supported by this plugin.'
    show_msgbox('Table Error', msg, msgtype='error')
    print('\n>>> Error: ' + msg + '\n\nAbort plugin...')
    return True
    
def getImageSize(images_dir, image):
    """ Uses PIL to get image dimensions.

    images_dir -- only used to build the path shown in the error message
    image      -- path of the image file; newlines in it are removed

    Returns the tuple (width, height).
    Raises Exception, after showing an error box, when the image cannot
    be opened.
    """

    image = image.replace('\n', '')
    try:
        # the with-block closes the image file once its size is known
        with Image.open(image) as img:
            wd, ht = img.size
    except Exception as err:
        msg = 'Unable to find ebook image at: \n\n' + os.path.join(images_dir, os.path.basename(image))
        show_msgbox('Image Not Found', msg, msgtype='error')
        raise Exception(msg) from err

    return(wd, ht)
            
    
def formatImages(wdir, images_dir, line):
    """ Gives the first <img> tag of an html line a percentage width style.

    wdir       -- work dir in which the image file is looked up (by the
                  base name of the tag's src)
    images_dir -- passed on to getImageSize()
    line       -- html text expected to contain an <img> tag

    The tag's class attribute is removed, its style is set to
    'width: N%;height: auto;' (N = image width as a percentage of 650,
    capped at 100) and, when the tag had no style attribute, its width and
    height attributes are removed as well.

    Returns the re-serialised line, or 'line' unchanged when it holds no
    parsable <img> tag or the tag has no src.
    """

    soup = BeautifulSoup(line, 'html.parser')
    img = soup.find('img')

    # nothing to format without an <img> tag that carries a src
    if img is None or not img.has_attr('src'):
        return(line)

    # locate the image in the work dir by its file name
    file_name = os.path.basename(img['src'])
    file_path = os.path.join(wdir, file_name).replace(r'%20', ' ')

    # calculates width as a percentage of screen width
    width, _ = getImageSize(images_dir, file_path)
    perc_width = min(round(width/650 * 100), 100)

    if img.has_attr('class'):
        del img['class']

    if not img.has_attr('style'):
        # the fixed dimensions are only dropped for tags without a style
        for attr in ('width', 'height'):
            if img.has_attr(attr):
                del img[attr]

    img['style'] = 'width: ' + str(perc_width) + '%;height: auto;'

    return(str(soup))
    
def reformatBookImages(wdir, images_dir, file):
    """ Inserts and reformats all ebook images of the html file.

    Every line containing '<img' is passed through formatImages(); the
    first <img> tag of the result gets src '../Images/<file name>' and an
    empty alt. All other lines are copied unchanged. The rewritten copy,
    built in 'wdir', then replaces 'file'.

    When a line holds a base64 image an error is shown, options.SYS_EXIT
    is set to True and 'file' is left untouched.

    Always returns 0.
    """

    print('\n -- Reformat smaller images')
    outfile = wdir + os.sep + 'images.html'
    base64_found = False

    # both handles are closed on every path, including the base64 abort
    with open(file, 'rt', encoding='utf-8') as infp, \
            open(outfile, 'wt', encoding='utf-8') as outfp:
        for line in infp:
            if '<img' not in line:
                outfp.write(line)
                continue

            if 'base64' in line:
                base64_found = True
                break

            line = formatImages(wdir, images_dir, line)
            html = BeautifulSoup(line, 'html.parser')
            atag = html.find('img')
            if atag is None or not atag.has_attr('src'):
                # '<img' was only plain text, or the tag has no src
                outfp.write(line)
                continue
            atag['src'] = '../Images/' + os.path.basename(atag['src'])
            atag['alt'] = ""
            outfp.write(str(html))

    if base64_found:
        # do not leave the half-written temp file behind
        os.remove(outfile)
        msg = 'This plugin does not support images in base64 bitmap format. You must ensure that you insert all your ebook images in html as linked image files(as jpeg, gif, png etc). Please try again.'
        show_msgbox('Incorrect Image Format', msg, msgtype='error')
        print('\n>>> Error: ' + msg + '\n\nAbort plugin...')
        options.SYS_EXIT = True
        return(0)

    os.replace(outfile, file)
    return(0)
    
def hasHeadingStyle(wdir, file):
    """ Checks that the chosen heading tag occurs in the html file.

    The tag searched for is '<h1' when options.H1_SELECTED is True and
    '<h2' otherwise. Returns True when a line contains it; otherwise an
    error box is shown, the error is printed and False is returned.
    """

    heading_style = '<h1' if options.H1_SELECTED == True else '<h2'

    with open(file, 'rt', encoding='utf-8') as infp:
        if any(heading_style in row for row in infp):
            return True

    # the heading tag was not seen on any line
    msg = 'Your chosen heading style -- ' + heading_style.replace('<', '') + ' -- has not been ' + \
          'detected in the html file. Please refer to the plugin release notes(Plugin Run section) ' + \
          'for more details and try again.'
    show_msgbox('Heading Style Not Found', msg, msgtype='error')
    print('\n>>> Error: ' + msg + '\n\nAbort plugin...')
    return False

def removeNakedSpans(wdir, file):
    """ Unwraps every <span> that carries no attributes.

    'file' is resolved against 'wdir'; the cleaned markup is written to
    'temp.xhtml' in 'wdir', which then replaces the file. Returns 0.
    """
    print("\n>>> In removeNakedSpans...")

    file = os.path.join(wdir, file)
    temp_path = os.path.join(wdir, 'temp.xhtml')

    with open(temp_path, 'wt', encoding='utf-8') as sink:
        with open(file, 'rt', encoding='utf-8') as source:
            markup = source.read()

        soup = BeautifulSoup(markup, 'html.parser')

        # remove naked spans from all text/headers/images
        for span in soup.find_all('span'):
            if span.attrs != {}:
                continue
            span.unwrap()

        sink.write(str(soup))

    os.remove(file)
    os.rename(temp_path, file)
    return 0