#from __future__ import division
#from options import *

from __future__ import unicode_literals, division, absolute_import, print_function

#********************************************************************************#
#                                                                                #
# MIT Licence(OSI)                                                               #
# Copyright (c) 2017 Bill Thompson                                               #
#                                                                                #
# Permission is hereby granted, free of charge, to any person obtaining a copy   # 
# of this software and associated documentation files (the "Software"), to deal  # 
# in the Software without restriction, including without limitation the rights   #
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell      #
# copies of the Software, and to permit persons to whom the Software is          #
# furnished to do so, subject to the following conditions:                       # 
#                                                                                #
# The above copyright notice and this permission notice shall be included in all #
# copies or substantial portions of the Software.                                #
#                                                                                # 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR     # 
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,       #
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE    #
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER         # 
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  # 
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE  # 
# SOFTWARE.                                                                      #
#                                                                                #  
#********************************************************************************#

import os, sys, re, os.path, codecs
import chardet
from PIL import Image
import shutil
from tempfile import mkdtemp
from random import sample
from decimal import *
import codecs
import chardet
import locale

import options
from log import *
from epubref import *
from doc_tidy import docTidyNoWrap, showHTMLBodyOnly
from hformat import (removeLevel1HTMLTags,
                    removeHTMLClutter,
                    addMainHeaders,
                    formatImages)

import tkinter as tk
import tkinter.messagebox as mbox
import time

try:
    from sigil_bs4 import BeautifulSoup
except:
    from bs4 import BeautifulSoup


# Names exported when this module is star-imported
# ("from <module> import *"); anything not listed here stays private.
__all__=["cleanFileNames", "show_msgbox", "duplicateFile", "educateHTML", "getBookImages", "removeNonAsciiChars", "splitHeaders2Files", "getImageSize", "educateHeader", "fixEncodingErrors", "removeHTMLTags", "checkMainFile", "checkCoverImageFile", "validateHTMLFile", "sanitizeHTML", "checkHeadingFormat", "checkDocType", "convertFile2UTF8", "checkFileEncoding", "cleanExit", "convert2EmValues", "convertAbs2RelCSSValues", "transformInternalLinks", "removeWrap", "removeDocTOC", "repairHTMLIDs", 'checkandRepairID']
         
     
def cleanFileNames(line):
    """ Returns a copy of 'line' that can be used as a file
        name on Linux, OSX and Windows.

        Characters outside string.printable are dropped, reserved
        characters and whitespace become underscores, and leading
        or trailing full stops are replaced by an underscore.
    """
    from string import printable

    filler = '_'
    reserved_chars = re.compile(r'[\xae\0\\|\?\*<":>\+/]')

    # keep the printable characters only, then mask the reserved ones
    name = ''.join([ch for ch in line if ch in printable])
    name = reserved_chars.sub(filler, name)

    # whitespace of any kind becomes an underscore
    name = re.sub(r'\s', '_', name).strip()

    # a name made of nothing but full stops collapses to one underscore
    name = re.sub(r'^\.+$', '_', name)
    name = name.replace('..', filler)

    # Windows rejects path components that end with a full stop
    if name.endswith('.'):
        name = name[:-1] + filler

    # Mac and Unix treat names starting with a full stop as hidden
    if name.startswith('.'):
        name = filler + name[1:]

    return name
    
    
def show_msgbox(title, msg, msgtype='info'):
    """ Shows a tkinter message box for general user
        information, warnings and errors.

        msgtype picks the dialog: 'info', 'warning' or 'error'.
        Any other value shows no dialog and returns None.
        Returns whatever the tkinter dialog function returns.
    """
    # a withdrawn root window is created before the dialog is shown
    hidden_root = tk.Tk()
    hidden_root.withdraw()
    hidden_root.option_add('*font', 'Helvetica -12')
    hidden_root.quit()

    dialogs = (('info', mbox.showinfo),
               ('warning', mbox.showwarning),
               ('error', mbox.showerror))
    for kind, dialog in dialogs:
        if msgtype == kind:
            return dialog(title, msg)

    return None
    

def duplicateFile(file):
    """ Copies 'file' into a freshly created temp dir.

        Returns (path of the copy, path of the temp dir).
    """
    temp_dir = mkdtemp()
    duplicate = os.path.join(temp_dir, os.path.basename(file))
    shutil.copyfile(file, duplicate)
    return duplicate, temp_dir
 
def educateHTML(line):
    """ Adjusts the tags and attributes of one line of html:
        removes 'text-decoration: none' and closing div tags,
        resets the body tag, strips the 'western' class prefixes
        from paragraphs and adds class="western" to bare h1-h6 tags.
    """
    # drop the declaration with its semi-colon when that form is
    # present, otherwise drop the form without the semi-colon
    if 'text-decoration: none;' in line:
        line = line.replace('text-decoration: none;', '')
    else:
        line = line.replace('text-decoration: none', '')

    line = line.replace('</div>', '')

    # a body tag carrying attributes replaces the whole line
    if '<body ' in line:
        line = '<body>\n'

    line = line.replace('-western', '')
    line = line.replace('<p class="western"', '<p')

    # bare heading tags all receive the 'western' class
    for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
        line = line.replace('<' + tag + '>', '<' + tag + ' class="western">')

    return line

def getBookImages(ldir, cover_image):
    """ Collects the names of the image files found in ldir.

        A file counts as an image when its name contains one of
        the known image extensions; names containing cover_image
        are left out. Returns the list of file names.
    """
    image_markers = ('.jpeg', '.jpg', '.bmp', '.png', '.gif')

    # every entry that is not the cover and looks like an image
    images = [entry for entry in os.listdir(ldir)
              if cover_image not in entry
              and any(marker in entry for marker in image_markers)]

    if images:
        print(' -- Found ' + str(len(images)) + ' ebook images in your local dir')
    else:
        print('\n >>> Warning: No ebook images were found.\n')

    return images
    
    
def removeNonAsciiChars(line):
    """ Swaps curly quotes, the curly apostrophe and the
        ellipsis character for their plain ascii forms.
        Used for the file split, where the heading text
        becomes the xhtml file name stored in the zip file.
    """
    ascii_forms = (('“', '"'),
                   ('”', '"'),
                   ("’", "'"),
                   ('…', '...'))

    for fancy, plain in ascii_forms:
        line = line.replace(fancy, plain)

    return line
    
def splitHeaders2Files(dir, file, inpath, title):
    """ Splits the html file into separate xhtml chapter files.

        A line of 'inpath' that contains '<h1' (or the first line
        that has some text) and is not a 'Table of Contents' or
        'Contents' heading starts a new '<heading>.xhtml' file in
        'dir'; the lines that follow are written to that file as
        body text. The first file created is renamed 'title.xhtml'.

        dir    -- work directory that receives the xhtml files
        file   -- not used by this function
        inpath -- path of the html file to split (read as utf-8)
        title  -- not used by this function

        Returns (file_path_names, pure_header_names): the created
        files (full path for the first entry, base names for the
        rest) and the lower-cased heading text of each file.

        Raises ValueError when no heading text is found in 'inpath'.
    """
    from hformat import addHTMLTail

    outfp = None
    write_bodytext = False
    file_path_names = []
    pure_header_names = []

    # remove all Level 1 html tags
    showHTMLBodyOnly(dir, inpath)

    print(' -- Split all chapters/headers into separate xhtml files')
    print(' -- Add meta headers to all the new html header files')

    # split the headings into separate xhtml
    # files to help create the epub structure
    try:
        with open(inpath, 'rt', encoding='utf-8') as infp:
            first_time = True

            for line in infp:

                if ('<h1' in line or first_time) and \
                        removeWrap(line) not in ('Table of Contents', 'Contents'):
                    first_time = False

                    # ignore heading lines that contain no text
                    if BeautifulSoup(line, 'html.parser').text.strip() == '':
                        first_time = True
                        continue

                    # clean up file names
                    line = removeNonAsciiChars(line)

                    # reformat the header names
                    header = BeautifulSoup(line, 'html.parser').get_text()
                    header = header.replace('\n', '')
                    line_save = header

                    # remove apostrophes and quotes in the header
                    # that are not allowed in zip file names
                    header = header.replace("'", "")
                    header = header.replace('"', '')

                    # if chapters are only numeric then
                    # add 'Chapter' to the file name
                    if header.isdigit() and len(header) <= 3:
                        header = 'Chapter' + ' ' + header

                    # spaces are not allowed in zip file names
                    header = header.strip()
                    header = header.replace(' ', '_')
                    header = header + '.xhtml'
                    if header == '.xhtml':
                        continue

                    header = educateHeader(header)

                    # finish the previous chapter file before starting
                    # the next one so only one output file is open at a time
                    if outfp is not None:
                        outfp.close()
                        outfp = None

                    # create the header file lists
                    out_path = dir + os.sep + header.lower()
                    outfp = open(out_path, 'wt', encoding='utf-8')
                    file_path_names.append(out_path)
                    pure_header_names.append(line_save.lower())

                    # add the html meta headers
                    addMainHeaders(outfp, line_save)
                    outfp.write(line)
                    write_bodytext = True

                # write the body text for each main heading
                elif write_bodytext:
                    # remove all tags containing
                    # no text but allow images and divs
                    keeps_markup = '<img' in line or \
                                   '<div' in line or '</div>' in line
                    if not keeps_markup and \
                            BeautifulSoup(line, 'html.parser').text.strip() == '':
                        continue
                    outfp.write(line.replace('\t', ''))
    finally:
        # the last chapter file is closed even if the split fails part way
        if outfp is not None:
            outfp.close()

    if not file_path_names:
        raise ValueError('No chapter headings were found in ' + inpath)

    # change the title file name
    old_name = file_path_names[0]
    file_path_names[0] = dir + os.sep + 'title.xhtml'
    os.rename(old_name, file_path_names[0])

    # add </body> and </html> to the
    # end of each xhtml text file,
    # remove html clutter, tidy up
    # and prettify files
    for path in file_path_names:
        addHTMLTail(dir, path)
        docTidyNoWrap(dir, path)
        prettifyXHTMLFile(dir, path)

    # convert file name paths to basenames
    # (the first entry, the title file, keeps its full path)
    file_path_names[1:] = [os.path.basename(path)
                           for path in file_path_names[1:]]

    return(file_path_names, pure_header_names)
        
def getImageSize(image):
    """ Uses PIL to get image dimensions.

        image -- path of the image file; newline characters
                 in the path are removed before it is opened

        Returns a (width, height) tuple.
    """
    image = image.replace('\n', '')
    # the context manager closes the image file once the size has
    # been read; a bare Image.open() leaves the file handle open
    with Image.open(image) as im:
        wd, ht = im.size
    return(wd, ht)
        
def educateHeader(header):
    """ Makes a header string usable as a file name.

        Quotes are removed, characters that are not allowed in
        file names are replaced by an underscore, a leading or
        trailing full stop becomes an underscore and the numeric
        html entities for curly quotes are removed.

        Returns the cleaned header string.
    """
    # quotes and apostrophes are dropped altogether
    for char in ('"', "'"):
        header = header.replace(char, '')

    # dashes and reserved file name characters become underscores
    for char in ('-', '—', '*', '<', '>', '?', ':', '/', '\\', '|'):
        header = header.replace(char, '_')

    # Windows doesn't like path components that end with a period
    if header.endswith('.'):
        header = header[:-1] + '_'

    # Mac and Unix don't like file names that begin with a full stop
    # (the result has to be assigned back to header to take effect)
    if header.startswith('.'):
        header = '_' + header[1:]

    # remove the entities for single and double curly quotes
    for entity in ("&#8217;", "&#8216;", "&#8220;", "&#8221;"):
        header = header.replace(entity, "")

    return(header)
    
def fixEncodingErrors(line):
    """ Fixes encoding problems caused by 
        en dash, em dash, curly quotes, ellipses etc
    """    
    # repair mixed encoding
    # cp 1252 to utf-8
    line = line.replace('â€™','’')       # apostrohe   
    line = line.replace('â€œ','“')       # left double quote 
    line = line.replace('â€','”')        # right double quote    
    line = line.replace('Â©','©')        # copyright
    line = line.replace('Â®','®')        # registered
    line = line.replace('â€”', '—')      # em dash
    line = line.replace('â€“', '–')      # en dash
    line = line.replace('â„¢', '™')
    line = line.replace('â”', '–')
    
    # latin-1 encoded in win cp1252 in utf-8 code
    line = line.replace('Ã¢â‚¬Å“', '“')
    line = line.replace('Ã¢â‚¬Â', '”')
    line = line.replace('Ã¢â‚¬â„¢', '’')
    
    line = line.replace('â', '“')
    line = line.replace('â', '”')
    line = line.replace('â', '’')
    line = line.replace('â', '‘')
    line = line.replace('â', '–')
    line = line.replace('â¦', '…')  
    
    # other encodings to utf-8
    line = line.replace('Ã¢â‚¬Å“','“')   # left double quote
    line = line.replace('¢â‚¬Â','”')     # right double quote    
    line = line.replace('Ã¢â‚¬â„¢','’')  # apostrohe, right single quote
    line = line.replace('Ã¢â‚¬Ëœ', '‘')  # left single quote 
    line = line.replace('Ã¢â‚¬“','–')    # en dash
    line = line.replace('Ã¢â‚¬”', '—')
    line = line.replace('Ì¶', '–')
    line = line.replace('Ã”', '”')
    line = line.replace('Ã”', '”')    
    line = line.replace('Ã”Ã', '”')
    line = line.replace('Ã', '')
    line = line.replace('Ãƒâ€šÃ‚Â', '')
    line = line.replace('ÃƒÂ¢Ã¢â€šÂ¬Ã¢â€žÂ¢', '’')
    line = line.replace('ÃƒÂ¢Ã¢â€šÂ¬Ã…â€œ', '“')
    line = line.replace('ÃƒÂ¢Ã¢â€šÂ¬Ã‚Â', '”')
    line = line.replace('ÃƒÂ¢Ã¢â€šÂ¬Ã¢â‚¬Å“','–')
    line = line.replace('ÃƒÂ¢Ã¢â€šÂ¬Ã‹Å“', '‘')
    
    
    line = line.replace('Â', '')
    line = line.replace('Â', '')
    line = line.replace('Â', '’')
    
    line = line.replace('', '’') 
    line = line.replace('', '—')   
    line = line.replace('', '“')    
    line = line.replace('', '”')  
    line = line.replace('', '‘')    
    line = line.replace('', '…')
    line = line.replace('', '–')
    line = line.replace('© ', '©')
    return(line)


def removeHTMLTags(line):
    """ Returns the text of 'line' as extracted by
        BeautifulSoup's html parser, i.e. without its html tags.
    """
    return BeautifulSoup(line, 'html.parser').get_text()
    
 
def checkMainFile(file):
    """ Checks that the html file exists and has
        the '.html' extension.

        Returns 0 on success. On failure an error box is
        shown, the temp work dir is removed and 1 is returned.
    """
    print(' -- User input validation checks...')

    has_html_ext = os.path.basename(file).endswith('.html')
    if os.path.isfile(file) and has_html_ext:
        print(' -- Main html file found...PASS')
        return 0

    print('\n >>> Error: The html file does not exist.')
    mbox.showerror(
        'User Input Error',
        'Your selected input html file '
        'either does not exist or you have chosen a file '
        'with the wrong extension. Your selected file should '
        'be an ODF html document. '
        '\n\nPlease check the html file path and try again.')
    shutil.rmtree(options.TMP_WORK_DIR, ignore_errors=True)
    return 1
 
 
def checkCoverImageFile(img_file):
    """ Checks that the cover image file exists.

        Returns 0 when it does. Otherwise an error box is
        shown, the temp work dir is removed and 1 is returned.
    """
    if not os.path.isfile(img_file):
        print('\n >>> Error: The cover image file does not exist.')
        mbox.showerror(
            'User Input Error',
            'The cover image file path does not exist.\n\n'
            'Please check and try again')
        shutil.rmtree(options.TMP_WORK_DIR, ignore_errors=True)
        return 1

    print(' -- eBook cover file found...PASS')
    return 0
                        
                        
def validateHTMLFile(file):
    """ Validates the html input file by running the doc type
        check followed by the heading format check.

        Returns 'FAIL' (after removing the temp work dir) as soon
        as one of the checks returns 1, otherwise 'PASS'.
    """
    for check in (checkDocType, checkHeadingFormat):
        if check(file) == 1:
            shutil.rmtree(options.TMP_WORK_DIR, ignore_errors=True)
            return 'FAIL'

    return 'PASS'
    

def sanitizeHTML(file, wdir, ldir):
    """ Cleans, fixes and sanitizes the html, line by line.

        file -- path of the html file to clean (read as utf-8)
        wdir -- work directory; receives 'tempo.htm' and the
                cleaned copy of the file
        ldir -- not used by this function

        Returns the path of the cleaned file, which is written
        to wdir under the base name of 'file'. The intermediate
        'tempo.htm' is left in wdir.
    """
    print(' ')
    print(' -- Clean, fix and sanitize html garbage code...')
    print(' -- Fix mixed encoding errors')
    print(' -- Remove adhoc garbage code...')

    outfile = wdir + os.sep + 'tempo.htm'

    # both files are closed by the with-statement,
    # even when one of the cleaning steps raises
    with open(outfile, 'wt', encoding='utf-8') as outfp, \
            open(file, 'rt', encoding='utf-8') as infp:

        # clean out html file
        logmsg19()
        for line in infp:

            # fix mixed encoding problems
            line = fixEncodingErrors(line)

            # change/remove tags, attributes and prefixes
            line = educateHTML(line)

            # remove any lone mangled bold or span tags
            stripped = line.strip()
            if stripped.startswith('<b>') and stripped.endswith('</b>'):
                continue
            if stripped.startswith('<span ') and stripped.endswith('</span>'):
                continue

            # unwrap all chapter and main header
            # in the html to avoid read errors
            if line.startswith('<h1') and '</h1>' not in line:
                line = line.strip()

            line = line.replace('&#160;', ' ')
            line = line.replace(' dir="ltr"', '')

            if '@media' in line:
                line = '\r\n' + line

            if '<div' in line or '</div>' in line:
                continue

            line = line.replace(' _normal', '')

            line = line.replace('</div>\n', '')
            line = line.replace('</body>\n', '')
            line = line.replace('</html>\n', '')
            line = line.replace('</body></html>', '')

            line = line.replace('\n</p>', '</p>')

            # drop html comments and the style/font definition markers
            if '<!--' in line or '-->' in line or \
                    '/* Style Definitions */' in line or \
                    '/* Font Definitions */' in line:
                continue

            if line.strip() == '':
                continue

            # a paragraph that does not end on this line is joined
            # to the next one: its newline is swapped for a space
            if '<p' in line and '</p>\n' not in line:
                line = line + ' '
                line = line.replace('\n', '')

            line = line.replace('><p', '>\n<p')

            # remove empty or isolated p tags
            if line == '<p>\n':
                continue

            if '<div ' in line:
                continue

            if line.startswith('<p'):
                line = '\n' + line

            line = line.replace('<b></b>', '')

            # NOTE(review): the first form is replaced by ';' rather
            # than removed - kept as is, confirm that this is intended
            if ' STYLE="page-break-before: always;"' in line:
                line = line.replace(' STYLE="page-break-before: always;"', ';')
            elif ' STYLE="page-break-before: always"' in line:
                line = line.replace(' STYLE="page-break-before: always"', '')

            outfp.write(line)

    newfile = wdir + os.sep + os.path.basename(file)
    shutil.copy2(outfile, newfile)

    return(newfile)


def checkHeadingFormat(file):
    """ Checks that heading 1 style
        has been used in the html.

        A heading counts when an h1 start tag is found in any
        line, with or without attributes and in upper or lower
        case ('<h1>', '<h1 class=...>', '<H1 ...>').

        Returns 0 when a heading is found. Otherwise an error box
        is shown, the temp work dir is removed and 1 is returned.
    """
    with open(file, 'rt', encoding=('utf-8'), errors='ignore') as fp:
        for line in fp:
            # '<h1' must be followed by whitespace or '>' so that a
            # bare '<h1>' is accepted but other tag names are not
            if re.search(r'<h1[\s>]', line.lower()):
                print(' -- "Heading 1" style is used in the input html file....PASS')
                return(0)

    root = tk.Tk()
    root.withdraw()
    root.quit()
    print('\n >>> Critial Error: "Heading 1" style is not used in your OpenDoc html file...FAIL\n')
    mbox.showerror('OpenDoc Format Error', '"Heading 1" paragraph style was not detected in your OpenDoc html file. ' + \
                   'This means that the plugin app cannot build the TOC or XML structure for your epub.' + \
                   '\n\nYou must ensure that you always use "Heading 1" paragraph style to mark all your ' + \
                   'ebook chapters/headers that you want in your generated epub TOC. \n\nPlease reformat your OpenDoc ' + \
                   'headers appropriately using "Heading 1" style as advised and try again.')
    shutil.rmtree(options.TMP_WORK_DIR, ignore_errors=True)
    logmsg32()
    return(1)


def checkDocType(file):
    """ Checks that the html file was written by LibreOffice
        or OpenOffice, i.e. that one of its meta lines names
        either of them.

        Returns 0 when such a line is found. Otherwise an error
        box is shown, the temp work dir is removed and 1 is returned.
    """
    print('\n -- Input file validation checks...')

    with open(file, 'rt', encoding='utf-8', errors='ignore') as infp:
        is_opendoc = any(
            '<meta' in row.lower() and
            ('LibreOffice' in row or 'OpenOffice' in row)
            for row in infp)

    if is_opendoc:
        print(' -- Input html file is in OpenDoc HTML format...PASS')
        return 0

    root = tk.Tk()
    root.withdraw()
    root.quit()
    print('\n >>> Critical Error: Invalid doctype....FAIL')
    mbox.showerror(
        'Invalid Doc Type Error',
        'Your html document is the wrong html document type -- your document must be '
        'wholly created and written in OpenDoc format(using LibreOffice or OpenOffice only) and saved as an '
        'HTML(Writer) document to be a viable document for conversion using this plugin.\n\nPlease reformat and save your '
        'document as an OpenDoc HTML document and try again')
    shutil.rmtree(options.TMP_WORK_DIR, ignore_errors=True)
    logmsg34()
    return 1
    
 
def convertFile2UTF8(wdir, file, encoder):
    """ Converts input file to utf-8 format.

        wdir    -- work directory used for the temporary output file
        file    -- path of the html file; it is replaced in place
        encoder -- name of the encoding 'file' is currently stored in

        Returns 'file'.
    """
    print(' -- Convert input file to utf-8 if required')

    output = wdir + os.sep + 'fix_encoding.htm'

    # read the whole input first so that no output
    # file is left behind when decoding fails
    with open(file, 'rt', encoding=encoder) as infp:
        html = infp.read()

    # safely convert to unicode utf-8 using bs4
    soup = BeautifulSoup(html, 'html.parser')
    with open(output, 'wt', encoding='utf-8') as outfp:
        outfp.write(str(soup))

    # swap the original file for the utf-8 version
    os.remove(file)
    shutil.copy(output, file)
    os.remove(output)

    return(file)
    
def checkFileEncoding(wdir, file):
    """ Works out the encoding of the html file.

        The charset named in the first 2048 characters of the file
        (or the locale's preferred encoding when none of the known
        charsets is named) is compared with chardet's guess for the
        first 2048 bytes. chardet's guess is used when it is a 'utf'
        encoding or agrees with the declared one, otherwise the
        declared encoding is used.

        As a final test the file is read and written back with the
        chosen encoding. If it cannot be read the user is told, the
        temp work dir is removed and the plugin exits.

        wdir -- work directory used for the temporary test file
        file -- path of the html file to check

        Returns the name of the chosen encoding.
    """
    # charset declarations looked for in the html meta headers (in
    # this order, first match wins) and the codec each one maps to
    meta_charsets = (
        ('charset=windows-1252', 'cp1252'),
        ('charset=windows-1250', 'cp1250'),
        ('charset=windows-1253', 'cp1253'),
        ('charset=windows-1254', 'cp1254'),
        ('charset=windows-1251', 'cp1251'),
        ('charset=windows-1255', 'cp1255'),
        ('charset=windows-1256', 'cp1256'),
        ('charset=windows-1257', 'cp1257'),
        ('charset=us-ascii', 'us-ascii'),
        ('charset=ibm437', 'cp437'),
        ('charset=ibm850', 'cp850'),
        ('charset=ibm852', 'cp852'),
        ('charset=ibm855', 'cp855'),
        ('charset=iso-8859-1', 'iso-8859-1'),
        ('charset=iso-8859-2', 'iso-8859-2'),
        ('charset=iso-8859-4', 'iso-8859-4'),
        ('charset=utf-8', 'utf-8'),
    )

    # get the encoding info from the html meta headers
    with open(file, 'rt', encoding='iso-8859-1', errors='surrogateescape') as fp:
        text = fp.read(2048).lower()

    html_encoding = None
    for marker, codec in meta_charsets:
        if marker in text:
            html_encoding = codec
            break

    # get the locale encoding, if needed
    if html_encoding is None:
        html_encoding = locale.getpreferredencoding()

    # now get the file encoding using chardet; it reports None
    # when it cannot tell, in which case the declared one is used
    with open(file, 'rb') as fp:
        rawdata = fp.read(2048)
    chardet_encoding = chardet.detect(rawdata)['encoding'] or html_encoding

    # compare the html and chardet encodings and if chardet contains
    # any 'utf' encodings then go with that as a preference
    final_encoding = chardet_encoding
    if chardet_encoding.upper() != html_encoding.upper() and \
            'utf' not in chardet_encoding.lower():
        final_encoding = html_encoding

    # Final test -- open the file normally and read & write it back. If
    # that fails let the user know and stop the plugin app.
    output = wdir + os.sep + 'encoding_test.htm'
    html = None
    try:
        with open(file, 'rt', encoding=final_encoding) as infp:
            html = infp.read()
        with open(output, 'wt', encoding=final_encoding) as outfp:
            outfp.write(html)
        os.remove(file)
        os.rename(output, file)
    except (UnicodeError, LookupError, OSError):
        # a file that could not be read leaves html as None
        # and is reported below
        pass

    if html is None:
        print('\n >>> Critical Error: The html file could not be \n' + \
              ' >>> read because of file encoding problems. To resolve \n' + \
              ' >>> this issue, open your html file in any text editor \n' + \
              ' >>> and save it with UTF-8 encoding. Then try again.')
        mbox.showerror('File Encoding Error', 'The html file could not be read because of file encoding ' + \
                       'problems.\n\n To resolve this issue, open your html file in any text editor ' + \
                       'like Notepad, Notepad++ or TextEdit and save it with UTF-8 encoding. Then try again.')
        shutil.rmtree(options.TMP_WORK_DIR, ignore_errors=True)
        sys.exit(0)

    print(' -- Input file encoding is: ' + final_encoding.upper())
    return(final_encoding)

def cleanExit(wdir):
    """ Removes the work dir tree, ignoring any errors,
        and always returns 0.
    """
    shutil.rmtree(wdir, ignore_errors=True)
    return 0
    
def _formatEmValue(number):
    """ Formats a Decimal 'em' amount for the CSS: amounts smaller
        than 0.01 become '0', anything else is written in plain
        (non-exponent) notation followed by 'em'.
    """
    number = number.normalize()
    if abs(number) < Decimal('0.01'):
        return '0'
    # str() of a normalized Decimal may use exponent notation
    # (10 -> '1E+1'), which is not valid CSS; 'f' never does
    return format(number, 'f') + 'em'


def convert2EmValues(values):
    """ Converts a list of absolute CSS length values to 'em' values.

        values -- list of strings such as '0', '12pt', '0.5in',
                  '2cm', '10mm' or '1pc'

        'pt' values are divided by 12; 'in', 'cm' and 'mm' values
        are multiplied by 6.0225, 2.3710 and 0.2371; 'pc' values
        only have their unit renamed. Results are rounded to 3
        significant digits. Other non-empty values (e.g. 'auto')
        are passed through unchanged so that the number of values
        in a shorthand property is preserved.

        NOTE: the precision of the current decimal context is set
        to 3 and is left at that value.

        Returns the values as one string in the form ': v1 v2;\n'.
    """
    # multipliers for the units that are converted by multiplication
    factors = {'in': 6.0225, 'cm': 2.3710, 'mm': 0.2371}

    new_em_values = []
    for value in values:
        unit = value[-2:]

        if value == '0' or value == '0;':
            new_em_values.append(value)

        # convert 'pt' values to 'em'
        elif unit == 'pt':
            number = value.replace('pt', '')
            n_value = float(number)
            print('\n >>> float value...' + str(n_value))
            if n_value == 0:
                new_em_values.append('0em')
                continue
            getcontext().prec = 3
            strval = _formatEmValue(Decimal(number) / Decimal(12.0))
            print(' >>> decimal value...' + strval + '\n')
            new_em_values.append(strval)

        # convert 'in', 'cm' and 'mm' values to 'em'
        elif unit in factors:
            n_value = float(value.replace(unit, ''))
            if n_value == 0:
                new_em_values.append('0em')
                continue
            # set for every unit so that the result does not
            # depend on which values were converted earlier
            getcontext().prec = 3
            product = Decimal(n_value) * Decimal(factors[unit])
            new_em_values.append(_formatEmValue(product))

        # convert 'pc' values to 'em'
        elif unit == 'pc':
            new_em_values.append(value.replace('pc', 'em'))

        # keep anything else (e.g. 'auto') as it is
        elif value:
            new_em_values.append(value)

    # convert the list back to a formatted string
    new_ems = ",".join(new_em_values)
    new_ems = new_ems.replace(',', ' ')
    new_ems = ': ' + new_ems.strip() + ';\n'

    return(new_ems)
       
    
# Ordered (unit, converter) pairs. A line is handled by the FIRST unit whose
# '<unit>;' text it contains, so the order in, pt, cm, mm is significant.
# NOTE(review): the factors presumably assume a 12pt em (6.022 ~ 72.27pt per
# inch / 12, 2.371 ~ 6.022 / 2.54) -- confirm before changing them.
_ABS_UNIT_TO_EM = (
    ('in', lambda length: length * Decimal(6.022)),
    ('pt', lambda length: length / Decimal(12.0)),
    ('cm', lambda length: length * Decimal(2.3710)),
    ('mm', lambda length: length * Decimal(0.2371)),
)


def _shorthandToEm(line):
    """ Converts a 'margin:' / 'padding:' shorthand line to em values.

        Lines that already use em, % or px, lines holding only unitless
        digits, and lines that are not a single 'property: values' pair
        are returned unchanged.
    """
    if 'em' in line or '%' in line or 'px' in line:
        return line

    # anything other than exactly one colon cannot be split into
    # property and value, so leave it alone instead of raising
    if line.count(':') != 1:
        return line

    property_str, value_str = line.split(':')
    value_str = value_str.strip().replace(';', '')
    if value_str.replace(' ', '').isdigit():
        return line

    # convert2EmValues returns the ': v1 v2 ...;\n' tail for the property
    return property_str + convert2EmValues(value_str.split(' '))


def _absoluteLineToEm(line, unit, to_em):
    """ Converts one 'property: <number><unit>;' line to em.

        line  -- the raw CSS line (known to contain '<unit>;')
        unit  -- 'in', 'pt', 'cm' or 'mm'
        to_em -- callable mapping a Decimal length in 'unit' to em

        Returns the replacement line. Zero lengths keep the line layout
        and lose only the unit; converted lengths are re-emitted as
        'property: <value>em;'. A line that does not parse as a single
        numeric declaration is returned unchanged.
    """
    import re

    # a zero length: the number must consist of zeros only and must not be
    # the tail of a longer number (so '10.00pt' is NOT treated as zero)
    zero_length = r'(?<![\w.])[-+]?(?:0+(?:\.0*)?|\.0+)' + unit + ';'
    if re.search(zero_length, line):
        return re.sub(zero_length, '0;', line)

    if unit == 'in':
        # diagnostic kept from the original implementation (inch lines only)
        print(' >>> trouble line value...' + line)

    # NOTE(review): dropping '{' and prefixing '0' to a line starting with
    # '.' is inherited behaviour; it only makes sense when each declaration
    # sits on its own line -- confirm against the upstream CSS formatter.
    decl = line.strip().replace('{', '')
    if decl.startswith('.'):
        decl = '0' + decl

    if decl.count(':') != 1:
        return line
    out_em, raw_value = decl.split(':')

    try:
        number = float(raw_value.replace(unit + ';', ''))
    except ValueError:
        # e.g. 'transition: ease-in;' -- ends in '<unit>;' but is no length
        return line
    if number != number or abs(number) == float('inf'):
        # float() also accepts 'nan' / 'inf'; those are not lengths either
        return line

    # three significant digits, as before; the setting is left on the
    # thread's decimal context exactly like the original code did
    getcontext().prec = 3
    em = to_em(Decimal(number)).normalize()

    # values that round below 0.01em collapse to a unitless zero
    if abs(em) < Decimal('0.01'):
        return out_em + ': 0;\n'

    # fixed-point formatting: str() of a normalised Decimal can yield
    # scientific notation ('1E+1'), which is not valid CSS
    return out_em + ': ' + '{0:f}'.format(em) + 'em;\n'


def _convertCSSLine(line):
    """ Returns the converted form of one stylesheet line, or None when
        the line is to be dropped (blank lines and stray ';' lines).
    """
    stripped = line.strip()

    # borders and 'size...' lines are passed through untouched
    if 'border:' in line or stripped.startswith('size'):
        return line

    if stripped == '' or stripped == ';':
        return None

    if 'margin:' in line or 'padding:' in line:
        return _shorthandToEm(line)

    for unit, to_em in _ABS_UNIT_TO_EM:
        if unit + ';' in line:
            return _absoluteLineToEm(line, unit, to_em)

    # picas are simply relabelled as em
    if 'pc;' in line:
        return line.replace('pc;', 'em;')

    return line


def convertAbs2RelCSSValues(wdir):
    """ Converts absolute to relative values in the CSS

        Rewrites <wdir>/stylesheet.css in place (through the scratch file
        <wdir>/absolute.css): in, pt, cm and mm lengths become em values,
        pc is relabelled em, zero lengths lose their unit, and blank or
        lone ';' lines are removed. Lines that cannot be parsed as a
        single numeric declaration are copied unchanged.

        Returns 0.
    """
    file = os.path.join(wdir, 'stylesheet.css')
    print(' -- Convert absolute to relative values in the CSS')
    temp = wdir + os.sep + 'absolute.css'

    # both handles are closed even if a line fails to convert; the
    # original stylesheet is only replaced after a complete pass
    with open(file, 'rt', encoding='utf-8') as infp, \
            open(temp, 'wt', encoding='utf-8') as outfp:
        for line in infp:
            converted = _convertCSSLine(line)
            if converted is not None:
                outfp.write(converted)

    os.remove(file)
    os.rename(temp, file)
    return(0)

def transformInternalLinks(wdir, files):
    """ 
        This function converts all html style href links 
        to the appropriate epub style page link.

        wdir  -- working directory holding the html files
        files -- file names/paths; only the base name is used and each
                 file is looked up inside wdir

        Pass 1 records every id found on a, p, h1-h4 and div tags as
        '../Text/<file>#<id>'. Pass 2 replaces each <a href="#id"> whose
        id was recorded with that epub link. Every file is re-serialised
        through BeautifulSoup in both passes. Returns 0.
    """
    link_dict = dict()

    # gather the ids and save them as a map of id -> epub page link
    for file in files:
        file = os.path.join(wdir, os.path.basename(file))
        # join, not '+': keeps the scratch file inside wdir even when
        # wdir has no trailing separator
        output = os.path.join(wdir, 'intLinks.html')
        with open(file, 'rt', encoding='utf8') as infp:
            soup = BeautifulSoup(infp.read(), 'html.parser')

        for a_tag in soup.find_all(['a', 'p', 'h1', 'h2', 'h3', 'h4', 'div']):
            if a_tag.has_attr('id'):
                # create the epub href for that id
                link_dict[a_tag['id']] = '../Text/' + os.path.basename(file) + '#' + a_tag['id']

        with open(output, 'wt', encoding='utf-8') as outfp:
            outfp.write(str(soup))
        os.remove(file)
        os.rename(output, file)

    # now convert html hrefs to epub page links
    for file in files:
        file = os.path.join(wdir, os.path.basename(file))
        output = os.path.join(wdir, 'intLinks2.html')
        with open(file, 'rt', encoding='utf8') as infp:
            soup = BeautifulSoup(infp.read(), 'html.parser')

        for link in soup.find_all(['a']):
            if not link.has_attr('href'):
                continue
            target = link['href']
            # only same-document links ('#id') with a recorded id change
            if target.startswith('#') and target[1:] in link_dict:
                # delete-then-set is kept on purpose: it moves href to the
                # end of the tag's attributes, as the original code did
                del link['href']
                link['href'] = link_dict[target[1:]]

        with open(output, 'wt', encoding='utf-8') as outfp:
            outfp.write(str(soup))
        os.remove(file)
        os.rename(output, file)

    return(0)
    
def removeWrap(line):
    """ Returns the text content of an html fragment with all markup
        removed and surrounding whitespace stripped.

        line -- html text, or None

        None yields an empty string.
    """
    # identity test: None is a singleton, '==' can be overridden
    if line is None:
        return('')

    soup = BeautifulSoup(line, 'html.parser')
    return(soup.get_text().strip())
    
def _isDocTOCLink(atag):
    """ True when an <a> tag's href contains '#' together with 'toc'
        (any case) or 'RefHeading'.
    """
    if not atag.has_attr('href'):
        return False
    href = atag['href']
    return '#' in href and ('toc' in href.lower() or 'RefHeading' in href)


def removeDocTOC(wdir, file):
    """ Removes the document's own table of contents from an html file.

        wdir -- working directory used for the scratch files
        file -- path of the html file, rewritten in place

        Pass 1 drops every text line that contains a TOC link (see
        _isDocTOCLink). Pass 2 parses the whole file and removes any
        p/h1-h4 element whose text is exactly 'Table of Contents' or
        'Contents'. Returns 0.
    """
    # pass 1: line by line, drop lines holding a TOC link
    output = os.path.join(wdir, 'remove_TOC.html')
    with open(file, 'rt', encoding='utf8') as infp, \
            open(output, 'wt', encoding='utf-8') as outfp:
        for line in infp:
            anchors = BeautifulSoup(line, 'html.parser').find_all('a')
            if any(_isDocTOCLink(atag) for atag in anchors):
                continue
            outfp.write(line)

    os.remove(file)
    os.rename(output, file)

    # pass 2: remove the TOC heading itself
    output = os.path.join(wdir, 'remove_TOC2.html')
    with open(file, 'rt', encoding='utf8') as infp:
        soup = BeautifulSoup(infp.read(), 'html.parser')

    for tag in soup.find_all(['p', 'h1', 'h2', 'h3', 'h4']):
        if tag.get_text().strip() in ('Table of Contents', 'Contents'):
            tag.decompose()

    with open(output, 'wt', encoding='utf-8') as outfp:
        outfp.write(str(soup))
    os.remove(file)
    os.rename(output, file)
    return(0)

def repairHTMLIDs(wdir, file):
    """ Repairs the ids and fragment links of one html file in place.

        wdir -- working directory used for the scratch file
        file -- path of the html file to rewrite

        Steps: <a name="x"> becomes <a id="x">; every non-blank id on
        a, p, body, div and h1-h6 tags is passed through
        checkandRepairID; the fragment part of every <a href="...#frag">
        is repaired the same way so links keep matching their targets.
    """
    outfile = os.path.join(wdir, 'html_repair.html')
    with open(file, 'rt', encoding='utf-8') as infp:
        soup = BeautifulSoup(infp.read(), 'html.parser')

    # convert 'name' to 'id'
    for anchor in soup.find_all('a'):
        if anchor.has_attr('name'):
            anchor['id'] = anchor['name']
            del anchor['name']

    print('\n >>> Check xhtml file ids and hrefs...')
    ### check and repair the xhtml text file ids
    # the repair is idempotent, so one sweep over all tag kinds gives the
    # same ids as the former two overlapping sweeps
    id_tags = ['a', 'p', 'body', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']
    for id_tag in soup.find_all(id_tags):
        # blank ids have nothing to repair and are left untouched
        if id_tag.has_attr('id') and id_tag['id'].strip():
            id_tag['id'] = checkandRepairID(id_tag['id'])

    ### check and repair links
    for href_tag in soup.find_all('a'):
        if not href_tag.has_attr('href') or '#' not in href_tag['href']:
            continue
        # partition splits at the first '#' only, so an href holding
        # several '#' characters no longer raises
        link, _, fragment = href_tag['href'].partition('#')
        if not fragment.strip():
            # bare '#' link: no id to repair
            continue
        print(' >>> href id...' + fragment)
        href_tag['href'] = link + '#' + checkandRepairID(fragment)

    with open(outfile, 'wt', encoding='utf-8') as outfp:
        outfp.write(str(soup))
    os.remove(file)
    os.rename(outfile, file)

def checkandRepairID(id):
    """ Returns a repaired copy of an html id value.

        id -- the id (or href fragment) text

        Surrounding whitespace is stripped, inner spaces become
        underscores, and an id starting with a digit is prefixed with
        'x'. An empty or whitespace-only id is returned as ''.
    """
    id = id.strip()
    print('>>> Checking ID value...' + id)

    # replace spaces with underscore
    id = id.replace(' ', '_')

    # nothing left to inspect: avoid indexing into an empty string
    if not id:
        return(id)

    # ensure the first char is never a digit
    if id[0].isdigit():
        new_id = 'x' + id
        print('\n >>> New ID...' + new_id + '\n')
        return(new_id)

    return(id)
        