#!/Python3/python
# -*- coding: utf-8 -*-

from __future__ import unicode_literals, division, absolute_import, print_function

import os, os.path, sys, codecs, shutil, inspect, chardet, re, time
from decimal import *
from tempfile import mkdtemp                  
from PIL import Image
import options

from doc_tidy import *

import locale 
import lxml.html.clean as clean
import tkinter as tk
import tkinter.messagebox as mbox

__all__=["checkStyles", "prettifyXHTMLFile", "moveHTMLStyles2CSS", "createLinkRel", "moveInlineStyles2CSS", "show_msgbox", "show_yesnobox", "addDOCTYPEHeader", "reformatTidyStyles", "prettifyCSS", "prettifyCSS2", "addHTMLHeaders", "removeHTMLTop", "addHTMLTop", "addHTMLTail", "writeFiles2CSS", "convertAbs2RelCSSValues", "convert2EmValues", "setDefaultFont", "repositionAnchorEndTags", "removeUnusedBookmarks"]

try:
    from sigil_bs4 import BeautifulSoup, Comment
except:
    from bs4 import BeautifulSoup, Comment  
    
def checkStyles(wdir, fnames):
    """Flag whether the first input file carries an HTML <style> section.

    Reads ``fnames[0]`` (resolved against ``wdir``) line by line and sets
    ``options.HTML_STYLES`` to True as soon as a line contains ``<style>``
    or ``<style type="text/css">``. The flag is left untouched when no such
    line exists.

    Always returns 0.
    """
    style_tags = ('<style>', '<style type="text/css">')
    target = os.path.join(wdir, fnames[0])
    with open(target, 'rt', encoding='utf-8') as handle:
        # any() stops consuming the file at the first matching line
        if any(tag in text for text in handle for tag in style_tags):
            options.HTML_STYLES = True
    return 0
    
def prettifyXHTMLFile(wdir, file):
    """Reformat and prettify the XHTML file in place.

    The file is rewritten twice through the temporary file
    ``final_one.css`` in ``wdir``:

    1. Clean-up pass: a bare ``<style>`` line becomes
       ``<style type="text/css">``, any ``<body ...>`` line becomes
       ``<body>``, empty paragraphs and lines starting with ``<a`` or
       ``<span`` are dropped, ``<font>`` tags and a set of non-breaking
       space / entity leftovers are removed. Document-head lines are written
       one per line (``<meta``, ``<title>`` and ``<link`` indented by two
       spaces); every other non-empty line is written surrounded by blank
       lines, with ``<p`` lines indented by two spaces.
    2. Spacing pass: blank lines before the ``<body`` line are removed, a
       blank line is put before ``<html`` and after ``</head>``; everything
       from ``<body`` onwards is copied unchanged.

    Parameters:
        wdir: directory used for the temporary file.
        file: path of the XHTML file to rewrite.

    Always returns 0.
    """
    head_tags = ('<?xml', '<!DOCTYPE', '<html', '<head>', '<meta',
                 '<title>', '<link', '</head>', '<body')
    indented_head_tags = ('<meta', '<title>', '<link')
    # applied in this order to every line that survives the filters
    text_fixes = (
        ('&nbsp;', ' '),
        ('&#160;', ' '),
        ('&amp;#160;', ' '),
        ('&amp;#nbsp;', ''),
        ('&#146;', '’'),
        ('&amp;#146;', '’'),
        ('&amp;#9;', ''),
        ("<!--?xml version='1.0' encoding='utf-8'?-->", ''),
    )
    outfile = os.path.join(wdir, 'final_one.css')

    # ---- pass 1: clean up the markup line by line ----
    with open(file, 'rt', encoding='utf-8') as infp, \
            open(outfile, 'wt', encoding='utf-8') as outfp:
        for line in infp:
            stripped = line.strip()

            if stripped == '<style>':
                # terminate the line so a following head tag is not glued
                # onto it; surplus blank lines are removed by pass 2
                outfp.write('<style type="text/css">\n')
                continue

            if stripped.startswith('<body'):
                # drop all attributes from the body tag
                line = stripped = '<body>'

            if '<p></p>' in stripped or '<p> </p>' in stripped:
                continue

            if stripped.startswith(('<a', '<span')):
                continue

            if '<font>' in line:
                line = line.replace('<font>', '').replace('</font>', '')

            for old, new in text_fixes:
                line = line.replace(old, new)

            line = line.strip()
            if not line:
                continue

            if line.startswith(head_tags):
                if line.startswith(indented_head_tags):
                    line = '  ' + line
                if line.startswith('<body'):
                    line = '\n' + line
                outfp.write(line + '\n')
            else:
                if line.startswith('<p'):
                    line = '  ' + line
                outfp.write('\n' + line + '\n')

    os.replace(outfile, file)

    # ---- pass 2: normalise blank lines in front of <body> ----
    with open(file, 'rt', encoding='utf-8') as infp, \
            open(outfile, 'wt', encoding='utf-8') as outfp:
        body_reached = False
        for line in infp:
            if not body_reached and '<body' in line:
                body_reached = True
            if body_reached:
                outfp.write(line)
                continue
            if line.strip() == '':
                continue
            if '<html' in line:
                line = '\n' + line
            if '</head>' in line:
                line = line + '\n'
            outfp.write(line)

    os.replace(outfile, file)
    return 0
    
def moveHTMLStyles2CSS(bk, wdir, file):
    """Move the html <style> section of ``file`` into a CSS file.

    The CSS file is named by ``options.CSS_FILE_NAME`` and created in
    ``wdir``. Rules found between the opening ``<style>`` tag and
    ``</style>`` are written to it (CDATA and comment marker lines and blank
    lines are dropped); every other non-blank line of ``file`` is kept,
    stripped of surrounding whitespace. Afterwards a stylesheet link is
    added to the html and both files are prettified.

    If ``stylesheet.css`` already exists in ``wdir`` an error box is shown
    and the process exits via ``sys.exit(0)``.

    Parameters:
        bk: not used by this function.
        wdir: working directory holding the html and css files.
        file: path of the html file to process.

    Always returns 0.
    """
    print(' -- Move html <styles> to "stylesheet.css"')
    css = options.CSS_FILE_NAME

    if os.path.isfile(os.path.join(wdir, 'stylesheet.css')):
        msg = 'The html <style> section has already been moved to an epub stylesheet("stylesheet.css")!!'
        show_msgbox('Error', msg, msgtype='error')
        sys.exit(0)

    new_file = os.path.join(wdir, 'new_file.htm')
    css_file = os.path.join(wdir, css)
    wrapper_markers = ('<![CDATA', ']]>', '<!--', '-->')

    # context managers guarantee all three handles are closed on any error
    with open(file, 'rt', encoding='utf-8') as infp, \
            open(css_file, 'wt', encoding='utf8') as css_fp, \
            open(new_file, 'wt', encoding='utf-8') as html_fp:
        for line in infp:
            if line == '' or line == '\n':
                continue
            if '<style>' in line or '<style type="text/css">' in line:
                # consume the style section from the same iterator; the
                # opening and closing tags themselves are discarded
                for line in infp:
                    if any(marker in line for marker in wrapper_markers):
                        continue
                    if '</style>' in line:
                        break
                    if line.strip() == '':
                        continue
                    css_fp.write(line.strip() + '\n')
            else:
                html_fp.write(line.strip() + '\n')

    os.replace(new_file, file)

    # link the stylesheet to the html file
    css_path = os.path.join(wdir, css)
    createLinkRel(wdir, file)
    prettifyCSS(wdir, css_path)
    prettifyXHTMLFile(wdir, file)
    return 0
    
def createLinkRel(wdir, file):
    """Insert a stylesheet ``<link>`` just before ``</head>`` in ``file``.

    Every line whose stripped text is exactly ``</head>`` is replaced by a
    link to ``../Styles/stylesheet.css`` followed by ``</head>``. All other
    lines are copied unchanged. The result is written to the temporary file
    ``link_rel.html`` in ``wdir`` and then moved over ``file``.

    Always returns 0.
    """
    print(' -- Create html link to new CSS')
    link_and_close = ('<link rel="stylesheet" href="../Styles/stylesheet.css"'
                      ' type="text/css"/>\n</head>\n')
    output = os.path.join(wdir, 'link_rel.html')

    # both handles are closed even if reading or writing fails
    with open(file, 'rt', encoding='utf8') as infp, \
            open(output, 'wt', encoding='utf-8') as outfp:
        for line in infp:
            if line.strip() == '</head>':
                outfp.write(link_and_close)
            else:
                outfp.write(line)

    os.replace(output, file)
    return 0

def moveInlineStyles2CSS(bk, wdir, file):
    """Move inline styling of ``file`` into the CSS file in ``wdir``.

    The CSS file is named by ``options.CSS_FILE_NAME``. Steps:

    1. ``docTidy``, ``addDOCTYPEHeader`` and ``reformatTidyStyles`` are run
       on the file.
    2. The style section (everything before the first ``</style>`` line) is
       reformatted from one-line rules to stacked rules; CDATA and comment
       marker lines are dropped.
    3. Starting at the first line containing ``sgc-``, rules up to
       ``</style>`` are appended to the CSS file; the remaining lines are
       kept as the html.
    4. An empty ``<style type="text/css">`` element left behind is removed.
    5. The CSS and the html are prettified.

    Parameters:
        bk: not used by this function.
        wdir: working directory holding the html and css files.
        file: path of the html file to process.

    Always returns 0.
    """
    css = options.CSS_FILE_NAME

    # move inline styles to the <style> section in html
    docTidy(wdir, file)
    addDOCTYPEHeader(wdir, file)
    reformatTidyStyles(wdir, file)

    # reformat the style section from line to stacked format
    output = os.path.join(wdir, 'styler.html')
    with open(file, 'rt', encoding='utf-8') as infp, \
            open(output, 'wt', encoding='utf-8') as outfp:
        in_styles = True
        for line in infp:
            if line == '' or line == '\n':
                continue

            if '<![CDATA' in line or ']]>' in line or \
                    '<!--' in line or '-->' in line:
                continue

            if in_styles and '</style>' in line:
                # from the closing tag onwards lines are only stripped
                in_styles = False
            if in_styles:
                line = line.replace('{', ' {\n')
                line = line.replace(';', ';\n')
                line = line.replace('}', '\n}\n')
            outfp.write(line.strip() + '\n')

    os.replace(output, file)

    # move the styles section to a new stylesheet
    output1 = os.path.join(wdir, css)
    output2 = os.path.join(wdir, 'remove_styling.html')
    with open(file, 'rt', encoding='utf-8') as infp, \
            open(output1, 'at', encoding='utf-8') as outfp1, \
            open(output2, 'wt', encoding='utf-8') as outfp2:
        finish = False
        for line in infp:
            if 'sgc-' in line and not finish:
                line = line.replace('{', '{\n')
                line = line.replace(';', ';\n')
                line = line.replace('}', '\n}\n')
                outfp1.write(line.strip() + '\n')
                # consume the rest of the style section from the same
                # iterator; the line that ends this loop is written to the
                # html by the statement after the if-block
                for line in infp:
                    if '</style>' in line:
                        finish = True
                        break
                    if '/*<![CDATA[*/' in line or '/*]]>*/' in line:
                        continue
                    if line.strip() == '':
                        continue
                    line = line.replace('{', '{\n')
                    line = line.replace(';', ';\n')
                    line = line.replace('}', '\n}\n')
                    outfp1.write(line.strip() + '\n')
            outfp2.write(line)

    os.replace(output2, file)

    # drop the now-empty style element
    outfile = os.path.join(wdir, 'final_one2.css')
    with open(file, 'rt', encoding='utf-8') as fp:
        data = fp.read()
    data = data.replace('<style type="text/css">\n</style>\n', '')
    with open(outfile, 'wt', encoding='utf-8') as outfp:
        # write() rather than writelines(): data is one string, not a list
        outfp.write(data)
    os.replace(outfile, file)

    print(' -- Move html inline styling to "inline_styles.css"')
    css_path = os.path.join(wdir, css)
    prettifyCSS2(wdir, css_path)
    prettifyXHTMLFile(wdir, file)

    print('\n >>> New inline css file...' + css + '\n')
    return 0
    
def show_msgbox(title, msg, msgtype='info'):
    """Show a message box for general information, warnings and errors.

    ``msgtype`` selects the dialog: 'info', 'warning' or 'error'. The
    dialog's return value is passed back to the caller; any other
    ``msgtype`` shows nothing and returns None.
    """
    hidden_root = tk.Tk()
    hidden_root.withdraw()
    hidden_root.option_add('*font', 'Helvetica -12')
    hidden_root.quit()

    dialogs = (
        ('info', mbox.showinfo),
        ('warning', mbox.showwarning),
        ('error', mbox.showerror),
    )
    for kind, dialog in dialogs:
        if msgtype == kind:
            return dialog(title, msg)
    return None

def show_yesnobox(title, msg, msgtype='info'):
    """Ask a yes/no question in a message box.

    ``msgtype`` may be 'info' (default dialog icon) or 'warning' (warning
    icon). Returns True for "yes" and False for "no"; any other ``msgtype``
    shows nothing and returns None.
    """
    localRoot = tk.Tk()
    localRoot.withdraw()
    localRoot.option_add('*font', 'Helvetica -12')
    localRoot.quit()
    # tkinter.messagebox has no showyesno(); askyesno() is the yes/no dialog
    if msgtype == 'info':
        return mbox.askyesno(title, msg)
    if msgtype == 'warning':
        return mbox.askyesno(title, msg, icon=mbox.WARNING)
    return None
        
def addDOCTYPEHeader(wdir, file):
    """Replace the XML declaration of ``file`` with declaration + DOCTYPE.

    Lines containing ``<![CDATA[``, ``]]>`` or ``/*`` are dropped. A line
    containing ``<?xml`` is replaced by a UTF-8 XML declaration followed by
    the XHTML 1.1 DOCTYPE. All other lines are copied unchanged. The result
    is written to the temporary file ``ostyles1.html`` in ``wdir`` and then
    moved over ``file``.

    Always returns 0.
    """
    header = ('<?xml version="1.0" encoding="utf-8"?>\n'
              '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"\n'
              '  "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n\n')
    dropped_markers = ('<![CDATA[', ']]>', '/*')
    output = os.path.join(wdir, 'ostyles1.html')

    # both handles are closed even if reading or writing fails
    with open(file, 'rt', encoding='utf-8') as infp, \
            open(output, 'wt', encoding='utf-8') as outfp:
        for line in infp:
            if any(marker in line for marker in dropped_markers):
                continue
            outfp.write(header if '<?xml' in line else line)

    os.replace(output, file)
    return 0

def reformatTidyStyles(wdir, file):
    """Stack the one-line CSS rules in the style section of ``file``.

    Lines containing ``@font-face``, ``{font-family:``, ``panose-`` or
    ``transform:`` and blank lines are dropped. Up to the first line that
    contains ``</style>``, a newline is inserted after every ``{`` and
    ``;`` and around every ``}``. From that line onwards lines are only
    stripped of surrounding whitespace. The result is written to the
    temporary file ``ostyles1.html`` in ``wdir`` and then moved over
    ``file``.

    Always returns 0.
    """
    unwanted = ('@font-face', '{font-family:', 'panose-', 'transform:')
    output = os.path.join(wdir, 'ostyles1.html')

    # both handles are closed even if reading or writing fails
    with open(file, 'rt', encoding='utf-8') as infp, \
            open(output, 'wt', encoding='utf-8') as outfp:
        in_styles = True
        for line in infp:
            if any(token in line for token in unwanted):
                continue
            if line.strip() == '':
                continue

            if in_styles and '</style>' in line:
                in_styles = False
            if in_styles:
                line = line.replace('{', '{\n')
                line = line.replace(';', ';\n')
                line = line.replace('}', '\n}\n')
            outfp.write(line.strip() + '\n')

    os.replace(output, file)
    return 0
    
def prettifyCSS(wdir, css):
    """Clean up and normalise the CSS file ``css`` in place.

    Per line: ``mso-style`` declarations, blank lines, lone ``;`` lines,
    ``font-size: pt;`` and ``text-decoration-skip-ink:`` lines are dropped;
    ``Mso`` is removed from names; a space is added after ``:`` (unless the
    line already has ``: `` or contains ``a:``); upper-case element
    selectors (P, H1-H6, A:link, DIV, SPAN) are lower-cased; a doubled
    `` {  {`` is collapsed. When the file's base name is ``stylesheet.css``
    a fixed block of default rules is appended.

    The result is written to the temporary file ``link_rel.html`` in
    ``wdir`` and then moved over the CSS file.

    Parameters:
        wdir: working directory.
        css: CSS file name, joined onto ``wdir`` with ``os.path.join``.

    Always returns 0.
    """
    # 'H1.' .. 'H6.' need no entries of their own: they are already covered
    # by the plain 'H1' .. 'H6' replacements
    case_fixes = (
        ('P   {', 'p   {'),
        ('H1', 'h1'),
        ('H2', 'h2'),
        ('H3', 'h3'),
        ('H4', 'h4'),
        ('H5', 'h5'),
        ('H6', 'h6'),
        ('P.', 'p.'),
        ('A:link', 'a:link'),
        ('DIV', 'div'),
        ('SPAN', 'span'),
    )
    default_rules = (
        'p.level-1-toc-item  {\n'
        'font-size: 0.9583em;\n'
        'font-weight: bold;\n'
        'margin-top: 0em;\n'
        'margin-bottom: 0.5em;\n'
        'margin-right: 0em;\n'
        'margin-left: 1.5em;\n'
        '}\n'
        'a:link  {\n'
        'color: #0000FF;\n'
        'text-decoration: underline;\n'
        '}\n'
        'p  {\n'
        'font-style: normal;\n'
        'font-weight: normal;\n'
        'text-align: justify;\n'
        'margin-top: 0;\n'
        'margin-bottom: 0;\n'
        'padding: 0;\n'
        '}\n'
        'h1, h2, h3, h4, h5, h6  {\n'
        'text-indent: 0em;\n'
        '}\n'
        'body  {\n'
        'font-family: serif;\n'
        'text-align: justify;\n'
        'font-size: 100%;\n'
        'margin: 3% 3% 3% 3%;\n'
        '}\n'
    )

    css = os.path.join(wdir, css)
    output = os.path.join(wdir, 'link_rel.html')

    # both handles are closed even if reading or writing fails
    with open(css, 'rt', encoding='utf8') as infp, \
            open(output, 'wt', encoding='utf-8') as outfp:
        for line in infp:
            if line.strip().startswith('mso-style'):
                continue
            line = line.replace('Mso', '')
            if line.strip() == '':
                continue
            if 'font-size: pt;' in line:
                continue
            if line.strip() == ';':
                continue
            if 'text-decoration-skip-ink:' in line:
                continue
            if ':' in line and ': ' not in line and 'a:' not in line:
                line = line.replace(':', ': ')

            for old, new in case_fixes:
                line = line.replace(old, new)

            line = line.replace(' {  {', '  {')
            outfp.write(line.strip() + '\n')

        if os.path.basename(css) == 'stylesheet.css':
            outfp.write(default_rules)

    os.replace(output, css)
    return 0
    
def prettifyCSS2(wdir, css):
    """Clean up the inline-styles CSS file ``css`` and convert its units.

    Per line: ``mso-style`` declarations, blank lines, lone ``;`` lines,
    ``font-size: pt;``, ``font-family:``, ``size:`` and ``-webkit`` lines
    are dropped; ``Mso`` is removed from names; a closing ``}`` is moved
    onto its own line; a space is added after ``:`` (unless the line already
    has ``: `` or contains ``a:``); upper-case element selectors (P, H1-H6,
    A:link, DIV, SPAN) are lower-cased; a doubled `` {  {`` is collapsed;
    a line ending in ``ul`` or ``ol`` without ``{`` gets ``  {`` appended.

    The result is written to the temporary file ``link_rel.html`` in
    ``wdir``, moved over the CSS file, and then passed to
    ``convertAbs2RelCSSValues``.

    Parameters:
        wdir: working directory.
        css: CSS file name, joined onto ``wdir`` with ``os.path.join``.

    Always returns 0.
    """
    # 'H1.' .. 'H6.' need no entries of their own: they are already covered
    # by the plain 'H1' .. 'H6' replacements
    case_fixes = (
        ('P   {', 'p   {'),
        ('H1', 'h1'),
        ('H2', 'h2'),
        ('H3', 'h3'),
        ('H4', 'h4'),
        ('H5', 'h5'),
        ('H6', 'h6'),
        ('P.', 'p.'),
        ('A:link', 'a:link'),
        ('DIV', 'div'),
        ('SPAN', 'span'),
    )

    css = os.path.join(wdir, css)
    output = os.path.join(wdir, 'link_rel.html')

    # both handles are closed even if reading or writing fails
    with open(css, 'rt', encoding='utf8') as infp, \
            open(output, 'wt', encoding='utf-8') as outfp:
        for line in infp:
            if line.strip().startswith('mso-style'):
                continue

            line = line.replace('Mso', '')

            if line.strip() == '':
                continue

            if 'font-size: pt;' in line:
                continue

            if line.strip() == ';':
                continue

            if 'font-family:' in line:
                continue

            if line.strip().startswith('size:'):
                continue

            if '-webkit' in line:
                continue

            if '}\n' in line and '\n}\n' not in line:
                line = line.replace('}\n', '\n}\n')

            if ':' in line and ': ' not in line and 'a:' not in line:
                line = line.replace(':', ': ')

            for old, new in case_fixes:
                line = line.replace(old, new)

            line = line.replace(' {  {', '  {')

            if 'ul' in line and '{' not in line:
                line = line.replace('ul\n', 'ul  {\n')

            if 'ol' in line and '{' not in line:
                line = line.replace('ol\n', 'ol  {\n')

            outfp.write(line.strip() + '\n')

    os.replace(output, css)
    convertAbs2RelCSSValues(wdir, css)
    return 0
    
def addHTMLHeaders(wdir, file):
    """Wrap the content of ``file`` in a complete XHTML 1.1 document.

    A fixed header (XML declaration, DOCTYPE, ``<html>``, ``<head>`` with a
    Content-Type meta tag and an empty title, ``<body>``) is written first,
    then every line of ``file`` unchanged, then ``</body>`` and ``</html>``.
    The result is written to the temporary file ``link_rel.html`` in
    ``wdir`` and then moved over ``file``.

    Always returns 0.
    """
    header = (
        '<?xml version="1.0" encoding="utf-8"?>\n'
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"  "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n'
        '<html xmlns="http://www.w3.org/1999/xhtml">\n'
        '<head>\n'
        ' <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>\n'
        '<title></title>\n'
        '</head>\n\n'
        '<body>\n'
    )
    output = os.path.join(wdir, 'link_rel.html')

    # both handles are closed even if reading or writing fails
    with open(file, 'rt', encoding='utf8') as infp, \
            open(output, 'wt', encoding='utf-8') as outfp:
        outfp.write(header)
        outfp.writelines(infp)
        outfp.write('</body>\n</html>\n')

    os.replace(output, file)
    return 0
    

def removeHTMLTop(wdir, file):
    """Strip everything but the body content from ``file``.

    All lines before the first line containing ``<body`` are dropped, as is
    every line containing ``<body``, ``</body>``, ``</html>`` or
    ``<div></div``. The remaining lines are copied unchanged. The result is
    written to the temporary file ``remove_top.html`` in ``wdir`` and then
    moved over ``file``.

    Always returns 0.
    """
    trailer_markers = ('</body>', '</html>', '<div></div')
    output = os.path.join(wdir, 'remove_top.html')

    # both handles are closed even if reading or writing fails
    with open(file, 'rt', encoding='utf-8') as infp, \
            open(output, 'wt', encoding='utf-8') as outfp:
        body_seen = False
        for line in infp:
            if '<body' in line:
                # the body tag line itself is never copied
                body_seen = True
                continue
            if not body_seen:
                continue
            if any(marker in line for marker in trailer_markers):
                continue
            outfp.write(line)

    os.replace(output, file)
    return 0
    
def addHTMLTop(wdir, file):
    """Prepend an XHTML 1.1 document head to ``file`` and close the document.

    A fixed header (XML declaration, DOCTYPE, ``<html>``, ``<head>`` with a
    Content-Type meta tag and an empty title, ``<body>``) is written first,
    followed by every line of ``file`` unchanged. The result is written to
    the temporary file ``add_top.html`` in ``wdir`` and moved over ``file``;
    ``addHTMLTail`` is then called on the same file.

    Always returns 0.
    """
    header = (
        '<?xml version="1.0" encoding="utf-8"?>\n'
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n'
        '<html xmlns="http://www.w3.org/1999/xhtml">\n'
        '<head>\n'
        '  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
        '<title></title>\n'
        '</head>\n'
        '\n<body>\n\n'
    )
    output = os.path.join(wdir, 'add_top.html')

    # both handles are closed even if reading or writing fails
    with open(file, 'rt', encoding='utf-8') as infp, \
            open(output, 'wt', encoding='utf-8') as outfp:
        outfp.write(header)
        outfp.writelines(infp)

    os.replace(output, file)
    addHTMLTail(wdir, file)
    return 0
    
def addHTMLTail(wdir, file):
    """Append the closing ``</body>`` and ``</html>`` tags to ``file``.

    Every line of ``file`` is copied unchanged, followed by
    ``\\n</body>\\n</html>\\n\\n``. The result is written to the temporary
    file ``tails.html`` in ``wdir`` and then moved over ``file``.

    Always returns 0.
    """
    output = os.path.join(wdir, 'tails.html')

    # both handles are closed even if reading or writing fails
    with open(file, 'rt', encoding='utf-8') as infp, \
            open(output, 'wt', encoding='utf-8') as outfp:
        outfp.writelines(infp)
        outfp.write('\n</body>\n</html>\n\n')

    os.replace(output, file)
    return 0

def writeFiles2CSS(bk, wdir, ids, s_fnames):
    """Write the CSS files in ``wdir`` back through ``bk.writefile``.

    Parameters:
        bk: object providing ``writefile(id, data)``.
        wdir: directory containing the CSS files.
        ids: ids handed to ``bk.writefile``; assumed to be parallel to
            ``s_fnames`` (``ids[n]`` belongs to ``s_fnames[n]``).
        s_fnames: CSS file names relative to ``wdir``.

    Files smaller than 5 bytes are skipped. Nothing is done when
    ``s_fnames`` is empty (no css files with imported html docs).

    Always returns 0.
    """
    if len(s_fnames) == 0:
        return 0

    print(' ')
    # pair each id with its own file name so that a skipped file cannot
    # shift the ids of the files that follow it
    for file_id, name in zip(ids, s_fnames):
        path = os.path.join(wdir, name)
        if os.path.getsize(path) < 5:
            continue
        print(' -- Write to epub CSS...' + str(name))
        with open(path, 'rt', encoding='utf-8') as fp:
            bk.writefile(file_id, fp.read())

    return 0
    
def convertAbs2RelCSSValues(wdir, file):
    """ Converts absolute to relative values in the CSS

    Rewrites ``file`` line by line through the temporary file
    ``absolute.css`` in ``wdir`` and then replaces ``file`` with it.

    Per line:
      * blank lines and lines consisting only of ';' are dropped;
      * 'margin:' / 'padding:' shorthand lines are written unchanged when
        they contain 'em', '%' or 'px' or only digits; otherwise their
        values are passed to convert2EmValues();
      * lines containing 'in;', 'pt;', 'cm;' or 'mm;' have a zero value
        rewritten as '0em;', any other value is scaled to em
        (in * 6.022, pt / 12, cm * 2.3710, mm * 0.2371);
      * in lines containing 'pc;' the unit text is replaced by 'em;';
      * every other line is written unchanged.

    Always returns 0.
    """
    allowed = ['margin:',
               'margin-top:',
               'margin-bottom:',
               'margin-left',
               'margin-right',
               'padding:',
               'padding-top:',
               'padding-bottom:',
               'padding-left',
               'padding-right',
               'font-size:'
               ]

    print(' -- Convert absolute to relative values in the CSS')
    temp = wdir + os.sep + 'absolute.css'
    outfp = open(temp, 'wt', encoding=('utf-8'))
    with open(file, 'rt', encoding=('utf-8')) as infp:

        for line in infp:

                # only allow margin, padding and
                # font-size values for conversion
                # NOTE(review): `found` is set here but no later statement
                # in this function reads it, so the unit branches below run
                # for every line regardless of the property name.
                i = 0
                found = False
                while i < len(allowed):
                    if allowed[i] in line:
                        found = True
                        break
                    else:
                        i += 1
                        continue

                #remove empty lines
                if line.strip() == '':
                    continue

                # drop lines that hold nothing but a stray ';'
                if line.strip() == ';':
                    continue

                #if 'margin:' in line or 'padding:' or 'border:' or 'border-' in line:
                #    outfp.write(line)
                #    continue


                # convert the shorthand margin and padding forms to em values in place
                if 'margin:' in line or 'padding:' in line:
                    # already relative (or px): keep the line as it is
                    if 'em' in line or '%' in line or 'px' in line:
                        outfp.write(line)
                        continue
                    else:
                        # the two-name unpacking requires exactly one ':' in
                        # the line; anything else raises ValueError
                        property_str, value_str = line.split(':')
                        value_str = value_str.strip().replace(';', '')
                        value_list = value_str.split(' ')
                        # unit-less all-digit values are kept; the
                        # 'em'/'px'/'%' tests cannot match here because the
                        # outer check already excluded such lines
                        if value_str.strip().replace(' ', '').isdigit() or \
                            'em' in value_str or 'px' in value_str or '%' in value_str:
                            outfp.write(line)
                            continue
                        else:
                            em_val_str = convert2EmValues(value_list)
                            # NOTE(review): no ':' or newline is added here;
                            # presumably convert2EmValues() returns a string
                            # that supplies them - confirm against its body
                            line = property_str + em_val_str
                            outfp.write(line)
                            continue


                # convert 'in' to relative 'em' values
                if 'in;' in line:
                    # zero stays zero, only the unit text changes
                    if ' 0in;' in line  or ' 0.0in;' in line or ' 0.00in' in line or ' 0.000in' in line or ' 0.0000in' in line:
                        line = line.replace(' 0in;', ' 0em;')
                        line = line.replace(' 0.0in;', ' 0em;')
                        line = line.replace(' 0.00in;', ' 0em;')
                        line = line.replace(' 0.000in;', ' 0em;')
                        line = line.replace(' 0.0000in;', ' 0em;')
                        outfp.write(line)
                        continue

                    else:
                        line = line.strip().replace('{', '')
                        # a '0' is prepended when the stripped line itself
                        # starts with '.'
                        if line.startswith('.'):
                            line = '0' + line
                        # out_em is the text before the ':' (the property)
                        out_em, s_line = line.split(':')
                        inch = s_line.replace('in;', '')
                        inch = float(inch.strip().strip('}').strip())
                        inchf = float(inch)
                        # sets the precision of the current decimal context;
                        # it is not restored afterwards
                        getcontext().prec = 3
                        em = Decimal(inchf) * Decimal(6.022)
                        em = Decimal(em).normalize()
                        # when the text form contains '.0' the value is
                        # emitted as a rounded integer
                        if '.0' in str(em):
                            em_str = out_em + ': ' + str(int(round(em))) + 'em;\n'
                        else:
                            em_str = out_em + ': ' + str(em) + 'em;\n'
                        outfp.write(em_str)
                        continue

                # convert 'pt' to relative 'em' values
                elif 'pt;' in line:
                    # zero stays zero, only the unit text changes
                    if ' 0pt;' in line or ' 0.0pt' in line or ' 0.00pt' in line or ' 0.000pt' in line or ' 0.0000pt' in line:
                        line = line.replace(' 0pt;', ' 0em;')
                        line = line.replace(' 0.0pt;', ' 0em;')
                        line = line.replace(' 0.00pt;', ' 0em;')
                        line = line.replace(' 0.000pt;', ' 0em;')
                        line = line.replace(' 0.0000pt;', ' 0em;')
                        outfp.write(line)
                        continue

                    else:
                        line = line.strip().replace('{', '')
                        if line.startswith('.'):
                            line = '0' + line
                        out_em, s_line = line.split(':')
                        point = s_line.replace('pt;', '')
                        points = float(point.strip().strip('}').strip())
                        getcontext().prec = 3
                        # 12pt are treated as 1em
                        em = Decimal(points) / Decimal(12.0)
                        em = Decimal(em).normalize()
                        if '.0' in str(em):
                            em_str = out_em + ': ' + str(int(round(em))) + 'em;\n'
                        else:
                            em_str = out_em + ': ' + str(em) + 'em;\n'
                        outfp.write(em_str)
                        continue

                # convert 'cm' to relative 'em' values
                elif 'cm;' in line:
                    # zero stays zero, only the unit text changes
                    if ' 0cm;' in line or ' 0.0cm' in line or ' 0.00cm' in line or ' 0.000cm' in line or ' 0.0000cm' in line:
                        line = line.replace(' 0cm;', ' 0em;')
                        line = line.replace(' 0.0cm;', ' 0em;')
                        line = line.replace(' 0.00cm;', ' 0em;')
                        line = line.replace(' 0.000cm;', ' 0em;')
                        line = line.replace(' 0.0000cm;', ' 0em;')
                        outfp.write(line)
                        continue

                    else:
                        line = line.strip().replace('{', '')
                        if line.startswith('.'):
                            line = '0' + line
                        out_em, s_line = line.split(':')
                        cms = s_line.replace('cm;', '')
                        cms = float(cms.strip().strip('}').strip())
                        new_cms = float(cms)
                        getcontext().prec = 3
                        em = Decimal(new_cms) * Decimal(2.3710)
                        em = Decimal(em).normalize()
                        if '.0' in str(em):
                            em_str = out_em + ': ' + str(int(round(em))) + 'em;\n'
                        else:
                            em_str = out_em + ': ' + str(em) + 'em;\n'
                        outfp.write(em_str)
                        continue

                # convert 'mm' to relative 'em' values
                elif 'mm;' in line:

                    # zero stays zero, only the unit text changes
                    if ' 0mm;' in line or ' 0.0mm' in line or ' 0.00mm' in line or ' 0.000mm' in line or ' 0.0000mm' in line:
                        line = line.replace(' 0mm;', ' 0em;')
                        line = line.replace(' 0.0mm;', ' 0em;')
                        line = line.replace(' 0.00mm;', ' 0em;')
                        line = line.replace(' 0.000mm;', ' 0em;')
                        line = line.replace(' 0.0000mm;', ' 0em;')
                        outfp.write(line)
                        continue

                    else:
                        line = line.strip().replace('{', '')
                        if line.startswith('.'):
                            line = '0' + line
                        out_em, s_line = line.split(':')
                        mms = s_line.replace('mm;', '')
                        mms = float(mms.strip().strip('}').strip())
                        new_mms = float(mms)
                        getcontext().prec = 3
                        em = Decimal(new_mms) * Decimal(0.2371)
                        em = Decimal(em).normalize()
                        if '.0' in str(em):
                            em_str = out_em + ': ' + str(int(round(em))) + 'em;\n'
                        else:
                            em_str = out_em + ': ' + str(em) + 'em;\n'
                        outfp.write(em_str)
                        continue

                # convert 'pc' to relative 'em' values
                # only the unit text is replaced; the number is not scaled
                elif 'pc;' in line:
                    line = line.replace('pc;', 'em;')
                    outfp.write(line)
                    continue

                # no absolute unit recognised: copy the line unchanged
                else:
                    outfp.write(line)

    outfp.close()
    os.remove(file)
    os.rename(temp, file)
    return(0)
   
    
# multipliers used to turn a length in the given absolute unit into ems
_EM_PER_UNIT = {'in': 6.0225, 'cm': 2.3710, 'mm': 0.2371}


def _formatEmValue(amount):
    """ Return the Decimal 'amount' as CSS text with an 'em' suffix.

        Trailing zeros are stripped and fixed-point notation is forced,
        because str() of a normalized Decimal such as 10 is '1E+1',
        which is not a valid CSS number.
    """
    return format(amount.normalize(), 'f') + 'em'


def convert2EmValues(values):
    """ Convert a list of absolute CSS length values to 'em' values.

        values -- list of strings such as '12pt', '0.5in', '1cm',
                  '10mm', '2pc', '0' or '0;'.

        Returns the converted values as one formatted string:
        ': <v1> <v2> ...;' followed by a newline.

        '0' and '0;' are passed through untouched, 'pc' values are only
        relabelled as 'em', and zero lengths become '0em'. All other
        results are rounded to 3 significant digits. Values in any
        other unit are left out of the result.

        A ValueError is raised if the numeric part of a 'pt', 'in',
        'cm' or 'mm' value cannot be parsed by float().

        NOTE: like the other converters in this file, this sets the
        precision of the current decimal context to 3.
    """
    new_em_values = []
    for value in values:
        if value == '0' or value == '0;':
            new_em_values.append(value)
            continue

        # 'pc' values are only relabelled, no arithmetic is done
        if value.endswith('pc'):
            new_em_values.append(value.replace('pc', 'em'))
            continue

        unit = value[-2:]
        if unit != 'pt' and unit not in _EM_PER_UNIT:
            continue

        number = value[:-2]
        if float(number) == 0:
            new_em_values.append('0em')
            continue

        # applied to every unit so that 'in' values are rounded too
        getcontext().prec = 3
        if unit == 'pt':
            amount = Decimal(number) / Decimal(12.0)
        else:
            amount = Decimal(float(number)) * Decimal(_EM_PER_UNIT[unit])
        new_em_values.append(_formatEmValue(amount))

    # convert the list back to a formatted string
    new_ems = ': ' + ' '.join(new_em_values).strip() + ';\n'

    return(new_ems)
    
def setDefaultFont(wdir):
    """ Add a default serif font to the body rule of 'stylesheet.css'.

        wdir -- directory that contains 'stylesheet.css'.

        Every line that reads 'body  {' (two spaces, ignoring
        surrounding whitespace) is replaced by 'body {' followed by a
        'font-family: serif;' declaration. All other lines are copied
        unchanged. The result is written to 'default_font.css' in the
        same directory, which then replaces 'stylesheet.css'.

        Returns 0.
    """
    file = os.path.join(wdir, 'stylesheet.css')
    output = os.path.join(wdir, 'default_font.css')

    # the input is opened first so that a missing stylesheet does not
    # leave an empty output file behind
    with open(file, 'rt', encoding='utf-8') as infp, \
            open(output, 'wt', encoding='utf-8') as outfp:
        for line in infp:
            if line.strip() == 'body  {':
                line = 'body {\nfont-family: serif;\n'

            outfp.write(line)

    # single-step swap, the stylesheet is never missing in between
    os.replace(output, file)
    return(0)
            
def repositionAnchorEndTags(wdir, filepaths):
    """ 
        Some word-processors, such as Word, will 
        insert lone anchor ids(with no hrefs) that 
        surround the corresponding text string. 
        This is ported to the html file and is the 
        wrong way to format ids in epubs. For instance 
        in ADE, if your epub contains such anchor tags, 
        then the text with these ids will display as blue 
        with underline just like a link.
        
        This function just repositions the anchor end tag 
        to before the text string to avoid the above problems.

        wdir      -- directory used for the temporary 'reformat.html'
        filepaths -- paths of the html files, each rewritten in place

        For every file this also unwraps anchors that have no 
        attributes at all and renames anchor 'name' attributes 
        to 'id'. Returns 0.
    """
    output = os.path.join(wdir, 'reformat.html')
    # the two groups are walked in separate passes, in this order
    tag_groups = (['p', 'h1', 'h2', 'h3'], ['h4', 'h5', 'h6'])

    for file in filepaths:
        with open(file, 'rt', encoding='utf-8') as infp:
            soup = BeautifulSoup(infp.read(), 'html.parser')

        for atag in soup.find_all('a'):
            if atag.attrs == {}:
                atag.unwrap()

        for a_tag in soup.find_all('a'):
            if a_tag.has_attr('name'):
                old_id = a_tag['name']
                del a_tag['name']
                a_tag['id'] = old_id

        # ensure correct anchor end tag position for all anchor ids
        for group in tag_groups:
            for tag in soup.find_all(group):
                for atag in tag.find_all('a'):
                    if not atag.has_attr('id') or atag.has_attr('href'):
                        continue
                    a_string = atag.string
                    if a_string is None or a_string == '':
                        continue
                    atag.string = ''
                    # put the text directly behind the emptied anchor;
                    # appending it to the end of the enclosing tag
                    # would move it past any text that follows
                    atag.insert_after(soup.new_string(a_string))

        with open(output, 'wt', encoding='utf-8') as outfp:
            outfp.write(str(soup))
        os.replace(output, file)

    return(0)

def getHREF_ID(bk, wdir, fnames):
    """ Collect the fragment ids used by internal links.

        bk     -- not used by this function
        wdir   -- not used by this function
        fnames -- paths of the html files to scan

        Every tag with an 'href' attribute in the body of each file is
        checked (the whole document is used if there is no body). An
        href counts when it contains '#' and does not contain 'http:',
        'https:' or 'mailto:'. The text between the first and the
        second '#' is taken as the id.

        Returns the ids as a list in first-seen order, duplicates
        removed.
    """
    print('\n>>> In getHREF_ID...')
    href_ids = list()
    external_marks = ('http:', 'https:', 'mailto:')

    for file in fnames:
        # the files are only read here, no output file is needed
        with open(file, 'rt', encoding='utf-8') as infp:
            soup = BeautifulSoup(infp.read(), 'html.parser')

        # fragments without a <body> would otherwise raise AttributeError
        root = soup.body if soup.body is not None else soup

        # build a list of xhtml href ids
        for atag in root.find_all(href=True):
            href = atag['href']
            if '#' not in href:
                continue
            if any(mark in href for mark in external_marks):
                continue
            href_ids.append(href.split('#')[1])

    # remove any duplicate ids from the list
    href_ids = list(dict.fromkeys(href_ids))

    return(href_ids)
 
def removeUnusedBookmarks(bk, wdir, files):
    """ Remove all unused ids/bookmarks from 
        epub

        bk    -- passed through to getHREF_ID()
        wdir  -- directory used for the temporary 'remove_ids.html'
        files -- paths of the html files, each rewritten in place

        In the body of every file (the whole document is used if 
        there is no body) non-empty 'name' attributes are renamed 
        to 'id'. An id is then deleted unless it is in the list 
        returned by getHREF_ID() or is one of 'toc', 'cover', 
        'text', 'start' (any case). Anchors left without any 
        attribute are unwrapped. Each file is finally passed to 
        prettifyXHTMLFile(). Returns 0.
    """
    # a set gives exact matching; a substring test against the joined
    # ids would treat 'ch' as used whenever 'ch10' is linked
    href_ids = set(getHREF_ID(bk, wdir, files))
    reserved_ids = {'toc', 'cover', 'text', 'start'}
    outfile = os.path.join(wdir, 'remove_ids.html')

    for file in files:
        with open(file, 'rt', encoding='utf-8') as infp:
            soup = BeautifulSoup(infp.read(), 'html.parser')

        # fragments without a <body> would otherwise raise AttributeError
        body = soup.body if soup.body is not None else soup

        #ensure all bookmarks have only 'id' attributes
        for tag in body.find_all(True):
            idref = tag.get('name')
            if idref:
                del tag['name']
                tag['id'] = idref

        # remove all bookmarks not in the href id list
        for tag in body.find_all(id=True):
            tag_id = tag['id']
            if tag_id in href_ids or tag_id.lower() in reserved_ids:
                continue
            del tag['id']
            if tag.name == 'a' and tag.attrs == {}:
                tag.unwrap()

        with open(outfile, 'wt', encoding='utf-8') as outfp:
            outfp.write(str(soup))
        os.replace(outfile, file)
        prettifyXHTMLFile(wdir, file)

    return(0)
 
def removeUnusedBookmarks2(wdir, filepaths):
    """ Remove ids that no anchor href points to.

        wdir      -- directory used for the temporary 'remove_ids.html'
        filepaths -- paths of the html files, each rewritten in place

        First pass: every file is parsed, non-empty 'name' attributes 
        are renamed to 'id' and the part after the first '#' of every 
        anchor href is collected. 
        Second pass: ids that were not collected from any of the files 
        are deleted, anchors left without attributes are unwrapped, 
        and each file is written back.

        Returns 0. Nothing is done for an empty 'filepaths'.
    """
    outfile = os.path.join(wdir, 'remove_ids.html')
    href_ids = set()
    parsed = []

    # the ids are gathered from all files before anything is removed,
    # so a bookmark linked only from a later file is kept as well
    for file in filepaths:
        with open(file, 'rt', encoding='utf-8') as infp:
            soup = BeautifulSoup(infp.read(), 'html.parser')

        #ensure all bookmarks have 'id' attributes
        for tag in soup.find_all(True):
            idref = tag.get('name')
            if idref:
                del tag['name']
                tag['id'] = idref

        # build a list of internal href ids
        for atag in soup.find_all('a', href=True):
            href = atag['href']
            if '#' in href:
                # only the fragment is an id; a file name in front
                # of the '#' is not part of it
                href_ids.add(href.split('#', 1)[1])

        parsed.append((file, soup))

    for file, soup in parsed:
        # remove all bookmark ids not in the href id list
        for tag in soup.find_all(id=True):
            if tag['id'] not in href_ids:
                del tag['id']
                if tag.name == 'a' and tag.attrs == {}:
                    tag.unwrap()

        # remove all anchor tags with no string and no attributes
        for a_tag in soup.find_all('a'):
            if a_tag.attrs == {} and a_tag.string is None:
                a_tag.unwrap()

        # each file is saved in turn, not just the last one
        with open(outfile, 'wt', encoding='utf-8') as outfp:
            outfp.write(str(soup))
        os.replace(outfile, file)

    return(0)

                                        