#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import unicode_literals, division, absolute_import, print_function

import os, sys, shutil, re, os.path

from log import *
import options
import cutils             
                   
import tkinter as tk
import tkinter.messagebox as mbox
from random import sample
from locale import getdefaultlocale

try:
    from sigil_bs4 import BeautifulSoup
except:
    from bs4 import BeautifulSoup

__all__=["setDialogOptions", "setAllBlockTextStyle", "setBlockTextStyle", "setFictionStyle","transformInlineStyles", "removeHTMLClutter", "cssFinalFormat", "removeBadAttributesfromCSS", "removeLevel1HTMLTags", "adjustCSSBody", "repairCSSErrors", "fixHTML", "moveInlineStyles2CSS", "setGlobalCSSValues", "formatImages", "formatBookImages", "addHTMLHeaders", "addMainHeaders", "addHTMLHeaders1", "addHTMLHeaders3", "addImageFileHeaders", "addHTMLHeaders2", "addHTMLTail", "reformatTidyStyles"]

def setDialogOptions(wdir, filenames):
    """ Runs every text-style pass that is switched on in the options module.

        The flags are looked up one at a time, immediately before the
        matching pass would run, and the passes keep a fixed order:
        fiction style, chapter block text, block text for all pages.
        wdir and filenames are handed to each pass unchanged.
        Always returns 0.
    """
    style_passes = (
        ('SET_FICTION_STYLE', setFictionStyle),
        ('SET_BLOCKTEXT_STYLE', setBlockTextStyle),
        ('SET_ALL_BLOCKTEXT_STYLE', setAllBlockTextStyle),
    )

    for flag_name, apply_style in style_passes:
        if getattr(options, flag_name):
            apply_style(wdir, filenames)

    return 0
    
def setAllBlockTextStyle(wdir, filenames):
    """ Sets all ebook text to block text format.

        Files whose name contains 'cover', 'contents' or 'title'
        (case-insensitive) are excluded and left unchanged.

        wdir      -- directory holding the files; also used for the temp file
        filenames -- file names relative to wdir

        Every other file is rewritten in place, one line at a time: each
        line is stripped and newline-terminated, and lines holding a
        paragraph (but no image and no 'ebk-centered-text') are replaced by
        that paragraph with its style/class swapped for 'ebk2-blocktext'.
        Always returns 0.
    """
    skipped_pages = ('cover', 'contents', 'title')
    output = os.path.join(wdir, 'all_blocktext_style.xhtml')

    for name in filenames:
        lowered = name.lower()
        if any(page in lowered for page in skipped_pages):
            continue

        path = os.path.join(wdir, name)
        # Both handles are context-managed so neither leaks if parsing fails.
        with open(path, 'rt', encoding='utf-8') as infp, \
                open(output, 'wt', encoding='utf-8') as outfp:
            for line in infp:
                ptag = None
                # ignore all images and centered text
                if '<p' in line and '<img' not in line and \
                        'ebk-centered-text' not in line:
                    # '<p' also matches tags such as <pre>; in that case
                    # there is no paragraph and the line is kept as it is.
                    ptag = BeautifulSoup(line, 'html.parser').p

                if ptag is not None:
                    for attr in ('style', 'class'):
                        if ptag.has_attr(attr):
                            del ptag[attr]
                    ptag['class'] = 'ebk2-blocktext'
                    line = str(ptag)

                outfp.write(line.strip() + '\n')

        # swap the rewritten copy in for the original
        os.replace(output, path)

    return 0

def setBlockTextStyle(wdir, filenames):
    """ Sets only the chapter text or story text to block text format.

        Only files whose name contains 'chapter' (case-insensitive) are
        rewritten; all other files are left unchanged.

        wdir      -- directory holding the files; also used for the temp file
        filenames -- file names relative to wdir

        Paragraph lines are replaced by the paragraph with its style/class
        swapped for 'ebk2-blocktext'. Lines with an image or with
        'ebk-centered-text' are copied through untouched. Conversion stops
        for the rest of the file at the first h1-h4 heading that follows a
        converted paragraph. Always returns 0.
    """
    heading_marks = ('<h1', '<h2', '<h3', '<h4')
    output = os.path.join(wdir, 'blocktext_style.xhtml')

    for name in filenames:
        if 'chapter' not in name.lower():
            continue

        path = os.path.join(wdir, name)
        seen_para = False           # a paragraph has been converted
        past_chapter_text = False   # a heading was met after the chapter text

        # Both handles are context-managed so neither leaks if parsing fails.
        with open(path, 'rt', encoding='utf-8') as infp, \
                open(output, 'wt', encoding='utf-8') as outfp:
            for line in infp:
                # ignore all images and centered text
                if '<img' in line or 'ebk-centered-text' in line:
                    outfp.write(line)
                    continue

                ptag = None
                if '<p' in line and not past_chapter_text:
                    # '<p' also matches tags such as <pre>; then there is
                    # no paragraph to convert and ptag stays None.
                    ptag = BeautifulSoup(line, 'html.parser').p

                if ptag is not None:
                    # convert chapter text to blocktext
                    seen_para = True
                    for attr in ('style', 'class'):
                        if ptag.has_attr(attr):
                            del ptag[attr]
                    ptag['class'] = 'ebk2-blocktext'
                    outfp.write(str(ptag).strip() + '\n')
                    continue

                if seen_para and any(mark in line for mark in heading_marks):
                    past_chapter_text = True
                outfp.write(line)

        # swap the rewritten copy in for the original
        os.replace(output, path)

    return 0
        
    
def setFictionStyle(wdir, filenames):
    """ Sets only the chapter text or story text to fiction style format.

        Fiction style is where the first para in the chapter has no indent
        while all succeeding chapter paras have an indent. Only files whose
        name contains 'chapter' (case-insensitive) are rewritten; all other
        files are left unchanged.

        wdir      -- directory holding the files; also used for the temp file
        filenames -- file names relative to wdir

        The first paragraph gets class 'ebk-text-no-indent', later ones
        'ebk-text-with-indent'; existing style/class attributes are dropped.
        Lines with an image or with 'ebk-centered-text' are copied through
        untouched. Conversion stops for the rest of the file at the first
        h1-h4 heading that follows a converted paragraph. Always returns 0.
    """
    heading_marks = ('<h1', '<h2', '<h3', '<h4')
    output = os.path.join(wdir, 'fiction_style.xhtml')

    for name in filenames:
        if 'chapter' not in name.lower():
            continue

        path = os.path.join(wdir, name)
        first_para = True           # the next paragraph is the chapter's first
        past_chapter_text = False   # a heading was met after the chapter text

        # Both handles are context-managed so neither leaks if parsing fails.
        with open(path, 'rt', encoding='utf-8') as infp, \
                open(output, 'wt', encoding='utf-8') as outfp:
            for line in infp:
                # ignore all images and centered text
                if '<img' in line or 'ebk-centered-text' in line:
                    outfp.write(line)
                    continue

                ptag = None
                if '<p' in line and not past_chapter_text:
                    # '<p' also matches tags such as <pre>; then there is
                    # no paragraph to convert and ptag stays None.
                    ptag = BeautifulSoup(line, 'html.parser').p

                if ptag is not None:
                    if first_para:
                        new_class = 'ebk-text-no-indent'
                        first_para = False
                    else:
                        new_class = 'ebk-text-with-indent'
                    for attr in ('style', 'class'):
                        if ptag.has_attr(attr):
                            del ptag[attr]
                    ptag['class'] = new_class
                    outfp.write(str(ptag).strip() + '\n')
                    continue

                # set stop flag if a main heading is encountered
                # (after the chapter text)
                if not first_para and \
                        any(mark in line for mark in heading_marks):
                    past_chapter_text = True
                outfp.write(line)

        # swap the rewritten copy in for the original
        os.replace(output, path)

    return 0

def transformInlineStyles(wdir, file):
    """ Matches and transforms relevant p tag inline styling to the
        appropriate descriptive class name. The class names assigned
        by this function are:

        ebk-centered-text
        ebk-blocktext
        ebk-text-with-indent
        ebk-text-no-indent

        Only p tags that carry a 'style' attribute and contain no image
        are considered. A class of "western" is discarded first; p tags
        that still have a class afterwards are left alone.

        wdir -- directory used for the temporary output file
        file -- path of the html file, rewritten in place

        Always returns 0.
    """
    zero_units = ('pc', 'in', 'pt', 'cm', 'mm')

    def has_zero(markup, prop):
        # True if the property is set to zero in any of the known units.
        return any('%s: 0%s' % (prop, unit) in markup for unit in zero_units)

    def has_nonzero(markup, prop):
        # True if the property is present and not one of the zero forms.
        return (prop + ':') in markup and not has_zero(markup, prop)

    def drop_presentational(tag):
        # align:left / align:justify and font are not needed once classed.
        for attr in ('align', 'font'):
            if tag.has_attr(attr):
                del tag[attr]

    output = os.path.join(wdir, 'convert_tags.html')
    with open(file, 'rt', encoding='utf-8') as infp:
        soup = BeautifulSoup(infp.read(), 'html.parser')

    # where possible - convert in-tag styles to named classes
    for s in soup.find_all('p'):
        if not s.has_attr('style'):
            continue

        # avoid image re-styling (handled later)
        if '<img' in str(s):
            continue

        # get rid of default p tag classes
        if 'class="western"' in str(s) and s.has_attr('class'):
            del s['class']

        # The substring tests run against the tag's full markup (attributes
        # and contents), taken after the class clean-up above.
        markup = str(s)
        centered = 'align="center"' in markup or 'text-align: center' in markup

        if centered or s.has_attr('class'):
            # convert to 'centered-text'; tags with a class are not re-styled
            if not s.has_attr('class'):
                del s['align']
                s['class'] = 'ebk-centered-text'
            continue

        has_indent = 'text-indent:' in markup
        no_margins = 'margin-top:' not in markup and \
            'margin-bottom:' not in markup
        zero_margin = has_zero(markup, 'margin-top') or \
            has_zero(markup, 'margin-bottom')

        # NOTE(review): as in the original expression, `and` binds tighter
        # than `or`, so the text-indent test only guards the margin-bottom
        # clause - confirm whether it was meant to guard margin-top as well.
        # The original wrote `s(str)` here instead of `str(s)`, which made
        # the text-indent test always pass.
        if has_nonzero(markup, 'margin-top') or \
                (has_nonzero(markup, 'margin-bottom') and not has_indent):
            new_class = 'ebk-blocktext'
        elif no_margins or zero_margin:
            if has_indent:
                new_class = 'ebk-text-with-indent'
            else:
                new_class = 'ebk-text-no-indent'
        elif not has_indent:
            # default is 'text-no-indent', if all else fails!
            new_class = 'ebk-text-no-indent'
        else:
            # indented text with non-zero margins keeps its inline style
            continue

        drop_presentational(s)
        s['class'] = new_class

    with open(output, 'wt', encoding='utf-8') as outfp:
        outfp.write(str(soup))

    os.replace(output, file)
    return 0
    
def removeHTMLClutter(wdir, file):
    """ Cleans up and preserves internet links and
        removes spurious <br> tags from the html.

        wdir -- directory used for the temporary output files
        file -- path of the html file, rewritten in place

        Pass 1 (parsed): in p tags that hold an http/https link or an '@',
        the first span is unwrapped; every anchor that is not an
        http/https link and holds no '@' is replaced by its contents.
        Pass 2 (line based): lines that consist only of a stray <br />,
        <br/>, <h1> or </h1> fragment are dropped, as is every line that
        contains </body> or </html>.

        Undecodable bytes in the input are ignored. Returns file.
    """
    def is_link_markup(tag):
        # http, https and anything with an '@' (email) count as a link.
        markup = str(tag)
        return 'href="http:' in markup or \
            'href="https:' in markup or \
            '@' in markup

    with open(file, 'rt', encoding='utf-8', errors='ignore') as infp:
        soup = BeautifulSoup(infp.read(), 'html.parser')

    # deletes unnecessary spans from http and email lines
    for para in soup.find_all('p'):
        if is_link_markup(para) and para.span:
            para.span.unwrap()

    # allow only http and email in anchors
    for anchor in soup.find_all('a'):
        if not is_link_markup(anchor):
            # the anchor tag itself goes, so its href goes with it
            anchor.unwrap()

    output = os.path.join(wdir, 'remove_anchors.html')
    with open(output, 'wt', encoding='utf-8') as outfp:
        outfp.write(str(soup))
    os.replace(output, file)

    # remove html clutter
    clutter_lines = ('<br /></h1>', '<br /></p>', '<br />', '<br/>',
                     '<h1>', '</h1>')
    output = os.path.join(wdir, 'remove_clutter.html')
    with open(file, 'rt', encoding='utf-8') as infp, \
            open(output, 'wt', encoding='utf-8') as outfp:
        for line in infp:
            if line.strip() in clutter_lines or \
                    '</body>' in line or \
                    '</html>' in line:
                continue
            outfp.write(line)

    os.replace(output, file)
    return file

def cssFinalFormat(wdir):
    """ Converts any remaining one-line CSS rules in stylesheet.css
        to stacked format (one declaration per line).

        wdir -- directory that holds stylesheet.css

        A line that contains '{', ':' and '}' is split after the opening
        brace, after every ';' and after the closing brace. Every line is
        written back stripped and newline-terminated. The original wrote
        only the reformatted one-line rules and silently dropped every
        other line of the stylesheet; those lines are now kept.
        Always returns 0.
    """
    infile = os.path.join(wdir, 'stylesheet.css')
    outfile = os.path.join(wdir, 'final_one.css')

    with open(infile, 'r', encoding='utf-8') as infp, \
            open(outfile, 'w', encoding='utf-8') as outfp:
        for line in infp:
            # format attribute from line to stacked format
            if '{' in line and ':' in line and '}' in line:
                line = line.replace('{', '  {\n')
                line = line.replace(';', ';\n')
                line = line.replace('}', '}\n')
            outfp.write(line.strip() + '\n')

    os.replace(outfile, infile)
    return 0
    
    
def removeBadAttributesfromCSS(wdir):
    """ Removes unwanted declarations from stylesheet.css.

        wdir -- directory that holds stylesheet.css

        Lines that mention font-family, page-break, 'position: absolute',
        line-height, widows or orphans are dropped, as are lines that are
        just ';'. The text '-western' is removed from the remaining lines.
        Always returns 0.
    """
    print('\n -- Normalize the CSS file...')
    print(' -- Remove unwanted attributes from the CSS')

    unwanted = ('font-family:', 'page-break', 'position: absolute',
                'line-height', 'widows', 'orphans')

    file = os.path.join(wdir, 'stylesheet.css')
    output = os.path.join(wdir, 'removed_fonts.css')

    # Both handles are context-managed so the output is closed even if
    # reading the stylesheet fails part-way.
    with open(file, 'r', encoding='utf8') as infp, \
            open(output, 'w', encoding='utf-8') as outfp:
        for line in infp:
            if line == ';\n' or any(marker in line for marker in unwanted):
                continue
            outfp.write(line.replace('-western', ''))

    os.replace(output, file)
    return 0

def removeLevel1HTMLTags(wdir, file):
    """ Removes the document-level tags from an html file.

        wdir -- directory used for the temporary output file
        file -- path of the html file, rewritten in place

        Every line that contains a doctype, or an html, head, meta, title,
        style or body tag (all-lowercase or all-uppercase spelling) is
        dropped as a whole line; all other lines are kept unchanged.
        Always returns 0.
    """
    print('\n -- Remove all level 1 html tags')

    # '</html>' previously carried a trailing space and so never matched a
    # normal closing line; a bare '<html>' was missed for the same reason
    # that '<style>' needs listing next to '<style '.
    level1_marks = (
        '<!DOCTYPE',
        '<html>', '<HTML>', '<html ', '<HTML ', '</html>', '</HTML>',
        '<head>', '<HEAD>', '</head>', '</HEAD>',
        '<meta ', '<META ',
        '<title>', '<TITLE>', '</title>', '</TITLE>',
        '<style>', '<STYLE>', '<style ', '<STYLE ', '</style>', '</STYLE>',
        '<body', '<BODY', '</body>', '</BODY>',
    )

    output = os.path.join(wdir, 'remove_tags.html')
    with open(file, 'r', encoding='utf8') as infp, \
            open(output, 'w', encoding='utf-8') as outfp:
        for line in infp:
            if any(mark in line for mark in level1_marks):
                continue
            outfp.write(line)

    os.replace(output, file)
    return 0
    

def adjustCSSBody(wdir):
    """ Adds a font-family and a line-height to the 'body.globals' rule
        of stylesheet.css.

        wdir -- directory that holds stylesheet.css

        The two declarations are inserted directly after the opening
        'body.globals  {'. Anything that followed the brace on the same
        line (trailing blanks, further declarations) is moved below the
        inserted lines; the original skipped the font-family and put the
        line-height after that text instead. All other lines are written
        back stripped and newline-terminated. Always returns 0.
    """
    print(' -- Adjust CSS body attributes')

    opener = 'body.globals  {'
    additions = 'font-family: "Times New Roman", serif;\n' \
                'line-height: 1.2em;\n'

    file = os.path.join(wdir, 'stylesheet.css')
    output = os.path.join(wdir, 'body_repaired.css')

    with open(file, 'r', encoding='utf8') as infp, \
            open(output, 'w', encoding='utf-8') as outfp:
        for line in infp:
            if opener in line:
                lead, _, rest = line.partition(opener)
                outfp.write(lead + opener + '\n' + additions)
                if rest.strip():
                    outfp.write(rest.strip() + '\n')
            else:
                outfp.write(line.strip() + '\n')

    os.replace(output, file)
    return 0
    
    
def repairCSSErrors(wdir):
    """ Removes adhoc garbage from stylesheet.css and terminates
        unterminated declarations.

        wdir -- directory that holds stylesheet.css

        Only lines that contain '{', '}', '/*' or ':' are kept. A kept line
        that has a ':' but no brace and does not already end in ';' is
        stripped and given a ';'. Trailing blanks or a missing final
        newline no longer cause a second ';' to be appended.

        The original also computed a stacked form of one-line rules, but
        only after the line had been written, so it never reached the
        file; that dead code is gone and the output is unaffected.
        Always returns 0.
    """
    print(' -- Remove adhoc garbage from the CSS')

    css_tokens = ('{', '}', '/*', ':')

    file = os.path.join(wdir, 'stylesheet.css')
    output = os.path.join(wdir, 'css_repaired.css')

    with open(file, 'r', encoding='utf8') as infp, \
            open(output, 'w', encoding='utf-8') as outfp:
        for line in infp:
            if not any(token in line for token in css_tokens):
                continue    # not CSS - drop it

            bare_declaration = ':' in line and \
                '{' not in line and '}' not in line
            if bare_declaration and not line.rstrip().endswith(';'):
                line = line.strip() + ';\n'
            outfp.write(line)

    os.replace(output, file)
    return 0
              

def fixHTML(wdir, file):
    """ Removes all unnecessary proprietary
        tags or attributes from the html.

        wdir -- directory used for the temporary output file
        file -- path of the html file, rewritten in place

        The document is parsed once and cleaned in a fixed sequence of
        passes (the order matters, later passes see the result of earlier
        ones): drop left alignment, drop proprietary attributes, drop
        'text-decoration: none', turn anchor names into ids, remove
        'background: transparent' spans, tidy images, fold font styles
        and centering into the p tag's style, strip h1 styles, unwrap
        font and bare span tags, remove <br> tags and remove empty
        headings and blank paragraphs.

        Returns file.
    """
    heading_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
    centered_style = 'text-align: center; text-indent: 0em;'

    output = os.path.join(wdir, 'new_html.htm')
    with open(file, 'rt', encoding='utf-8') as infp:
        soup = BeautifulSoup(infp.read(), 'html.parser')

    # remove redundant left alignment; the tag names go in a list - as a
    # second positional argument 'span' would be taken as a class filter
    for c in soup.find_all(['p', 'span']):
        if str(c.get('align', '')).lower() == 'left':
            del c['align']

    # remove all unwanted proprietary attributes from the html doc
    search_tags = ['p', 'img', 'span', 'body', 'br'] + heading_tags
    search_attribs = ['dir', 'border', 'title', 'link', 'text', 'lang', 'clear']
    for a in soup.find_all(search_tags):
        for attribute in search_attribs:
            del a[attribute]

    # remove unnecessary text decoration
    for s in soup.find_all(['p', 'span']):
        if s.has_attr('style') and 'text-decoration: none' in str(s):
            s['style'] = str(s['style']).replace('text-decoration: none', '')

    # anchors: use 'id' in place of 'name'
    for r in soup.find_all('a'):
        if r.has_attr('name'):
            r['id'] = r['name']
            del r['name']

    # remove 'background: transparent'
    # NOTE(review): extract() takes the span out together with everything
    # inside it - confirm that dropping the contents is intended.
    for j in soup.find_all('span'):
        if 'background: transparent' in str(j):
            j.extract()

    # remove the first <br> tag from p tags that hold an image
    for g in soup.find_all('p'):
        if g.img and g.br:
            g.br.decompose()

    # add "alt" to img tags
    for f in soup.find_all('img'):
        if not f.has_attr('alt'):
            f['alt'] = ""

    # add 'font' attributes to 'style' attributes
    for p in soup.find_all('p'):
        font = p.font
        if font is None or not font.has_attr('style'):
            continue
        if p.has_attr('style'):
            joiner = '' if p['style'].endswith(';') else ';'
            p['style'] = p['style'] + joiner + font['style']
        else:
            p['style'] = font['style']
        del font['style']

    # convert align=center to text-align in 'style' attributes; p tags
    # with neither a class nor a style are left as they are
    for t in soup.find_all('p'):
        if 'align="center"' not in str(t):
            continue
        if not (t.has_attr('class') or t.has_attr('style')):
            continue
        del t['align']
        if t.has_attr('style'):
            joiner = ' ' if t['style'].endswith(';') else '; '
            t['style'] = t['style'] + joiner + centered_style
        else:
            t['style'] = centered_style

    # remove all 'style' attributes from h1 tags
    for h in soup.find_all('h1'):
        if h.has_attr('style'):
            del h['style']

    # remove any empty h tags -- this prevents
    # excessive chapter file generation
    # from h1 tags and keeps everything tidy
    for y in soup.find_all(heading_tags):
        if y.get_text() in ('', ' '):
            y.extract()

    # remove all font face declarations
    for f in soup.find_all('font'):
        if f.has_attr('face'):
            f.unwrap()

    # remove all 'size = 3' font declarations
    for x in soup.find_all('font'):
        if x.has_attr('size') and x['size'] == "3":
            x.unwrap()

    # remove lone empty spans
    for s in soup.find_all('span'):
        if '<span>' in str(s):
            s.unwrap()

    # remove hard line breaks
    for x in soup.find_all('br'):
        x.extract()

    # remove h tags left empty by the passes above
    for y in soup.find_all(heading_tags):
        if len(y.get_text()) == 0:
            y.extract()

    # remove any p tags with just space
    for x in soup.find_all('p'):
        if x.string == ' ':
            x.decompose()

    with open(output, 'wt', encoding='utf-8') as outfp:
        outfp.write(str(soup))

    os.replace(output, file)
    return file
   
def moveInlineStyles2CSS(wdir, file):
    """ Moves the CDATA style section of an html file to the stylesheet.

        wdir -- directory that holds stylesheet.css; also used for the
                temporary files
        file -- path of the html file, rewritten in place

        reformatTidyStyles(wdir, file) is run first. Then every line
        between a line containing '/*<![CDATA[*/' and the next line
        containing '/*]]>*/' is appended to stylesheet.css (stripped,
        empty lines skipped) and removed from the html, together with the
        two marker lines. Finally the stylesheet is rewritten so that
        one-line rules for classed p, span, div, h1-h6, b and i selectors
        are lower-cased and stacked, one declaration per line.

        Always returns 0.
    """
    print(' -- Move HTML inline styles to CSS')

    reformatTidyStyles(wdir, file)

    stylesheet = os.path.join(wdir, 'stylesheet.css')
    html_copy = os.path.join(wdir, 'text.html')
    stacked_copy = os.path.join(wdir, 'styles.css')

    # copy the CDATA styles to the stylesheet
    with open(file, 'rt', encoding='utf-8') as infp, \
            open(stylesheet, 'at', encoding='utf-8') as css_out, \
            open(html_copy, 'wt', encoding='utf-8') as html_out:
        in_cdata = False
        for line in infp:
            if in_cdata:
                if '/*]]>*/' in line:
                    in_cdata = False        # closing marker line is dropped
                elif line != '\n':
                    css_out.write(line.strip() + '\n')
            elif '/*<![CDATA[*/' in line:
                in_cdata = True             # opening marker line is dropped
            else:
                html_out.write(line)

    os.replace(html_copy, file)

    # properly format the CDATA styles in the stylesheet
    rule_prefixes = ('p.', 'span.', 'div.', 'h1.', 'h2.', 'h3.',
                     'h4.', 'h5.', 'h6.', 'b.', 'i.')

    # The copy is opened for writing, not appending, so that a file left
    # behind by an interrupted run cannot leak into the stylesheet.
    with open(stylesheet, 'rt', encoding='utf-8') as infp, \
            open(stacked_copy, 'wt', encoding='utf-8') as outfp:
        for line in infp:
            # reformat the CSS styles properly in stacked format
            if '{' in line and '}' in line and \
                    any(prefix in line for prefix in rule_prefixes):
                line = line.strip().lower()
                line = line.replace('{', ' {\n')
                if '; ' in line:
                    line = line.replace('; ', ';\n')
                else:
                    line = line.replace(';', ';\n')
                line = line.replace('}', '\n}\n')
            outfp.write(line)

    os.replace(stacked_copy, stylesheet)
    return 0
   
def setGlobalCSSValues(wdir):
    """ The p tag, rather like the "Normal" style
        in Word, is inherited by all styles. So by adding
        attributes to this style I am deliberately setting
        default attribute values for all paragraph styles in
        the CSS. This is especially useful for avoiding LITB
        problems (due to the Kindle overrides) after KDP upload.

        wdir -- directory that holds stylesheet.css

        In the plain 'p' rule any text-indent, text-align, font-size,
        font-weight and font-style lines are replaced by fixed defaults,
        written just before the closing brace. The plain h1-h6 rules get
        'text-indent: 0em;' before their closing brace. A rule is only
        rewritten when its opening line is exactly the selector followed
        by '{', so selectors that merely end in 'p' (for example
        'ol.top  {') are no longer mistaken for the p rule, and a closing
        brace without a trailing newline is handled.

        Always returns 0.
    """
    print(' -- Add useful and helpful globals and presets to CSS')

    replaced_in_p = ('text-indent:', 'text-align:', 'font-size:',
                     'font-weight:', 'font-style:')
    p_defaults = 'text-indent: 0em;\ntext-align: justify;\nfont-size: 1em;\n' \
                 'font-weight: normal;\nfont-style: normal;\n'
    heading_defaults = 'text-indent: 0em;\n'
    heading_selectors = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')

    file = os.path.join(wdir, 'stylesheet.css')
    outfile = os.path.join(wdir, 'attributes.css')

    with open(file, 'rt', encoding='utf-8') as infp, \
            open(outfile, 'wt', encoding='utf-8') as outfp:
        pending = None          # defaults still to be added to the open rule
        inside_p_rule = False
        for line in infp:
            if pending is None:
                # outside a rule that is being rewritten: copy the line
                # and check whether it opens one
                outfp.write(line)
                opener = line.strip()
                if opener.endswith('{'):
                    selector = opener[:-1].strip()
                    if selector == 'p':
                        pending = p_defaults
                        inside_p_rule = True
                    elif selector in heading_selectors:
                        # add text-indent 0em to all heading styles
                        # to avoid LITB problems after KDP upload
                        pending = heading_defaults
                        inside_p_rule = False
                continue

            # the p rule loses its own values for the defaulted properties
            if inside_p_rule and any(prop in line for prop in replaced_in_p):
                continue

            if '}' in line:
                # keep whatever precedes the brace, then defaults, then close
                outfp.write(line[:line.rindex('}')] + pending + '}\n')
                pending = None
            else:
                outfp.write(line)

    os.replace(outfile, file)
    return 0


def formatImages(line, ldir):
    """ Rewrite one html line containing an <img> tag into ebook image markup.

        line -- a single line of html text
        ldir -- the local directory that is searched for the image file

        Returns the replacement markup: a <div> wrapping a
        <p class="ebk-imagestyle"> that holds the image, sized as a
        percentage width. The line is returned unchanged when it has no
        <img> tag or when the tag has no src attribute.

        If the image file is not found in ldir the user is asked whether
        to continue. On 'yes' the line is returned with any
        ' align="left"' removed; on 'no' options.TMP_WORK_DIR is deleted
        and the program exits through sys.exit(0).
    """
    from cutils import getImageSize

    # get the image link
    soup = BeautifulSoup(line, 'html.parser')
    tag = soup.find('img')
    if tag is None:
        return line

    # an <img> without a src attribute cannot be resolved to a file;
    # leave the line untouched instead of failing with a KeyError
    src = tag.get('src')
    if src is None:
        return line

    # only the file name part of the link is used; the image is looked up
    # in the local working dir
    image_path = ldir + os.sep + os.path.basename(src)

    if not os.path.isfile(image_path):
        print('\n >>> Error: This image file does not exist in your local directory:' + '\n >>> ' + image_path)
        print(' >>> Could not add this image to the epub.')
        print(' >>> Check that this image is in the same directory as your html file and try again.\n')

        # A hidden root window is needed to own the message box. Destroy it
        # afterwards so that repeated missing images do not pile up Tk
        # instances.
        root = tk.Tk()
        root.withdraw()
        try:
            result = mbox.askyesno('eBook Image File not Found', 'This ebook image file does not exist in your local directory: \n' + os.path.basename(image_path) + \
                       '\n\nUnable to add this image to the epub. This missing ebook image should be in the '+ \
                       'same directory as your html file.\n\nDo you want to continue with the conversion and ' + \
                       'add the missing image(s) later using Sigil?', icon=mbox.WARNING)
        finally:
            root.destroy()

        if result:
            return line.replace(' align="left"', '')

        shutil.rmtree(options.TMP_WORK_DIR, ignore_errors=True)
        sys.exit(0)

    # image dimensions as a percentage of a 900 x 1200 reference screen,
    # capped at 100%
    width, height = getImageSize(image_path)
    perc_width = min(100, round(width/900 * 100))
    perc_height = min(100, round(height/1200 * 100))

    # spaces in the file name are replaced by underscores in the link
    image_name = os.path.basename(image_path).replace(' ', '_')

    # images that fill the reference screen in both directions and are not
    # wider than tall get no vertical margins; all others get 2em margins
    if perc_width == 100 and perc_height == 100 and not width > height:
        div_style = 'margin-top: 0em;margin-bottom: 0;'
    else:
        div_style = 'margin-top: 2em;margin-bottom: 2em;'

    return ('<div style="' + div_style + '">\n' +
            ' <p class="ebk-imagestyle"><img alt="" src="../Images/' + image_name + '" style="width: ' +
            str(perc_width) + '%;height: auto;"/></p>\n</div>\n')
    
def formatBookImages(ldir, wdir, file):
    """ Reformat every image line of an html file in place.

        ldir -- local directory passed on to formatImages()
        wdir -- working directory used for the temporary output file
        file -- path of the html file to process

        Each line containing '<img' is replaced by the result of
        formatImages(); all other lines are copied unchanged.
        Returns 0.
    """
    print(' -- Reformat and insert ebook images')
    outfile = os.path.join(wdir, 'images.html')

    # context managers make sure both files are closed even when
    # formatImages() raises or ends the program through sys.exit()
    with open(file, 'rt', encoding='utf-8') as infp, \
            open(outfile, 'wt', encoding='utf-8') as outfp:
        for line in infp:
            if '<img' in line:
                line = formatImages(line, ldir)
            outfp.write(line)

    # os.replace overwrites the original in one step on every platform
    os.replace(outfile, file)
    return 0
#
#   Add html headers  
#    
def addHTMLHeaders(outfp, debug_flag):
    """ Write the html header produced by addHTMLHeaders1() followed by
        the output of addHTMLHeaders2() to outfp.

        outfp      -- writable text file object
        debug_flag -- passed to logmsg5()

        Returns outfp.
    """
    logmsg5(debug_flag)
    # addHTMLHeaders1() needs a page title. No title is available here, so
    # an empty one is passed; calling it with outfp alone raised TypeError.
    outfp = addHTMLHeaders1(outfp, '')
    # NOTE(review): addHTMLHeaders1() already writes '</head>' and the
    # opening <body> tag, so this adds a second '</head>' -- confirm that
    # this is intended.
    outfp = addHTMLHeaders2(outfp)
    return outfp

def addMainHeaders(outfp, fname):
    """ Write the main xhtml header titled fname to outfp by delegating
        to addHTMLHeaders1() and return its result.
    """
    return addHTMLHeaders1(outfp, fname)
#
#   Add the XHTML meta header
#    
def addHTMLHeaders1(outfp, fname=''):
    """ Write the xhtml prolog, <head> section and opening <body> tag.

        outfp -- writable text file object
        fname -- text placed in the <title> element (default: empty)

        The xml:lang attribute is taken from the default locale, with '_'
        replaced by '-'; 'en-US' is used when no locale can be determined.
        Returns outfp.
    """
    # getdefaultlocale() returns (None, None) when no locale is set and
    # raises ValueError for locale names it cannot parse
    try:
        lang = getdefaultlocale()[0]
    except ValueError:
        lang = None
    if lang is None:
        lang = 'en-US'
    lang = lang.replace("_", "-")

    outfp.write('<?xml version="1.0" encoding="utf-8"?>\n')
    outfp.write('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n\n')
    # no newline is written after the <html> tag; <head> follows on the
    # same line
    outfp.write('<html xml:lang="' + lang + '" xmlns="http://www.w3.org/1999/xhtml">')
    outfp.write('<head>\n')
    outfp.write('<title>' + fname + '</title>\n')
    outfp.write('  <link href="../Styles/stylesheet.css" type="text/css" rel="stylesheet"/>\n')
    outfp.write('</head>\n')
    outfp.write('<body class="globals">\n')
    return outfp
    
def addHTMLHeaders3(outfp, fname):
    """ Write a fixed xhtml header (xml:lang="en", with a Content-Type
        meta tag), a <title> holding fname and the opening <body> tag
        to outfp.

        Returns outfp.
    """
    emit = outfp.write

    # everything that comes before the title
    for chunk in (
        '<?xml version="1.0" encoding="utf-8"?>\n',
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n\n',
        '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">\n',
        '<head>\n',
        '<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8"/>\n',
    ):
        emit(chunk)

    emit('<title>' + fname + '</title>\n')

    # stylesheet link, end of head, start of body
    for chunk in (
        '  <link href="../Styles/stylesheet.css" type="text/css" rel="stylesheet"/>\n',
        '</head>\n',
        '<body class="globals">\n',
    ):
        emit(chunk)

    return outfp

def addImageFileHeaders(outfp):
    """ Write the xhtml prolog and a complete <head> section titled
        'Cover' to outfp. No <body> tag is written.

        Returns outfp.
    """
    header_lines = (
        '<?xml version="1.0" encoding="utf-8"?>\n',
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n\n',
        '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">\n',
        '<head>\n',
        '<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8"/>\n',
        '<title>Cover</title>\n',
        '  <link href="../Styles/stylesheet.css" type="text/css" rel="stylesheet"/>\n',
        '</head>\n',
    )
    for text in header_lines:
        outfp.write(text)
    return outfp
    
#
#   Write the closing </head> tag
#    
def addHTMLHeaders2(outfp):
    """ Write a closing </head> tag followed by an empty line to outfp.

        Returns outfp.
    """
    outfp.write('</head>\n\n')
    return outfp

    
def addHTMLTail(wdir, file):
    """ Append the closing </body> and </html> tags to an html file.

        wdir -- working directory used for the temporary output file
        file -- path of the html file to complete

        The file is copied line by line to 'tails.html' in wdir, the
        closing tags are added, and the result replaces the original.
        Returns 0.
    """
    output = os.path.join(wdir, "tails.html")

    # both handles are closed by the with statement, also on errors
    with open(file, 'rt', encoding='utf-8') as infp, \
            open(output, 'wt', encoding='utf-8') as outfp:
        for line in infp:
            outfp.write(line)
        outfp.write('\n</body>\n</html>\n\n')

    # os.replace overwrites the original in one step on every platform
    os.replace(output, file)
    return 0
 
def reformatTidyStyles(wdir, file):
    """ Reformat the style rules at the top of an html file in place.

        wdir -- working directory used for the temporary output file
        file -- path of the html file to process

        Blank lines, and lines containing '@font-face', '{font-family:',
        'panose-' or 'transform:', are dropped from the whole file.
        Until the first line containing '</style>' is reached, every
        remaining line is split so that '{', ';' and '}' each end a line.
        From that line onwards lines are only stripped of surrounding
        whitespace. Returns 0.
    """
    dropped_markers = ('@font-face', '{font-family:', 'panose-', 'transform:')
    output = os.path.join(wdir, 'ostyles1.html')
    in_styles = True

    # Open the input first so a missing file does not leave an empty
    # temporary file behind; the with statement closes both handles even
    # on errors.
    with open(file, 'rt', encoding='utf-8') as infp, \
            open(output, 'wt', encoding='utf-8') as outfp:
        for line in infp:
            if not line.strip() or any(marker in line for marker in dropped_markers):
                continue

            # the first '</style>' line ends the reformatting for good
            if in_styles and '</style>' in line:
                in_styles = False

            if in_styles:
                line = line.replace('{', '{\n')
                line = line.replace(';', ';\n')
                line = line.replace('}', '\n}\n')

            outfp.write(line.strip() + '\n')

    # os.replace overwrites the original in one step on every platform
    os.replace(output, file)
    return 0