#!/Python3/python
# -*- coding: utf-8 -*-

from __future__ import unicode_literals, division, absolute_import, print_function

import os, os.path, sys, codecs, shutil, inspect, re
from decimal import *
from cutils import *
from cutils3 import *
import time
from datetime import datetime
from doc_tidy import * 
from PIL import Image
import options
import tkinter as tk
import tkinter.messagebox as mbox

__all__=["processAllTasks", "copyTextFiles2Dir", "copyCSSFiles2Dir", "copyImageFiles2Dir", "writeFiles2CSS", "writeFiles2Epub", "removeLineHeight", "extraHTMLCleanup", "sanitizeHTML", "removeBadAttributesfromCSS", "cssFinalFormat", "prettifyCSS", "cleanExit", "repairHTMLIDs", "CheckandRepairID", "repositionAnchorEndTags", "getHREF_ID_Epub","removeUnusedBookmarks", "svgAttributes2CamelCase", "addData2NCXFile", "removeTextColorBlack","removeUnusedImages", "removeBGColorWhite","repairHTMLFileName","repairImageFileNames"]

# Prefer the sigil_bs4 package (presumably the copy shipped with Sigil)
# and fall back to the stand-alone bs4 package when it cannot be imported.
# Only ImportError triggers the fallback so that any other failure while
# importing is not silently masked.
try:
    from sigil_bs4 import BeautifulSoup, Comment
except ImportError:
    from bs4 import BeautifulSoup, Comment

# File-name fragments (lower case, including the trailing dot) used by
# processAllTasks() to recognise toc / cover style pages.
TOC_NAMES_LIST = [
    'nav.',
    'toc.',
    'contents.',
    'coverpage.',
    'titlepage.',
    'cover.',
]
    
def processAllTasks(bk, wdir, t_fnames, s_fnames, s_ids):
    """Run the automatic clean-up pipeline on the working-dir copies.

    Args:
        bk: book container object; it is only passed through to the
            helpers that need it (presumably the Sigil plugin container).
        wdir: working directory that holds the copied files.
        t_fnames: base names of the xhtml text files inside ``wdir``.
        s_fnames: base names of the CSS files inside ``wdir``.
        s_ids: ids of the CSS files; not used by this function, kept so
            the call signature stays unchanged.

    Returns:
        0 on completion.
    """
    print('\n -- Processing automatic tasks...')
    print(' -- Clean and reformat the html')

    # convert the CSS from a single line
    # layout per class to stacked layout.
    for css in s_fnames:
        normalizeCSSLayout(wdir, os.path.join(wdir, css))

    href_ids = getHREF_ID_Epub(bk, wdir, t_fnames)
    removeUnusedBookmarks(wdir, t_fnames, href_ids)

    print(' >>> Epub version is: ' + options.EPUB_VERSION)

    for file in t_fnames:

        # do not process the toc or the cover files.
        # The previous code issued `continue` inside the inner loop over
        # TOC_NAMES_LIST, which only advanced that inner loop and so never
        # skipped anything.  The match is a substring test on the
        # lower-cased file name, exactly as before.
        lowered = file.strip().lower()
        if any(name in lowered for name in TOC_NAMES_LIST):
            continue

        file = os.path.join(wdir, file)
        if options.EPUB_VERSION == 'EPUB2':
            docTidyNoWrap(wdir, file)
        prettifyXHTMLFile(wdir, file)
        removeHardBreaks(wdir, file)
        removeAlignAttr(wdir, file)
        convertName2IDAttr(wdir, file)
        sanitizeHTML(wdir, file)
        removeFontTags(wdir, file)
        removeRedundantHTML(wdir, file)
        extraHTMLCleanup(wdir, file)
        removeAttributes(wdir, file)
        removeLangAttrs(wdir, file)
        convertTags(wdir, file)
        removeAllIDsLinks(wdir, file)
        removeAllIDS(bk, wdir, file)
        removePageLinks(wdir, file)
        removeInternalLinks(wdir, file)
        removeInternetLinks(wdir, file)
        removeDivTags(wdir, file)
        fixHTMLAttrValues(wdir, file)
        repairHTMLIDs(wdir, file)
        removeBGColorWhite(bk, wdir, file)
        removeTextColorBlack(bk, wdir, file)
        # The alt-attribute pass is run both before and after the small
        # image reformatting step; the order is kept as it was.
        addAltAttr2ImageTags(wdir, file)
        reformatSmallImages(wdir, file)
        addAltAttr2ImageTags(wdir, file)
        removeUnusedImages(bk)
        prettifyXHTMLFile(wdir, file)

    for fname in s_fnames:
        fname = os.path.join(wdir, fname)
        removeBadAttributesfromCSS(bk, wdir, fname)
        removeRedundantCSS(wdir, fname)
        cssFinalFormat(wdir, fname)

    return(0)
    
def copyTextFiles2Dir(bk, wdir):
    """Copy every text (xhtml) file of the book into ``wdir``.

    The full ``bk.text_iter()`` listing is collected first; each file is
    then read with ``bk.readfile()``, passed through BeautifulSoup's
    'html.parser' and written as utf-8 under its base name.

    Returns:
        A tuple ``(ids, file_names)`` of two parallel lists for the files
        that were written.
    """
    entries = [(manifest_id, os.path.basename(href))
               for manifest_id, href in bk.text_iter()]

    copied_ids = []
    copied_names = []
    for manifest_id, fname in entries:
        target = os.path.join(wdir, fname)
        print(' -- Copy to work dir...' + fname)
        with open(target, 'wt', encoding='utf-8') as outfp:
            soup = BeautifulSoup(bk.readfile(manifest_id), 'html.parser')
            copied_names.append(fname)
            copied_ids.append(manifest_id)
            outfp.write(str(soup))

    return (copied_ids, copied_names)
                
def copyCSSFiles2Dir(bk, wdir):
    """Copy every CSS file of the book into ``wdir``.

    Returns:
        A tuple ``(ids, file_names)`` of two parallel lists built from
        ``bk.css_iter()``; file names are the base names of the hrefs.
    """
    sheets = [(sid, os.path.basename(href)) for sid, href in bk.css_iter()]

    for sid, fname in sheets:
        target = os.path.join(wdir, fname)
        print(' -- Write to work dir...' + target)
        with open(target, 'wt', encoding='utf-8') as outfp:
            # NOTE(review): the stylesheet text is round-tripped through
            # the HTML parser before being written; confirm this cannot
            # alter characters such as '>' in selectors.
            parsed = BeautifulSoup(bk.readfile(sid), 'html.parser')
            outfp.write(str(parsed))

    sheet_ids = [sid for sid, _ in sheets]
    sheet_names = [fname for _, fname in sheets]
    return (sheet_ids, sheet_names)

def copyImageFiles2Dir(bk, wdir):
    """Copy every image of the book into ``wdir``.

    Files whose name ends in '.svg' are written in utf-8 text mode, all
    other images in binary mode; the data comes from ``bk.readfile()``.

    Returns:
        A tuple ``(ids, file_names)`` of two parallel lists built from
        ``bk.image_iter()``; file names are the base names of the hrefs.
    """
    print(' ')
    images = [(iid, os.path.basename(href))
              for iid, href, _mime in bk.image_iter()]

    for iid, fname in images:
        target = os.path.join(wdir, fname)
        print(' -- Copy images to work dir...' + target)

        # svg is handled as text, everything else as raw bytes
        if fname.endswith('.svg'):
            handle = open(target, 'wt', encoding='utf-8')
        else:
            handle = open(target, 'wb')
        with handle as outfp:
            outfp.write(bk.readfile(iid))

    image_ids = [iid for iid, _ in images]
    image_names = [fname for _, fname in images]
    return (image_ids, image_names)
    
def writeFiles2CSS(bk, wdir, ids, s_fnames):
    """Write the cleaned CSS files from ``wdir`` back into the book.

    Each file is first passed to prettifyCSS(), then read as utf-8 and
    handed to ``bk.writefile()`` under the id at the same position in
    ``ids``.

    Returns:
        0, also when there are no CSS files at all.
    """
    # no css files with imported html docs
    if len(s_fnames) < 1:
        return 0

    print(' ')
    print(' ')
    for pos, css_name in enumerate(s_fnames):
        prettifyCSS(wdir, css_name)
        print(' -- Write to epub CSS...' + str(css_name))
        with open(os.path.join(wdir, css_name), 'rt', encoding='utf-8') as fp:
            css_text = fp.read()
            bk.writefile(ids[pos], css_text)

    return 0
    
def writeFiles2Epub(bk, wdir, ids, fnames):
    """Write files from ``wdir`` back into the book.

    Every name in ``fnames`` is read as utf-8 text from ``wdir`` and
    passed to ``bk.writefile()`` with the id at the same position in
    ``ids``.

    Returns:
        0, also when ``fnames`` is empty.
    """
    print(' >>> In writeFiles2Epub()...' )
    for listed in fnames:
        print(listed)

    if len(fnames) < 1:
        return 0

    print(' ')
    for pos, name in enumerate(fnames):
        print(' -- Write files to epub...' + name)
        source = os.path.join(wdir, name)
        with open(source, 'rt', encoding='utf-8') as fp:
            text = fp.read()
            bk.writefile(ids[pos], text)

    return 0

# Matches a line-height declaration whose value is one of the "default
# looking" values handled by removeLineHeight(): 100%-120%, 12pt, 14pt,
# 1.2, 1.4, 1.5 or normal.  The negative look-ahead stops the match from
# eating the front of a longer value (e.g. 1.25 or 1.2em); an optional
# trailing semicolon is removed together with the declaration.
_LINE_HEIGHT_RE = re.compile(
    r'line-height:\s*'
    r'(?:1[01][0-9]%|120%|1[24]pt|1\.[245]|normal)'
    r'(?![0-9A-Za-z.%])'
    r';?'
)


def removeLineHeight(line):
    """Remove default-looking ``line-height`` declarations from html text.

    Args:
        line: a piece of html markup (typically one line of a file).

    Returns:
        ``line`` unchanged when it does not contain ``line-height:``.
        Otherwise the text with the matching declarations removed,
        re-serialised by BeautifulSoup after every ``style`` attribute
        that ended up empty has been deleted.
    """
    if 'line-height:' not in line:
        return(line)

    # One pattern replaces the former chains of str.replace() calls, which
    # contained copy/paste slips (stale loop index, 'norml', a missing
    # 'line-height:1.4') and could leave the tail of a longer value behind.
    line = _LINE_HEIGHT_RE.sub('', line)

    # remove style attributes that are now empty (or only whitespace)
    soup = BeautifulSoup(line, 'html.parser')
    for tag in soup.find_all(True):
        if tag.has_attr('style') and not (tag['style'] or '').strip():
            del tag['style']

    return(str(soup))
    
    
def extraHTMLCleanup(wdir, file):
    """Line-by-line clean-up of proprietary leftovers in an html file.

    Reads ``file`` one line at a time, drops or rewrites unwanted
    fragments, writes the result to ``adhoc_cleaning.html`` inside
    ``wdir`` and finally replaces ``file`` with that output.

    Args:
        wdir: working directory used for the temporary output file.
        file: path of the html file to clean; it is overwritten.

    Returns:
        0 on completion.
    """

    output = os.path.join(wdir, 'adhoc_cleaning.html')
    # NOTE(review): both handles are closed only at the end of the
    # function, so an exception inside the loop leaves them open.
    outfp = open(output, 'wt', encoding=('utf-8'))
    infp = open(file, 'rt', encoding='utf-8')

    # remove or change adhoc html
    for line in infp:

        # drop a comment that starts and ends on this line
        if line.strip().startswith('/*') and line.strip().endswith('*/'):
            continue

        # multi-line comment: keep reading from the same iterator until a
        # line ending in '*/' is found, then carry on with an empty line.
        # If no such line exists the iterator is exhausted and the last
        # line read is processed by the code below.
        if line.strip().startswith('/*') and not line.strip().endswith('*/'):
            for line in infp:
                if not line.strip().endswith('*/'):
                    continue
                else:
                    line = ''
                    break

        if 'border:none' in line.replace(' ',''):
            line = line.replace('border: none;', '').replace('border:none;', '')
            line = line.replace('border: none', '').replace('border:none', '')

        # only the values 0-4 are removed; a trailing ';' is left in place
        if 'orphans:' in line:
            line = line.replace('orphans: 0','')
            line = line.replace('orphans: 1','')
            line = line.replace('orphans: 2','')
            line = line.replace('orphans: 3','')
            line = line.replace('orphans: 4','')
            line = line.replace('orphans:0','')
            line = line.replace('orphans:1','')
            line = line.replace('orphans:2','')
            line = line.replace('orphans:3','')
            line = line.replace('orphans:4','')

        if 'widows:' in line:
            line = line.replace('widows: 0','')
            line = line.replace('widows: 1','')
            line = line.replace('widows: 2','')
            line = line.replace('widows: 3','')
            line = line.replace('widows: 4','')
            line = line.replace('widows:0','')
            line = line.replace('widows:1','')
            line = line.replace('widows:2','')
            line = line.replace('widows:3','')
            line = line.replace('widows:4','')

        # collapse the doubled semicolons the removals above can leave
        if ';;' in line:
            line = line.replace(';;',';')

        # the test is case-insensitive, but only these three spellings
        # are actually replaced
        if '<img align="left"' in line.lower():
            line = line.replace('<img align="left"', '<img ')
            line = line.replace('<img align="LEFT"', '<img ')
            line = line.replace('<img ALIGN="LEFT"', '<img ')

        # fixes malformed, standalone image tag lines in the html
        if line.strip().startswith('<img '):
            line = line.strip().replace('<img ', '<p style="text-align: center;"><img ')
            line = line + '</p>'
            line = '\n' + line + '\n'

        # generator / content-type meta lines are written as they are and
        # skip every step below
        if '<meta' in line and ('name="generator"' in line.lower() or 'http-equiv="content-type"' in line.lower()):
            outfp.write(line)
            continue

        line = line.replace(' transform:', '')
        line = line.replace(' rotate(0.00rad)', '')
        line = line.replace(' translateZ(0px);', '')
        line = line.replace('-webkit-transform:', '')

        #if options.REMOVE_LH == True:
        #    line = removeLineHeight(line)

        # NOTE(review): the outer test only matches the spelling with a
        # space after the colon, so the no-space replacements below act
        # only on lines that contain both spellings.
        if 'font-family: "Times New Roman", serif' in line:
            if 'font-family: "Times New Roman", serif;"' in line:
                line = line.replace('font-family:"Times New Roman", serif;', '')
                line = line.replace('font-family: "Times New Roman", serif;', '')
            else:
                line = line.replace('font-family:"Times New Roman", serif', '')
                line = line.replace('font-family: "Times New Roman", serif', '')

        # for each of the following properties: remove the form with a
        # trailing ';' when the line has one, else the form without it
        if 'direction: inherit' in line:
            if 'direction: inherit;' in line:
                line = line.replace('direction: inherit;', '')
            else:
                line = line.replace('direction: inherit', '')

        if 'position: absolute' in line:
            if 'position: absolute;' in line:
                line = line.replace('position: absolute;', '')
            else:
                line = line.replace('position: absolute', '')

        if 'letter-spacing: normal' in line:
            if 'letter-spacing: normal;' in line:
                line = line.replace('letter-spacing: normal;', '')
            else:
                line = line.replace('letter-spacing: normal', '')

        if 'text-decoration: none' in line:
            if 'text-decoration: none;' in line:
                line = line.replace('text-decoration: none;', '')
            else:
                line = line.replace('text-decoration: none', '')

        if 'font-variant: normal' in line:
            if 'font-variant: normal;' in line:
                line = line.replace('font-variant: normal;', '')
            else:
                line = line.replace('font-variant: normal', '')

        # The line is parsed on its own three times below; it is replaced
        # by the re-serialised soup only when a matching tag was found.

        # give every <sup> a fixed inline style and drop its class
        soup = BeautifulSoup(line, 'html.parser')
        for sup in soup.find_all('sup'):
            if sup.has_attr('class'):
                del sup['class']
            sup['style'] = 'font-size: 0.8em;line-height: normal;vertical-align: top;'
            line = str(soup)

        # same for <sub>, with a smaller font size
        soup = BeautifulSoup(line, 'html.parser')
        for sub in soup.find_all('sub'):
            if sub.has_attr('class'):
                del sub['class']
            sub['style'] = 'font-size: 0.65em;line-height: normal;vertical-align: bottom;'
            line = str(soup)

        # fix <span> subscript problems
        soup = BeautifulSoup(line, 'html.parser')
        for span in soup.find_all('span'):
            if 'vertical-align:sub' in str(span).replace(' ', ''):
                span['style'] = 'font-size: 0.65em;line-height: normal;vertical-align: bottom;'
                line = str(soup)

        # drop lines reduced to a lone semicolon
        if line.strip() == ';':
            continue

        # NOTE(review): str.strip() always returns a string, so this
        # comparison with None is never true.
        liner = line.strip()
        if liner == None:
            continue

        if '\t' in line:
            line = line.replace('\t', '')

        outfp.write(line)

    outfp.close()
    infp.close()
    # replace the original file with the cleaned copy
    os.remove(file)
    os.rename(output, file)
    return(0)
    
def _appendStyleDecl(tag, decl):
    """Append the CSS declaration ``decl`` to ``tag``'s inline style,
    creating the style attribute when the tag has none."""
    if not tag.has_attr('style'):
        tag['style'] = decl
        return
    current = str(tag['style'])
    joiner = '' if current.endswith(';') else ';'
    tag['style'] = current + joiner + decl


def _convertAlignAttr(soup, value):
    """Replace align="value" on p/h1-h4 tags by an inline text-align style.

    A tag is converted when its serialised markup contains
    ``align="value"`` and the tag carries a class or a style attribute;
    tags with neither are left untouched.
    """
    marker = 'align="' + value + '"'
    for tag in soup.find_all(['p', 'h1', 'h2', 'h3', 'h4']):
        if marker not in str(tag).lower():
            continue
        if tag.has_attr('class') or tag.has_attr('style'):
            del tag['align']
            _appendStyleDecl(tag, 'text-align: ' + value + ';')


def sanitizeHTML(wdir, file):
    """Remove unnecessary proprietary tags and attributes from an html file.

    The file is parsed with BeautifulSoup ('html.parser'), cleaned in
    several passes, written to ``new_html.htm`` inside ``wdir`` and then
    moved over the original.

    Args:
        wdir: working directory used for the temporary output file.
        file: path of the html file to clean; it is overwritten.

    Returns:
        ``file``, the path of the rewritten file.
    """
    output = os.path.join(wdir, 'new_html.htm')
    # read through a context manager so the input handle is closed
    with open(file, 'rt', encoding='utf-8') as infp:
        soup = BeautifulSoup(infp.read(), 'html.parser')

    # align="center" on the tag itself -> inline text-align
    for tag in soup.find_all(['p', 'h1', 'h2', 'h3', 'h4']):
        if tag.has_attr('align') and 'align="center"' in str(tag).lower():
            del tag['align']
            _appendStyleDecl(tag, 'text-align: center;')

    # <font size=..> inside a paragraph -> inline font-size on the <p>.
    # The style is only written when a size attribute was actually found;
    # before, a <font> without size produced 'font-size: pt;' or reused
    # the size of an earlier paragraph.
    if options.DOCTYPE == 'OpenOffice' or options.DOCTYPE == 'LibreOffice':
        for ptag in soup.find_all('p'):
            font = ptag.font
            if font is not None and font.has_attr('size'):
                pt_size = getPointSize(str(font['size']))
                font.attrs = {}
                font.unwrap()
                _appendStyleDecl(ptag, 'font-size: ' + pt_size + 'pt;')

    # drop the 'face' attribute from all font tags
    for font in soup.find_all('font'):
        del font['face']

    # ensures epub image compliance (deleting a missing attribute is
    # a no-op on a BeautifulSoup tag)
    for img in soup.find_all('img'):
        for attr in ('border', 'clear', 'align', 'title'):
            del img[attr]
        if img.has_attr('name'):
            img['id'] = img['name']
            del img['name']

    # NOTE(review): find_all('p', 'span') selects <p> tags whose class is
    # 'span', not <p> and <span> tags.  Kept as is because switching to
    # ['p', 'span'] would change which tags reach the align="left"
    # conversion further down -- confirm the intent.
    for c in soup.find_all('p', 'span'):
        if 'align="left"' in str(c).lower():
            del c['align']

    # remove unnecessary text decoration
    # NOTE(review): same find_all('p', 'span') remark as above.
    for s in soup.find_all('p', 'span'):
        if s.has_attr('style'):
            if 'text-decoration: none' in str(s):
                s['style'] = str(s['style']).replace('text-decoration: none', '')

    # remove unnecessary page-breaks, with or without trailing semicolon
    # (the bare form used to be skipped because of a misplaced else)
    for s in soup.find_all('p'):
        if s.has_attr('style') and 'page-break-before: always' in str(s):
            style = str(s['style']).replace('page-break-before: always;', '')
            s['style'] = style.replace('page-break-before: always', '')

    if options.EPUB_VERSION != 'EPUB3':
        # remove 'background: transparent' for epub 2 only
        # NOTE(review): extract() removes the whole span including its
        # content, and the test only matches the spelling without a
        # space after the colon -- confirm both are intended.
        for j in soup.find_all('span'):
            if 'background:transparent' in str(j).lower():
                j.attrs = {}
                j.extract()

    # add "alt" to img tags
    for f in soup.find_all('img'):
        if not f.has_attr('alt'):
            f['alt'] = ""

    # convert remaining align attributes to inline text-align styles
    _convertAlignAttr(soup, 'center')
    _convertAlignAttr(soup, 'justify')
    _convertAlignAttr(soup, 'left')

    # remove all 'style' attributes from h1 tags
    for h in soup.find_all('h1'):
        if h.has_attr('style'):
            del h['style']

    # remove empty tags
    # NOTE(review): find_all('p', 'h1') selects <p class="h1"> only; the
    # original comment speaks of empty h1 tags -- confirm the intent.
    for h in soup.find_all('p', 'h1'):
        if h.get_text() in ('', ' '):
            if '<img' not in str(h):
                del h['style']
                del h['class']
                h.extract()

    # remove any p tags that contain just a single space
    for x in soup.find_all('p'):
        if x.string == ' ':
            if '<img' not in str(x):
                x.decompose()

    # remove paragraphs / headings whose only content is an empty string
    for h in soup.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
        if h.string == '':
            if '<img' not in str(h):
                del h['style']
                del h['class']
                h.extract()

    if options.REMOVE_EMPTY_SPANS == True:
        # unwrap attribute-less span tags unless they sit directly in a div
        for d in soup.find_all(['span']):
            if d.attrs == {} and d.parent.name != 'div':
                d.unwrap()

    # remove 'value' attribute
    for tag in soup.find_all(value=True):
        if tag.has_attr('value'):
            del tag['value']

    # remove all section tags (their content is kept)
    for tag in soup.find_all('section'):
        tag.attrs = {}
        tag.unwrap()

    # remove old amazon specific attributes
    for tag in soup.find_all('img'):
        if tag.has_attr('data-amznremoved'):
            del tag['data-amznremoved']
        if tag.has_attr('data-amznremoved-m8'):
            del tag['data-amznremoved-m8']

    # remove language attributes from every tag
    for tag in soup.find_all(True):
        if tag.has_attr('lang'):
            del tag['lang']
        if tag.has_attr('xml:lang'):
            del tag['xml:lang']

    # turn div tags into p tags unless they hold an image or svg
    if options.REPLACE_DIV_TAGS == True:
        for dtag in soup.find_all('div'):
            if '<img' not in str(dtag) and \
                '<svg' not in str(dtag):
                dtag.name = 'p'

    # convert from 'name' to 'id' attribute
    for tag in soup.find_all(True):
        if tag.has_attr('name') and tag.name != 'meta':
            tag['id'] = tag['name']
            del tag['name']

    with open(output, 'wt', encoding='utf-8') as outfp:
        outfp.write(str(soup))

    # replace the original file with the cleaned copy
    os.remove(file)
    os.rename(output, file)
    return(file)
    
def removeBadAttributesfromCSS(bk, wdir, file):
    """Filter unwanted declarations out of a CSS file, line by line.

    The file is copied to ``removed_fonts.css`` in ``wdir`` while lines
    matching the filters below are dropped or rewritten; the result then
    replaces the original file.

    Args:
        bk: not used inside this function.
        wdir: working directory.
        file: CSS file name; it is joined with ``wdir`` before use.

    Returns:
        0 on completion.
    """
    print(' -- Remove unwanted style properties from the CSS')

    # NOTE(review): processAllTasks() passes a path that was already
    # joined with wdir; os.path.join() returns it unchanged only when it
    # is absolute.
    file = os.path.join(wdir, file)
    output = wdir + os.sep + 'removed_fonts.css'
    outfp = open(output, 'w', encoding='utf-8')
    with open(file, 'r', encoding='utf8') as infp:
        for line in infp:
            line = line.replace('-western', '')

            # drop the CDATA wrapper lines
            if '/*<![CDATA[*/' in line:
                continue
            if '/*]]>*/' in line:
                continue

            # NOTE(review): both branches of the inner loop write the
            # line, so from here on the rest of the file is copied
            # unfiltered and the iterator is exhausted; the last line read
            # then falls through the filters below and may be written a
            # second time.
            if 'body.globals  {' in line:
                outfp.write(line)
                for line in infp:
                    if ']' not in line:
                        outfp.write(line)
                    else:
                        outfp.write(line)

            # remove text black color declarations
            # (the last test also drops every line that contains 'color:'
            # without a '#', e.g. named colours and 'background-color:')
            if options.REMOVE_DEFAULT_TEXT_COLOR:
                line1 = line.lower().lstrip().replace(' ', '')
                if line1.startswith('color:#000000;') or \
                    line1.startswith('color:#00000') or \
                    line1.startswith('color:#000;') or \
                    line1.startswith('color:#000\n') or \
                    ('color:' in line1 and '#' not in line1):
                    continue

            # remove all white bg color declarations
            # (and any 'background-color:' line without a '#')
            if options.REMOVE_DEFAULT_BG_COLOR:
                line2 = line.lower().lstrip().replace(' ', '')
                if line2.startswith('background-color:#ffffff;') or \
                    line2.startswith('background-color:#ffffff') or \
                    line2.startswith('background-color:#fff;') or \
                    line2.startswith('background-color:#fff\n') or \
                    ('background-color:' in line2 and '#' not in line2):
                    continue

            # transparent backgrounds are dropped unless building EPUB3
            if options.EPUB_VERSION != 'EPUB3':
                if 'background-color:transparent' in line.strip().replace(' ','') or \
                    'background:transparent' in line.strip().replace(' ',''):
                    continue

            if 'mso-style-link:' in line.strip() or 'mso-style-name:' in line.strip():
                continue

            # 'pc;' is rewritten to 'em;' without changing the number
            if 'pc;' in line:
                line = line.replace('pc;', 'em;')

            # numeric font weights -> keywords; every '700' / '400' in
            # the line is replaced
            if 'font-weight:700' in line.strip().replace(' ', ''):
                    line = line.replace('700', 'bold')

            if 'font-weight:400' in line.strip().replace(' ', ''):
                line = line.replace('400', 'normal')

            # Drop whole lines that mention any of these properties.  The
            # plain substring tests also match longer names that contain
            # them (e.g. 'transform' matches 'text-transform').
            if 'position:absolute' in line.replace(' ', '') or \
                'font-variant:normal' in line.replace(' ', '') or \
                'text-decoration:none' in line.replace(' ', '') or \
                'letter-spacing:normal' in line.replace(' ', '') or \
                'vertical-align:normal' in line.replace(' ', '') or \
                'page-break' in line or \
                'transform' in line or \
                'so-language:' in line or \
                '-webkit-transform' in line or \
                'direction:' in line or \
                'widows:' in line or \
               'orphans:' in line or \
                line == ';\n':
                continue

            outfp.write(line)

    outfp.close()
    # replace the original with the filtered copy
    os.remove(file)
    shutil.copy(output, file)
    os.remove(output)

    return(0)
    
        
def cssFinalFormat(wdir, file):
    """Apply the final line-by-line normalisation to a CSS file.

    The file is rewritten through ``last.css`` in ``wdir``, moved back
    over the original and finally passed to prettifyCSS().

    Args:
        wdir: working directory.
        file: CSS file name; it is joined with ``wdir`` before use.

    Returns:
        0 on completion.
    """
    # convert any styling to lower case
    print('Remove_Fonts flag == ' + str(options.REMOVE_FONTS))
    print('Infile name...' + file)

    infile = os.path.join(wdir, file)
    outfile= os.path.join(wdir,'last.css')
    infp = open(infile, 'r', encoding='utf-8')
    outfp = open(outfile, 'w', encoding='utf-8')
    for line in infp:

        # tidy spacing before ':' and ';'
        if ' :' in line:
            line = line.replace(' :',':')

        if ' ;' in line:
            line = line.replace(' ;',';')

        # 'rem;' is rewritten to 'em;' without changing the number
        if 'rem;' in line:
            line = line.replace('rem;','em;')

        # helper defined elsewhere; presumably trims redundant zeros in
        # numeric values -- verify against its definition
        line = reduceDigitalZeros(line)

        if '{\n' in line:
            #line = line.replace(' ', '')
            line = line.replace('{', '  {')
            line = line.replace(',', ', ')

        # skip these rule blocks: read on from the same iterator up to the
        # first line containing '}'; that line (minus a '}\n') continues
        # through the remaining steps
        if '@pageSection1' in line or \
            'div.Section1' in line or \
            '.PapDefault' in line:
            outfp.write('')
            for line in infp:
                if '}' in line:
                    outfp.write('')
                    break
                else:
                    continue
            line = line.replace('}\n', '')

        # zero values need no unit
        if ' 0%' in line:
            line = line.replace(' 0%',' 0')
        if ' 0em' in line:
            line = line.replace(' 0em',' 0')
        if ' 0pt' in line:
            line = line.replace(' 0pt',' 0')
        if ' 0px' in line:
            line = line.replace(' 0px',' 0')
        if ' 0in' in line:
            line = line.replace(' 0in',' 0')

        if 'page: Section1;' in line:
            continue

        # repair pseudo-class selectors that picked up a space
        if 'a: link' in line:
            line = line.replace('a: link','a:link')

        if 'a: visited' in line:
            line = line.replace('a: visited','a:visited')

        if 'a: hover' in line:
            line = line.replace('a: hover','a:hover')

        # skip the lines of an '@mediaprint,' block, same mechanism as
        # above
        if '@mediaprint,' in line:
            outfp.write('')
            for line in infp:
                if '}' in line:
                    outfp.write('')
                    break
                else:
                    continue
            line = line.replace('}\n', '')

        if options.REMOVE_FONTS == True:
            if line.strip().startswith('font-family:') or \
                line.strip().startswith('font-family :'):
                continue

        # any line mentioning hyphenation or writing-mode is dropped
        if options.REMOVE_HYPHEN_PROPS == True:    #### Added new options on 12-02-23
            if 'adobe-hyphenate:' in line or \
               'writing-mode:' in line or \
               '-epub-writing-mode:' in line or \
               '-webkit-writing-mode:' in line or \
               '-webkit-hyphens:' in line or \
               '-webkit-hyphenate' in line or \
               '-moz-hyphens:' in line or \
               '-ms-hyphens:' in line or \
               'hyphenate' in line or \
               'hyphens' in line or \
               'hyphen' in line:
               continue

        # lower-case upper-case element names; these are plain substring
        # replacements applied anywhere in the line
        line = line.replace('P.', 'p.')
        line = line.replace('H1.', 'h1.')
        line = line.replace('H2.', 'h2.')
        line = line.replace('H3.', 'h3.')
        line = line.replace('H4.', 'h4.')
        line = line.replace('H5.', 'h5.')
        line = line.replace('H6.', 'h6.')

        line = line.replace('H1', 'h1')
        line = line.replace('H2', 'h2')
        line = line.replace('H3', 'h3')
        line = line.replace('H4', 'h4')
        line = line.replace('H5', 'h5')
        line = line.replace('H6', 'h6')
        line = line.replace('DIV.', 'div.')
        line = line.replace('SPAN.', 'span.')
        # skip blank lines and comments that fit on one line
        if line.strip() == '' or (line.strip().startswith('/*') and line.strip().endswith('*/')):
            continue

        # multi-line comment: read on until a line ends with '*/', then
        # continue with an empty line
        if line.strip().startswith('/*') and not line.strip().endswith('*/'):
            for line in infp:
                if not line.strip().endswith('*/'):
                    continue
                else:
                    line = ''
                    break

        # terminate declarations that lack a semicolon; lines with a
        # brace or a comma are left alone
        if not line.strip().endswith(';') and \
            '{' not in line and \
            '}' not in line and \
            ',' not in line:
            line = line.strip() + ';\n'

        line = line.lstrip()
        # put a closing brace that follows a declaration on its own line
        if line.endswith(';}\n'):
            line = line.replace(';}\n', ';\n}\n')
        # normalise the spacing before an opening brace
        if line.strip().endswith('{'):
            line = line.replace('{\n', '')
            line = line.strip() + '  {\n'

        outfp.write(line.strip() + '\n')

    infp.close()
    outfp.close()
    # replace the original with the reformatted copy, then tidy it
    os.remove(infile)
    os.rename(outfile, infile)
    prettifyCSS(wdir, file)
    return(0)

 

def prettifyCSS(wdir, css):
    """ Remove blank lines and stray semicolons from a stylesheet.

        The file at os.path.join(wdir, css) is rewritten in place 
        (through the temporary file 'link_rel.html' in wdir). Every 
        surviving line is stripped of leading/trailing whitespace.

        Returns 0.
    """
    css = os.path.join(wdir, css)
    output = os.path.join(wdir, 'link_rel.html')

    with open(css, 'rt', encoding='utf8') as infp, \
         open(output, 'wt', encoding='utf-8') as outfp:
        for line in infp:
            # collapse a doubled opening brace
            line = line.replace('{  {\n', '{\n')

            # a ';' only belongs after a 'property: value' pair or an 
            # @-rule; on any other line it is removed
            if ';' in line and ':' not in line and not line.strip().startswith('@'):
                line = line.replace(';', '')

            # test for emptiness AFTER the semicolon removal so that lines 
            # such as ';;' do not end up as blank lines in the output
            line = line.strip()
            if line == '' or line == ';':
                continue

            outfp.write(line + '\n')

    os.replace(output, css)
    return(0)

def cleanExit(wdir):
    """ Delete the directory tree rooted at wdir.

        Errors raised while deleting are ignored.
        Returns 0.
    """
    ignore_errors = True
    shutil.rmtree(wdir, ignore_errors)
    return 0
    
def repairHTMLIDs(wdir, file):
    """ Clean up ids and internal link targets in one html file.

        - removes spaces (literal and '%20') from anchor ids and 
          from hrefs that contain a '#'
        - passes the ids of a, p, body, div and h1-h6 tags 
          through CheckandRepairID()
        - passes the fragment part of every '#' href through 
          CheckandRepairID() so links follow the repaired ids

        The file is rewritten in place via 'html_repair.html' 
        in wdir. Returns 0.
    """
    outfile = os.path.join(wdir, 'html_repair.html')
    with open(file, 'rt', encoding='utf-8') as infp:
        soup = BeautifulSoup(infp.read(), 'html.parser')

    # remove spaces in anchor id values and in internal href values
    for anchor in soup.find_all('a'):
        if anchor.has_attr('id'):
            anchor['id'] = anchor['id'].replace(' ', '').replace('%20', '')
        if anchor.has_attr('href') and '#' in anchor['href']:
            anchor['href'] = anchor['href'].replace(' ', '').replace('%20', '')

    print('\n >>> Check xhtml file ids and hrefs...')
    ### check and repair the xhtml text file ids
    search_tags = ['a', 'p', 'body', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']
    for id_tag in soup.find_all(search_tags):
        # an id that is empty once trimmed has no first character to repair
        if id_tag.has_attr('id') and id_tag['id'].strip():
            id_tag['id'] = CheckandRepairID(id_tag['id'])

    ### check and repair internal links
    for href_tag in soup.find_all('a'):
        if not (href_tag.has_attr('href') and '#' in href_tag['href']):
            continue
        # split on the FIRST '#' only; an href holding several '#' 
        # characters must not abort the whole file
        link, _, fragment = href_tag['href'].partition('#')
        print(' >>> href id...' + fragment)
        if not fragment.strip():
            continue
        href_tag['href'] = link + '#' + CheckandRepairID(fragment)

    with open(outfile, 'wt', encoding='utf-8') as outfp:
        outfp.write(str(soup))
    os.replace(outfile, file)
    return(0)
    
def CheckandRepairID(id):
    """ Return id with surrounding whitespace removed and, if it 
        starts with a digit, with that digit replaced by 'x'.

        An empty (or whitespace-only) id is returned as ''.

        NOTE(review): only a leading digit is repaired, and it is 
        replaced rather than prefixed, so '1abc' and '2abc' both 
        become 'xabc' - confirm that collisions are acceptable.
    """
    id = id.strip()
    print('\n>>> Check ID...' + id)

    # nothing to repair; this also avoids indexing into an empty string
    if not id or not id[0].isdigit():
        return(id)

    # ensure first char is not a digit
    print(' >>> Repair id...' + id)
    new_id = 'x' + id[1:]
    print(' >>> New ID...' + new_id + '\n')
    return(new_id)
        
def repositionAnchorEndTags(wdir, file):
    """ 
        Some word-processors, such as Word, will 
        insert lone anchor ids (with no hrefs) that 
        surround the corresponding anchor text string. 
        This is ported to the html file and is the 
        wrong way to format ids in epubs. For instance, 
        in ADE, if your epub contains such anchor tags, 
        then the text with these ids will display as blue 
        with underline just like a link.
        
        This function repositions the anchor end tag 
        to before the text string to avoid the above problems, 
        i.e. <a id="x">text</a> becomes <a id="x"></a>text.

        It also unwraps anchors that have no attributes and 
        converts anchor 'name' attributes to 'id' attributes.
        The file is rewritten in place via 'reformat.html' in 
        wdir. Returns 0.
    """
    
    print('\n>>> In repositionAnchorEndTags()...')           
    output = os.path.join(wdir, 'reformat.html')
    with open(file, 'rt', encoding='utf-8') as infp:
        soup = BeautifulSoup(infp.read(), 'html.parser')

    # remove all anchor tags with no attributes
    for atag in soup.find_all('a'):
        if atag.attrs == {}:
            atag.unwrap()

    # ensure all name attributes are converted to id attributes;
    # fall back to the whole document when there is no <body>
    root = soup.body if soup.body is not None else soup
    for a_tag in root.find_all('a'):
        if a_tag.has_attr('name'):
            a_tag['id'] = a_tag['name']
            del a_tag['name']

    # ensure correct anchor end tag position for all anchor ids
    search_tags = ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']
    for tag in soup.find_all(search_tags):
        for atag in tag.find_all('a'):
            if atag.has_attr('id') and not atag.has_attr('href'):
                if atag.string is not None:
                    a_string = atag.string
                    atag.string = ''
                    # put the text directly after the emptied anchor so 
                    # it stays where it was; appending it to the enclosing 
                    # tag would move it behind any following content
                    atag.insert_after(soup.new_string(a_string))

    with open(output, 'wt', encoding='utf-8') as outfp:
        outfp.write(str(soup))
    os.replace(output, file)
    return(0)
    
    
def getHREF_ID_Epub(bk, wdir, fnames):
    """ Collect the fragment ids that are linked to in the epub.

        Sources, in order:
        - the part after '#' of every <a href> in the files 
          listed in fnames (read from wdir)
        - the part after '#' of every guide item href
        - the part after '#' of every <content src> in the toc 
          file returned by bk.gettocid()

        If the toc file cannot be read, the ids collected so 
        far are returned. Returns a list of strings (duplicates 
        are not removed).
    """
    
    print('\n>>> In getHREF_ID_Epub...')
    href_ids = list()
    
    print('>>> xhtml file list...')
    for t in fnames:
        print(t)
    
    # build a list of xhtml href ids
    for file in fnames:
        with open(os.path.join(wdir, file), 'rt', encoding='utf-8') as infp:
            soup = BeautifulSoup(infp.read(), 'html.parser')

        for atag in soup.find_all('a'):
            if atag.has_attr('href') and '#' in atag['href']:
                href_ids.append(atag['href'].split('#')[1])
    
    # add the href ids from the guide items
    for _type, _title, href, _id in bk.guide_iter():
        if '#' in href:
            href_ids.append(href.split('#')[1])
    
    # add the href ids from toc.ncx
    tocid = bk.gettocid() 
    try:    
        data = bk.readfile(tocid)
    except Exception:
        # best effort: no readable toc, return what was found
        return(href_ids)

    soup = BeautifulSoup(data, 'xml')
    for tag in soup.find_all('content'):
        if tag.has_attr('src') and '#' in tag['src']:
            ncx_id = tag['src'].split('#')[1]
            print('TOC.NCX ids added to HREF_IDS..')
            print(ncx_id)
            href_ids.append(ncx_id)
    
    return(href_ids)
    
def removeUnusedBookmarks(wdir, t_fnames, href_ids): 
    """ Remove all unused ids/bookmarks from epubs.

        For every file in t_fnames (read from wdir):
        - anchor 'name' attributes are converted to 'id'
        - every 'id' attribute whose value is not one of 
          href_ids is deleted; an <a> left without any 
          attribute is unwrapped

        Each file is rewritten in place via 'remove_ids.html'. 
        The number of removed ids is stored in 
        options.TOTAL_REMOVED_BOOKMARKS. Returns 0.
    """
    from urllib.parse import unquote

    # exact-match lookup; url-decoded variants are added so that an 
    # encoded link fragment still protects its target id
    used_ids = set()
    for href_id in href_ids:
        href_id = href_id.strip()
        used_ids.add(href_id)
        used_ids.add(unquote(href_id))

    bm_removed = 0
    outfile = os.path.join(wdir, 'remove_ids.html')
    
    for file in t_fnames:
        file = os.path.join(wdir, file)
        with open(file, 'rt', encoding='utf-8') as infp:
            soup = BeautifulSoup(infp.read(), 'html.parser')
              
        # ensure all bookmarks have only 'id' attributes; restricted to 
        # anchors so that e.g. <meta name="..."> keeps its name attribute
        for tag in soup.find_all('a', attrs={'name': True}):
            idref = tag['name']
            del tag['name']
            tag['id'] = idref
                
        # remove all bookmarks not in the href id list   
        for tag in soup.find_all(id=True):        
            if tag['id'].strip() in used_ids:
                continue
            del tag['id']
            bm_removed += 1
            if tag.name == 'a' and tag.attrs == {}:
                tag.unwrap()
                    
        with open(outfile, 'wt', encoding='utf-8') as outfp:
            outfp.write(str(soup))
        os.replace(outfile, file)
        
    options.TOTAL_REMOVED_BOOKMARKS = bm_removed         
    return(0)        
                 
def addData2NCXFile(bk, file):
    """ Sync the NCX uid with the OPF identifier and add title, 
        date and language entries to the OPF metadata.

        Does nothing unless options.EPUB_VERSION is 'EPUB2'.

        - the text of the metadata element (with an id) whose 
          markup contains 'dc:identifier' is written to the 
          'content' of the ncx <meta name="dtb:uid">
        - the metadata xml is re-indented, its last line is 
          replaced and dc:title (file base name up to the first 
          '.'), dc:date (current UTC time) and dc:language 'en' 
          are appended
        - setNCXData() is called for the ncx title and path

        Returns 0.
    """
    if options.EPUB_VERSION != 'EPUB2':
        return(0)

    from datetime import timezone
    from xml.sax.saxutils import escape

    uuid = ''
    # get the OPF uuid
    soup = BeautifulSoup(bk.getmetadataxml(), 'xml')
    for tag in soup.find_all(id=True):
        if 'dc:identifier' in str(tag):
            uuid = tag.get_text()     
        
    # add the OPF uuid to the ncx 'content' attr    
    ncx = BeautifulSoup(bk.readfile('ncx'), 'xml')
    for tag in ncx.find_all('meta'):
        if tag.has_attr('content') and tag.has_attr('name'):
            if tag['name'] == 'dtb:uid':
                tag['content'] = uuid   
   
    bk.writefile('ncx', str(ncx))
    print('>>> UUID string added to NCX...' + uuid) 
      
    meta_lines = bk.getmetadataxml().splitlines(True)
    # drop the last line; it is re-added below as '  </metadata>'
    # presumably this is the closing </metadata> line - TODO confirm
    if meta_lines:
        del meta_lines[-1]

    new_lines = []
    for line in meta_lines:
        indent = '  ' if '<metadata' in line else '    '
        new_lines.append(indent + line)
           
    # add title, timestamp and language to the opf metadata.
    # The stamp is labelled +00:00, so it must be taken in UTC.
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S+00:00")
    # escape &, < and > so the file name cannot break the xml
    title = escape(os.path.basename(file).split('.')[0])
    new_lines.append('    <dc:title>' + title + '</dc:title>\n')   
    new_lines.append('    <dc:date>' + timestamp + '</dc:date>\n')           
    new_lines.append('    <dc:language>en</dc:language>\n')
    new_lines.append('  </metadata>\n')    
    bk.setmetadataxml(''.join(new_lines))
    
    # set the 'docTitle' and file path in ncx  
    setNCXData(bk, file)        
    
    return(0)
    
    
def removeTextColorBlack(bk, wdir, file):
    """ Removes the standard black text color declarations 
        from the inline styling in the html, e.g.:
        color: #000
        color: #000000
        color: #00000A
        color: #00000a
        color: black

        The declarations to drop are taken from 
        options.XHTML_TEXT_COLOR_BLACKLIST. Each inline style 
        is split into its declarations; a declaration is dropped 
        when it equals a blacklist entry (ignoring case, 
        whitespace and a trailing ';'). The remaining 
        declarations are written back as 'property: value;' and 
        a style attribute left empty is deleted.

        Does nothing when options.REMOVE_DEFAULT_TEXT_COLOR is 
        False. The file is rewritten in place via 'restyle.html' 
        in wdir. Returns 0.

        NOTE(review): assumes each blacklist entry is a whole 
        declaration such as 'color:#000;' - confirm against options.
    """
    if options.REMOVE_DEFAULT_TEXT_COLOR == False:
        return(0)

    print('>>>In removeTextColorBlack()...\n')  
    print('FILENAME...' + os.path.basename(file) + '\n')

    file = os.path.join(wdir, file)    
    output = os.path.join(wdir, 'restyle.html')
    with open(file, 'rt', encoding='utf-8') as infp:
        soup = BeautifulSoup(infp.read(), 'html.parser')

    # blacklist entries reduced to a comparable 'property:value' form
    blacklist = set()
    for entry in options.XHTML_TEXT_COLOR_BLACKLIST:
        entry = ''.join(entry.lower().split()).rstrip(';')
        if entry:
            blacklist.add(entry)
    
    for tag in soup.find_all(style=True):
        print('All styles before...' + str(tag['style']))     

        kept = []
        # split on ';' except inside parentheses, e.g. url(data:...;base64,...)
        for decl in re.split(r';(?![^()]*\))', str(tag['style'])):
            decl = decl.strip()
            # compare whole declarations so 'background-color:#000' or 
            # 'color:#000080' are not mistaken for 'color:#000'
            if decl == '' or ''.join(decl.lower().split()) in blacklist:
                continue
            # normalize the spacing around the first ':' only; spaces 
            # inside the value ('margin: 0 auto') must survive
            prop, colon, value = decl.partition(':')
            if colon:
                decl = prop.strip() + ': ' + value.strip()
            kept.append(decl)

        if kept:
            tag['style'] = '; '.join(kept) + ';'
            print('All styles after...' + tag['style'])     
        else:
            print('All styles after...')     
            del tag['style']
        
    with open(output, 'wt', encoding='utf-8') as outfp:
        outfp.write(str(soup))
    os.replace(output, file)        
    return(0)                             
    
        
def removeUnusedImages(bk):
    """ Deletes all unused images from Sigil's Images dir.

        An image counts as used when its base name is the base 
        name of an <img src> or an <image xlink:href> in any 
        text file of the book (compared exactly, with and 
        without url-decoding, ignoring any '#...' or '?...' 
        suffix). Every other stored image is deleted.

        Does nothing when options.REMOVE_UNUSED_IMAGES is False.
        Returns 0.

        NOTE(review): only the two references above are scanned 
        here; images referenced solely from CSS or the OPF would 
        not be seen as used - confirm this is handled elsewhere.
    """
    from urllib.parse import unquote
    
    print('\n>>> In removeUnusedImages()...\n')
    
    if options.REMOVE_UNUSED_IMAGES == False:
        return(0)
    
    # create a list of (manifest id, file name) stored in the Images dir
    stored_images = []
    for img_id, href, mime in bk.image_iter():
        stored_images.append((img_id, bk.href_to_basename(href)))
    stored_count = len(stored_images)
           
    print('>>> Total images stored in Sigil\'s Images dir: ' + str(stored_count) + '\n')    

    def _image_name(ref):
        # bare file name of a reference, without fragment or query
        ref = str(ref).split('#')[0].split('?')[0]
        return os.path.basename(ref)
        
    # create the set of image names referenced in the xhtml files
    referenced = set()
    for text_id, href in bk.text_iter():
        html = BeautifulSoup(bk.readfile(text_id), 'lxml')
        
        # standard images
        for imgtag in html.find_all('img'):
            if imgtag.has_attr('src'):
                referenced.add(_image_name(imgtag['src']))
                
        # svg images as well        
        for image in html.find_all('image'):
            if image.has_attr('xlink:href'):
                referenced.add(_image_name(image['xlink:href']))

    referenced.discard('')
    xhtml_count = len(referenced)
    print('>>>Total images in xhtml files...' + str(xhtml_count) + '\n')

    # references are urls and may be percent-encoded ('my%20pic.png'), 
    # so accept both spellings to avoid deleting an image that is in use
    used = referenced | {unquote(name) for name in referenced}
                    
    # now remove all unused image files from Images dir
    deleted_img_list = []
    for img_id, name in stored_images:
        if name in used or unquote(name) in used:
            continue
        bk.deletefile(img_id)
        deleted_img_list.append(name)
    deleted_count = len(deleted_img_list)
        
    print('>>>Total images deleted from Sigil\'s Images dir...' + str(deleted_count) + '\n')       
           
    msg = 'DELETED IMAGE FILES: \n'
    
    # report on what was actually deleted, not on a difference of counts
    if deleted_img_list:
        msg += ''.join(name + '\n' for name in deleted_img_list)
        msg += '\n>>> ALL unused stored Image files have been successfully removed from the Images dir.\n'    
    else:
        msg += 'No files were deleted\n'
            
    print(msg)          
    return(0)
    
def removeBGColorWhite(bk, wdir, file):
    """ Removes the standard white BG color declarations 
        from the inline styling in the html, e.g.:
        background-color: #FFF
        background-color: #FFFFFF
        background-color: white

        The declarations to drop are taken from 
        options.XHTML_BG_COLOR_BLACKLIST. Each inline style is 
        split into its declarations; a declaration is dropped 
        when it equals a blacklist entry (ignoring case, 
        whitespace and a trailing ';'). The remaining 
        declarations are written back as 'property: value;' and 
        a style attribute left empty is deleted.

        Does nothing when options.REMOVE_DEFAULT_BG_COLOR is 
        False. The file is rewritten in place via 'restyle.html' 
        in wdir. Returns 0.

        NOTE(review): assumes each blacklist entry is a whole 
        declaration such as 'background-color:#fff;' - confirm 
        against options.
    """
    if options.REMOVE_DEFAULT_BG_COLOR == False:
        return(0)
    
    print('>>>In removeBGColorWhite()...')    

    file = os.path.join(wdir, file)    
    output = os.path.join(wdir, 'restyle.html')
    with open(file, 'rt', encoding='utf-8') as infp:
        soup = BeautifulSoup(infp.read(), 'html.parser')
    
    print('FILENAME...' + os.path.basename(file) + '\n')

    # blacklist entries reduced to a comparable 'property:value' form
    blacklist = set()
    for entry in options.XHTML_BG_COLOR_BLACKLIST:
        entry = ''.join(entry.lower().split()).rstrip(';')
        if entry:
            blacklist.add(entry)
    
    for tag in soup.find_all(style=True):
        kept = []
        # split on ';' except inside parentheses, e.g. url(data:...;base64,...)
        for decl in re.split(r';(?![^()]*\))', str(tag['style'])):
            decl = decl.strip()
            # compare whole declarations so a longer value such as 
            # '#fff000' is not mistaken for '#fff'
            if decl == '' or ''.join(decl.lower().split()) in blacklist:
                continue
            # normalize the spacing around the first ':' only; spaces 
            # inside the value ('margin: 0 auto') must survive
            prop, colon, value = decl.partition(':')
            if colon:
                decl = prop.strip() + ': ' + value.strip()
            kept.append(decl)

        if kept:
            tag['style'] = '; '.join(kept) + ';'
            print('All styles after...' + tag['style'])     
        else:
            print('All styles after...')     
            del tag['style']
       
    with open(output, 'wt', encoding='utf-8') as outfp:
        outfp.write(str(soup))
    os.replace(output, file)        
    
    return(0)            
    
def normalizeCSSLayout(wdir, css):
    """ Convert a stylesheet from one-rule-per-line layout to 
        stacked layout (one declaration per line).

        Pass 1: a line holding '{', ':' and '}' is broken up 
                after '{' and every ';' and around '}'; 
                '&gt;' / '&gt' are replaced by '>'.
        Pass 2: blank lines are dropped and a declaration line 
                inside a rule block that lacks its final ';' 
                gets one. Comment lines, @-rules, lines with 
                braces and lines with commas are left alone.

        The file css is rewritten in place via 'reformat1.html' 
        in wdir. Returns 0.
    """
    with open(css, 'rt', encoding='utf8') as infp:
        source_lines = infp.readlines()

    # pass 1: convert style properties from single line to stacked format
    stacked = []
    for line in source_lines:
        if '{' in line and ':' in line and '}' in line:
            line = line.replace('{', '  {\n')
            line = line.replace(';', ';\n')
            line = line.replace('}', '\n}\n')

        if '&gt' in line:
            line = line.replace('&gt;', '>').replace('&gt', '>')
            # keep the line break, otherwise this line would be 
            # glued to the one that follows it
            line = line.strip() + '\n'

        stacked.append(line)

    # pass 2: ensure that all style properties end in ';'
    result = []
    depth = 0             # number of currently open '{'
    in_comment = False    # inside a /* ... */ spanning several lines
    for line in ''.join(stacked).split('\n'):
        stripped = line.strip()
        if stripped == '':
            continue

        if in_comment:
            in_comment = '*/' not in stripped
        elif stripped.startswith('/*'):
            in_comment = '*/' not in stripped
        elif '{' in line or '}' in line:
            depth = max(0, depth + line.count('{') - line.count('}'))
        elif depth > 0 and ':' in line and ',' not in line and \
             not stripped.startswith('@') and not stripped.endswith(';'):
            # append on the same line, before the line break
            line = line.rstrip() + ';'

        result.append(line + '\n')

    output = os.path.join(wdir, 'reformat1.html')
    with open(output, 'wt', encoding='utf-8') as outfp:
        outfp.writelines(result)
    os.replace(output, css)
    return(0)    
    
def removePageLinks(wdir, file):
    """ Remove links that point to other pages/files.

        For every <a> whose href contains neither 'http:', 
        'https:' nor '#', the 'href' and 'class' attributes are 
        deleted; an anchor left without attributes is unwrapped.

        Does nothing when options.REMOVE_PAGE_LINKS is False.
        The file is rewritten in place via 'remove_int_links.htm' 
        in wdir. Returns 0.

        NOTE(review): hrefs using other schemes (e.g. 'mailto:') 
        also match the test above and lose their link - confirm 
        that this is intended.
    """
    if options.REMOVE_PAGE_LINKS == False:
        return(0)
   
    print(' -- Remove all internal links')
    output = os.path.join(wdir, 'remove_int_links.htm')
    with open(file, 'rt', encoding='utf-8') as infp:
        soup = BeautifulSoup(infp.read(), 'html.parser')
    
    for tag in soup.find_all('a'):
        if not tag.has_attr('href'):
            continue
        href = tag['href']
        if 'http:' in href or 'https:' in href or '#' in href:
            continue

        del tag['href']
        del tag['class']                  
        if tag.attrs == {}:                         
            tag.unwrap()
                
    with open(output, 'wt', encoding='utf-8') as outfp:
        outfp.write(str(soup))
    os.replace(output, file)
    
    return(0)            
    
def removeAllHTMLIDs(wdir, file):
    """ Remove all ids and all internal '#' links from an html file.

        - every 'id' attribute is deleted
        - every 'href' that contains '#' but neither 'http://' 
          nor 'https://' is deleted
        - anchors left without any attribute are unwrapped

        Does nothing unless options.FILE_TYPE is 'HTML'. The 
        file (joined to wdir) is rewritten in place via 
        'remove_IDs.htm'. Returns 0.
    """
    if options.FILE_TYPE != 'HTML':
        return(0)
    
    output = os.path.join(wdir, 'remove_IDs.htm')
    file = os.path.join(wdir, file)
    with open(file, 'rt', encoding='utf-8') as infp:
        soup = BeautifulSoup(infp.read(), 'html.parser')      
    
    # remove all ids from html only
    for itag in soup.find_all(id=True):
        del itag['id']
        
    # remove all internal href links - for html only!
    for tag in soup.find_all(href=True):
        href = tag['href']
        if 'http://' in href or 'https://' in href:
            continue
        if '#' in href:
            del tag['href']

    # one sweep after both removals: anchors emptied by either step 
    # are unwrapped, whether or not an internal link was found
    for atag in soup.find_all('a'):
        if atag.attrs == {}: 
            atag.unwrap()    
    
    with open(output, 'wt', encoding='utf-8') as outfp:
        outfp.write(str(soup))
    os.replace(output, file)    
    return(0)
    
def setNCXData(bk, file):
    """ Write the document title and the html file path to the ncx.

        The title is the base name of file up to its first '.'. 
        It is written to every <text> element of the ncx. The 
        'src' of the first <content> element is set to 
        'Text/' + the base name of file.

        Returns 0.
    """
    d_title = os.path.basename(file).split('.')[0]
    
    soup = BeautifulSoup(bk.readfile('ncx'), 'xml')
    
    # use the title derived from the file name (was a fixed string)
    for text in soup.find_all('text'):
        text.string = d_title
    
    # add correct html file location to 'content'
    for content in soup.find_all('content', limit=1):
        content['src'] = 'Text/' + os.path.basename(file)
        
    bk.writefile('ncx', str(soup))
     
    return(0) 
    
def repairHTMLFileName(bk):    
    """ Replace spaces with underscores in the name of the first 
        text file of the book.

        The file is read, deleted and added again under the new 
        name with the same manifest id, then inserted into the 
        spine. Does nothing unless options.FILE_TYPE is 'HTML', 
        or when the book has no text files. Returns 0.
    """
    if options.FILE_TYPE != 'HTML':
        return(0)
        
    # get and store the file names with their manifest ids
    text_files = [(file_id, bk.href_to_basename(href)) 
                  for file_id, href in bk.text_iter()]

    # nothing to rename in a book without text files
    if not text_files:
        return(0)

    file_id, old_fname = text_files[0]
        
    # check and fix any unwanted spaces in the html file name
    if ' ' in old_fname:
        # replace spaces with underscores in fname
        fname = old_fname.replace(' ', '_')
        # get the epub file and save the data
        data = bk.readfile(file_id)
        # delete the html file from the epub
        bk.deletefile(file_id)
        # re-create and add the html file back to the epub
        bk.addfile(file_id, fname, data)  
        # put the html file back into the opf spine
        bk.spine_insert_before(1, file_id, None)
        # NOTE(review): the result was never used; the call is kept in 
        # case building the opf matters here - confirm and remove
        bk.get_opf()
           
    return(0)
    
def repairImageFileNames(bk):   
    """ Replace spaces with underscores in image file names and 
        in the image references of the text files.

        - every stored image whose base name contains a space is 
          deleted and added again under the new name with the 
          same manifest id
        - in every text file, spaces and '%20' in <img src> and 
          <image xlink:href> values are replaced with '_' and 
          the file is written back

        Returns 0.
    """
    # take a snapshot first: files are deleted and added below, 
    # which must not happen while the iterator is being consumed
    for img_id, href, mime in list(bk.image_iter()):
        old_fname = bk.href_to_basename(href)
        
        # repair and replace with underscores any 
        # unwanted spaces in the image file names 
        if ' ' in old_fname.strip():
            new_fname = old_fname.replace(' ', '_')
            data = bk.readfile(img_id)
            bk.deletefile(img_id)
            bk.addfile(img_id, new_fname, data)
            
            # NOTE(review): the result was never used; the call is kept 
            # in case building the opf matters here - confirm and remove
            bk.get_opf()
    
    # now repair and replace any unwanted 
    # spaces in the <img> and <svg> image 
    # file names that are currently used 
    # in the html file or epub xhtml files
    for text_id, href in list(bk.text_iter()):
        soup = BeautifulSoup(bk.readfile(text_id), 'html.parser')

        # check and repair <img> tag image names; tags without 
        # the attribute are skipped instead of raising KeyError
        for tag in soup.find_all('img'):
            if tag.has_attr('src'):
                tag['src'] = tag['src'].replace(' ', '_').replace('%20', '_')

        # check and repair <svg> tag image names          
        for tag in soup.find_all('image'):
            if tag.has_attr('xlink:href'):
                tag['xlink:href'] = tag['xlink:href'].replace(' ', '_').replace('%20', '_')
            
        bk.writefile(text_id, str(soup))    
    
    return(0)
        