#----------------------------------------------------------------------------
# hTranslate.
#----------------------------------------------------------------------------
import sys
import os
import re
import time
import datetime
from pathlib import Path
from PyQt5.QtCore import pyqtSignal,  QObject
from bs4 import BeautifulSoup
import win32com.client as win32

import hConstant as hC
import hFileClassE as hFc
import hHtmlParser as hHp
import hXhtmlTemplate as hXt
import hSigilBkUtil as hBk

class HtmFile(hFc.File):
    """Filtered-HTML file extracted from a page range of a Word document.

    Word is driven through COM automation: the requested pages are copied
    into a new blank document, which is then saved as filtered HTML.  The
    outcome is published through ``createdHtmFileSignal``.
    """

    # Emitted by extract() with the content of the .htm file, or with a
    # message starting with '*' when the extraction failed.
    createdHtmFileSignal = pyqtSignal(str)

    def __init__(self, docxFile, htmFile, startPage, endPage, workingDirPath):
        """Store the extraction parameters.

        docxFile       -- path of the source Word document
        htmFile        -- path of the .htm file to produce
        startPage      -- first page of the range to extract
        endPage        -- last page of the range to extract
        workingDirPath -- working directory; it is concatenated directly with
                          the image directory name, so it is expected to end
                          with a path separator
        """
        super().__init__(htmFile)
        super().setFileProperties(htmFile)
        #
        self.docxFile = docxFile
        self.htmFile = htmFile
        self.startPage = startPage
        self.endPage = endPage
        self.workingDirPath = workingDirPath
        # Numeric values of the Word enumeration constants used below
        self.wdFormatFilteredHTML = 10
        self.wdGoToPage = 1
        self.wdGoToAbsolute = 1
        self.wdPrintView = 3
        self.wdPasteDefault = 0
        self.wdDoNotSaveChanges = 0

    def extract(self):
        """Extract pages startPage..endPage of the docx into the .htm file.

        Always emits ``createdHtmFileSignal``: with the content of the .htm
        file on success, or with an error report starting with '*' on failure.
        Word is shut down in both cases, so a failing COM call no longer
        leaves a hidden Word process running.
        """
        try:
            #   create Word application
            wordApp = win32.Dispatch("Word.Application")
            try:
                # delete image files left by a previous extraction, if any
                self.deleteExistingImgFiles(self.htmFile, self.workingDirPath)
                #   new document based on the .docx file
                wordApp.Documents.Add(self.docxFile)
                #   print view is needed to have page numbers
                wordApp.ActiveWindow.View.Type = self.wdPrintView

                #   go to start page, then end page, to define the page range
                wordApp.Selection.GoTo(What=self.wdGoToPage, Which=self.wdGoToAbsolute, Count=self.startPage)
                rgePages = wordApp.Selection.Range
                wordApp.Selection.GoTo(What=self.wdGoToPage, Which=self.wdGoToAbsolute, Count=self.endPage)
                rgePages.End = wordApp.Selection.Bookmarks("\\Page").Range.End
                #   select and copy the range
                rgePages.Select()
                wordApp.Selection.Copy()
                #   open an empty document and paste the selection into it
                wordApp.Documents.Add()
                wordApp.Selection.PasteAndFormat(self.wdPasteDefault)
                #   save the new document as filtered HTML
                wordApp.ActiveDocument.SaveAs2(self.htmFile, FileFormat=self.wdFormatFilteredHTML)
            finally:
                # runs on success and on failure
                self._quitWord(wordApp)
            # reads content
            fileContent = self.read()
        except Exception:
            excType, excValue, excTb = sys.exc_info()
            fileContent = '*' + str(excType) + '\n' + str(excValue) + '\n' + str(excTb)
        self.createdHtmFileSignal.emit(fileContent)

    def _quitWord(self, wordApp):
        """Close every open document without saving and quit Word."""
        # Iterate over a snapshot: closing documents while enumerating the
        # live collection changes the collection being enumerated.
        for doc in list(wordApp.Documents):
            # Never prompt: the only output we need was written by SaveAs2.
            doc.Close(SaveChanges=self.wdDoNotSaveChanges)
        wordApp.Quit(SaveChanges=self.wdDoNotSaveChanges)

    def deleteExistingImgFiles(self, htmFile, workingDir):
        """Delete the images left in the image directory by a previous run.

        When htm is extracted from a docx that contains images, a directory
        is created in the working directory to hold them.  For a file named
        Document the directory looked up here is Document_file.  Its content
        is removed before a new extraction; nothing happens when the
        directory does not exist.

        htmFile    -- path of the .htm file; only its stem is used
        workingDir -- working directory, concatenated as-is with the
                      directory name
        """
        imgDirPath = workingDir + Path(htmFile).stem + '_file'
        if not os.path.exists(imgDirPath):
            return
        for fileName in os.listdir(imgDirPath):
            os.remove(imgDirPath + '/' + fileName)


class TextFile(hFc.File):
    """Intermediate text file built from the filtered HTML saved by Word.

    The HTML is read line by line and grouped into chunks (paragraphs, list
    items, tables, level-1 headers).  Each chunk is rewritten with simplified
    markup and written on its own line of the text file.  The level-1 header
    is not written: it only supplies the chapter number and title.
    """

    # Emitted by createFromHtm() with the content of the text file, or with a
    # message starting with '*' when the creation failed.
    createdTxtFileSignal = pyqtSignal(str)

    # Markers that start a chunk, checked in this order.  The generic
    # MsoListParagraph marker must come after the more specific CxSp ones.
    _CHUNK_MARKERS = (
        ("<p class=MsoNormal", "normal"),
        ("<p class=MsoListParagraphCxSpFirst", "listOne"),
        ("<p class=MsoListParagraphCxSpMiddle", "listMid"),
        ("<p class=MsoListParagraphCxSpLast", "listLast"),
        ("<p class=MsoListParagraph", "singleItemList"),
        ("<h1", "header1"),
    )

    def __init__(self, txtFile, htmCode, workingDirPath, bk):
        """Store the conversion parameters.

        txtFile        -- path of the text file to produce
        htmCode        -- HTML code to convert, as a single string
        workingDirPath -- working directory; concatenated directly with the
                          image directory name
        bk             -- stored as-is; not used by the active code of this
                          class
        """
        super().__init__(txtFile)
        super().setFileProperties(txtFile)

        self.htmCode = htmCode
        self.txtFile = txtFile
        self.workingDirPath = workingDirPath
        self.bk = bk
        self.chapterNumber = ""
        self.chapterTitle = ""
        # alignment of the last image met, pending until the next 'normal' chunk
        self.imgAlignMode = ""

    def chapterTitleNum(self):
        """Return [chapterNumber, chapterTitle] as found by createFromHtm()."""
        return [self.chapterNumber, self.chapterTitle]

    def createFromHtm(self):
        """Convert htmCode into the text file and emit createdTxtFileSignal.

        The signal carries the content of the text file on success, or an
        error report starting with '*' on failure.
        """
        try:
            # deletes .txt file if exists
            if self.fileExists():
                self.delete()
            # opens file .txt for writing chunks
            f = self.openTextFile('w')
            try:
                self._writeChunks(f)
            finally:
                # close the file even when a chunk cannot be processed
                f.close()
            # read file back; this is what the signal carries
            fileContent = self.read()

            # modify txt file to have the correct path for the images
            imgDir = Path(self.txtFile).stem + '_file'
            if os.path.exists(self.workingDirPath + imgDir):
                imgFileLst = os.listdir(self.workingDirPath + imgDir)
                fileContent = fileContent.replace(imgDir, '../Images', len(imgFileLst))
                # update also on disk
                self.write(fileContent)

            ####                # load images into Sigil
            ####                self.bkUtil= hBk.BkUtil()
            ####                if not hC.DEBUG:
            ####                    for file in imgFileLst:
            ####                        filePath= self.workingDirPath+ imgDir+ '/'+ file
            ####                        self.loadImageToSigil(filePath)

        except Exception:
            excType, excValue, excTb = sys.exc_info()
            fileContent = '*' + str(excType) + '\n' + str(excValue) + '\n' + str(excTb)
        self.createdTxtFileSignal.emit(fileContent)

    def _writeChunks(self, f):
        """Group the lines of htmCode into chunks and write them to f."""
        locChunk = ""
        chunkType = ""
        for locRecord in self.htmCode.splitlines():
            # inside a table every line belongs to the table
            if chunkType != "table":
                for marker, markerType in self._CHUNK_MARKERS:
                    if marker in locRecord:
                        chunkType = markerType
                        break

            if "<table" in locRecord:
                chunkType = "table"

            if chunkType == "table":
                locChunk = locChunk + locRecord
                if "</table>" in locRecord:  # --- end chunk
                    locChunk = self.processChunk(locChunk, chunkType)
                    locChunk = self.adjustTable(locChunk)
                    f.write(locChunk + '\n')
                    locChunk = ""
                    chunkType = ""

            elif chunkType == "header1":
                locChunk = locChunk + locRecord
                if "</h1>" in locRecord:
                    self._setChapterFromHeader(self.processChunk(locChunk, chunkType))
                    # Reset even when the header is empty; otherwise the
                    # following unrelated lines would be collected as part of
                    # a header and leak into the next chunk.
                    locChunk = ""
                    chunkType = ""

            elif chunkType != "":
                locChunk = locChunk + locRecord + " "
                if "</p>" in locRecord:
                    locChunk = self.processChunk(locChunk, chunkType)
                    f.write(locChunk + '\n')
                    locChunk = ""
                    chunkType = ""

    def _setChapterFromHeader(self, header):
        """Set chapterNumber / chapterTitle from the cleaned header text."""
        if header == '':
            return
        # whole header as title: works for PROLOGUE and EPILOGUE
        self.chapterTitle = header
        # separators: en dash, hyphen-minus, em dash
        for splitChar in (chr(8211), chr(45), chr(8212)):
            if splitChar in header:
                # split on the first separator only, so that a dash inside
                # the title does not truncate it
                number, title = header.split(splitChar, 1)
                self.chapterNumber = number.replace(' ', '')
                self.chapterTitle = title.strip()  # remove leading and trailing spaces
                break

    def processChunk(self, locChunk, chunkType):
        """Return locChunk rewritten with the markup used in the text file.

        locChunk  -- HTML of one complete chunk
        chunkType -- 'normal', 'listOne', 'listMid', 'listLast',
                     'singleItemList', 'header1' or 'table'; any other value
                     leaves the chunk unchanged

        Side effect: when the chunk contains an image with an ``align=``
        attribute the alignment is remembered in imgAlignMode and applied to
        (then cleared by) the next 'normal' chunk processed.
        """
        # checks for images
        if 'src' in locChunk:
            # checks for img alignment
            filterStart = 'align='
            if filterStart in locChunk:
                start = locChunk.find(filterStart)
                end = locChunk.find(' ', start)
                self.imgAlignMode = locChunk[start + len(filterStart):end]

        if chunkType == 'normal':
            if "align=center" in locChunk:
                stringNew = '<p class="calibre12">'
            elif "&nbsp;" in locChunk:
                stringNew = '<p class="calibre17">'
            elif "font-size:12.0pt" in locChunk:
                stringNew = '<p class="calibre99">'
            else:
                stringNew = '<p class="calibre16">'

            if self.imgAlignMode:
                # set alignment for images
                stringNew = stringNew.replace('>', ' align="' + self.imgAlignMode + '">')
                self.imgAlignMode = ''

            locChunk = re.sub(r"<p[^>]*>", stringNew, locChunk)
            locChunk = re.sub(r"<span[^>]*>", '', locChunk)
            locChunk = locChunk.replace("</span>", "")
            locChunk = locChunk.replace("</I>", "</i>")  # lower case required by Sigil
            locChunk = locChunk.replace("<I>", "<i>")    # lower case required by Sigil
            if 'calibre16' in stringNew:
                locChunk = locChunk.replace("&nbsp;", "")
            locChunk = locChunk.replace("&nbsp;", "&#160;")

        elif chunkType == "listOne":
            locChunk = self._stripListMarkup(locChunk, "<ul><li>") + "</li>"

        elif chunkType == "listMid":
            if ">o<" in locChunk:
                # two level list: the 'o' bullet is hidden behind a temporary
                # marker before the tags are rewritten
                locChunk = locChunk.replace(">o<", ")^(")
                stringNew = "<ul><li>"
                stringNew2 = "</li></ul>"
            else:
                stringNew = "<li>"
                stringNew2 = "</li>"
            locChunk = self._stripListMarkup(locChunk, stringNew) + stringNew2
            locChunk = locChunk.replace(")^(", "")

        elif chunkType == "listLast":
            if ">o<" in locChunk:
                locChunk = locChunk.replace(">o<", ")^(")
                stringNew = "<ul><li>"
                stringNew2 = "</li></ul></ul>"
            else:
                stringNew = "<li>"
                stringNew2 = "</li></ul>"
            locChunk = self._stripListMarkup(locChunk, stringNew) + stringNew2

        elif chunkType == "singleItemList":
            locChunk = self._stripListMarkup(locChunk, "<ul><li>") + "</li></ul>"

        elif chunkType == "header1":
            locChunk = re.sub(r"<h1[^>]*>", "", locChunk)
            locChunk = re.sub(r"<span[^>]*>", "", locChunk)
            locChunk = locChunk.replace("</h1>", "")
            locChunk = locChunk.replace("</span>", "")
            locChunk = re.sub(r"<br[^>]*>", "", locChunk)

        elif chunkType == "table":
            # keep the spans but drop their attributes
            locChunk = re.sub(r"<span[^>]*>", "<span>", locChunk)

        return locChunk

    def _stripListMarkup(self, locChunk, openTag):
        """Replace the <p ...> tag with openTag and strip Word's list markup.

        Removes spans, the bullet character, &nbsp; entities and the closing
        </p>; the caller appends the closing list tags.
        """
        locChunk = re.sub(r"<p[^>]*>", openTag, locChunk)
        locChunk = re.sub(r"<span[^>]*>", "", locChunk)
        for unwanted in ("</span>", "·", "&nbsp;", "</p>"):
            locChunk = locChunk.replace(unwanted, "")
        return locChunk

    def adjustTable(self, locChunk):
        """Run the table chunk through hHp.MyHTMLParser and return its newChunk."""
        parser = hHp.MyHTMLParser()
        parser.feed(locChunk)
        # NOTE(review): if feed() is synchronous and endTable is still False
        # here, this loop never ends - confirm against hHtmlParser.
        while not parser.endTable:
            time.sleep(0.1)
        return parser.newChunk
            
            
####    def loadImageToSigil(self, image):
####        oImageFile= hFc.File()
####        oImageFile.setFileProperties(image)
####        manifestId= oImageFile.baseName
####        fileContent= oImageFile.read()
####        mimeType= oImageFile.mime
####        loadIntoSpine= False
####        askIfFileAlreadyIn= False
####        #
####        self.bkUtil.bkLoadFileIntoSigil(self.bk, manifestId, fileContent, mimeType, loadIntoSpine, askIfFileAlreadyIn)
####


class XhtmlFile(hFc.File):
    """XHTML chapter file built by filling the page template.

    The template (hXt.txtXhtmlPageTemplate) is copied line by line; lines
    containing the placeholders CHAPTERNUMBER, CHAPTERTITLE, CHAPTERTEXT and
    FILEPOS are filled in, then the result is prettified with BeautifulSoup.
    """

    # Emitted by create() with the prettified content, or with a message
    # starting with '*' when the creation failed.
    createdXhtmlFileSignal = pyqtSignal(str)

    def __init__(self, xhtmlFile, chapterNum, chapterTit, textCode):
        """Store the parameters.

        xhtmlFile  -- path of the .xhtml file to produce
        chapterNum -- text replacing CHAPTERNUMBER
        chapterTit -- text replacing CHAPTERTITLE
        textCode   -- chapter body, written in place of the CHAPTERTEXT line
        """
        super().__init__(xhtmlFile)
        super().setFileProperties(xhtmlFile)

        self.xhtmlFile = xhtmlFile
        self.chapterNum = chapterNum
        self.chapterTit = chapterTit
        self.textCode = textCode

    def create(self):
        """Write the .xhtml file and emit createdXhtmlFileSignal.

        On failure while filling the template the signal carries an error
        report starting with '*' and the method stops there: the report is
        neither prettified nor written over the .xhtml file.
        """
        templateFileContentLst = hXt.txtXhtmlPageTemplate.splitlines()

        x = self.openTextFile('w')
        try:
            try:
                for templateRecord in templateFileContentLst:
                    # only the first matching placeholder of a line is handled
                    if 'CHAPTERNUMBER' in templateRecord:
                        x.write(templateRecord.replace('CHAPTERNUMBER', self.chapterNum))
                    elif 'CHAPTERTITLE' in templateRecord:
                        x.write(templateRecord.replace('CHAPTERTITLE', self.chapterTit))
                    elif 'CHAPTERTEXT' in templateRecord:
                        # the placeholder line itself is dropped
                        for lineCode in self.textCode.splitlines():
                            x.write(lineCode + '\n')
                    elif 'FILEPOS' in templateRecord:
                        # anchor id built from the current date and time
                        locDate = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
                        x.write(templateRecord.replace('FILEPOS', 'filepos' + locDate))
                    else:
                        x.write(templateRecord)

                    x.write('\n')   # required because .splitlines() deletes \n at end of line
            finally:
                # close the file even when a replacement fails
                x.close()

            # read file back for prettifying
            fileContent = self.read()

        except Exception:
            excType, excValue, excTb = sys.exc_info()
            fileContent = '*' + str(excType) + '\n' + str(excValue) + '\n' + str(excTb)
            self.createdXhtmlFileSignal.emit(fileContent)
            return

        # prettify
        # NOTE(review): from_encoding only applies to bytes input; if read()
        # returns str it is ignored by BeautifulSoup - confirm and drop.
        soup = BeautifulSoup(fileContent, features="html.parser", from_encoding="utf-8")
        fileContent = soup.prettify()
        # save prettified
        x = self.openTextFile('wb')
        try:
            x.write(fileContent.encode(encoding='UTF-8'))
        finally:
            x.close()
        #
        self.createdXhtmlFileSignal.emit(fileContent)
        

class Translate(QObject):
    """Drive the docx -> htm -> txt -> xhtml conversion, one step at a time.

    The three steps are meant to be called in order: extractHtmFile(),
    createTxtFile(), createXhtmlFile().  Each step replaces the corresponding
    path attribute (htmFile, txtFile, xhtmlFile) with the file object it
    creates, and the following step reads from that object.  Each step
    reports through its own signal; ``error`` is set to True when the content
    reported by a step starts with '*'.
    """

    htmFileReadySignal = pyqtSignal(str)
    txtFileReadySignal = pyqtSignal(str)
    xhtmlFileReadySignal = pyqtSignal(str)

    def __init__(self, paramLst, bk):
        """Store the parameters.

        paramLst -- list of strings: [0] docx file, [1] htm file,
                    [2] txt file, [3] xhtml file, [4] start page,
                    [5] end page, [6] working directory path
        bk       -- passed on unchanged to TextFile
        """
        super().__init__()
        self.bk = bk
        self.paramLst = paramLst
        self.error = False
        # Defined up front so that createXhtmlFile() does not raise
        # AttributeError when the text step reported an error.
        self.chapterNumber = ""
        self.chapterTitle = ""
        # all parameters are string
        self.startDocxFile = paramLst[0]
        self.htmFile = paramLst[1]
        self.txtFile = paramLst[2]
        self.xhtmlFile = paramLst[3]
        self.startPage = paramLst[4]
        self.endPage = paramLst[5]
        self.workingDirPath = paramLst[6]

    #   ---
    def extractHtmFile(self):
        """Extract the page range of the docx into the htm file."""
        # from here on self.htmFile is the HtmFile object, not the path
        self.htmFile = HtmFile(self.startDocxFile, self.htmFile, self.startPage, self.endPage, self.workingDirPath)
        self.htmFile.createdHtmFileSignal.connect(self.htmFileReady)
        self.htmFile.extract()

    def htmFileReady(self, fileContent):
        """Record a failed extraction and forward the content."""
        if fileContent.startswith('*'):
            self.error = True
        self.htmFileReadySignal.emit(fileContent)

    #   ---
    def createTxtFile(self):
        """Build the txt file from the htm file; needs extractHtmFile() first."""
        htmCode = self.htmFile.read()
        # from here on self.txtFile is the TextFile object, not the path
        self.txtFile = TextFile(self.txtFile, htmCode, self.workingDirPath, self.bk)
        self.txtFile.createdTxtFileSignal.connect(self.txtFileReady)
        self.txtFile.createFromHtm()

    def txtFileReady(self, fileContent):
        """Record a failure, or pick up chapter number and title; forward the content."""
        if fileContent.startswith('*'):
            self.error = True
        else:
            self.chapterNumber, self.chapterTitle = self.txtFile.chapterTitleNum()
        self.txtFileReadySignal.emit(fileContent)

    #   ---
    def createXhtmlFile(self):
        """Build the xhtml file from the txt file; needs createTxtFile() first."""
        txtCode = self.txtFile.read()
        # from here on self.xhtmlFile is the XhtmlFile object, not the path
        self.xhtmlFile = XhtmlFile(self.xhtmlFile, self.chapterNumber, self.chapterTitle, txtCode)
        self.xhtmlFile.createdXhtmlFileSignal.connect(self.xhtmlFileReady)
        self.xhtmlFile.create()

    def xhtmlFileReady(self, fileContent):
        """Record a failed creation and forward the content."""
        if fileContent.startswith('*'):
            self.error = True
        self.xhtmlFileReadySignal.emit(fileContent)
