# -*- coding: utf-8 -*-
__license__   = 'GPL v3'
__copyright__ = '2016,2017,2018,2019,2020,2021,2022,2023 DaltonST'
__my_version__ = "1.0.197"  #Clarify type of custom column to update

import os, sys
import apsw
import datetime
import codecs
import time
from time import sleep
import unicodedata
from copy import deepcopy
import re
import collections

from calibre import isbytestring
from calibre.constants import filesystem_encoding, DEBUG
from calibre.utils.logging import Log as log
from calibre.utils.zipfile import ZipFile

from polyglot.builtins import as_unicode, iteritems, map, unicode_type
from polyglot.queue import Queue

from calibre_plugins.job_spy.config import prefs
from calibre_plugins.job_spy.heading import log_heading_common
from calibre_plugins.job_spy.text_extraction_utils import JSHTMLGetContent

#----------------------------------------------
# Module-level job state.
# clear_or_initialize_globals() resets these at the start and at the end of
# every job, because the module stays loaded between jobs.
#----------------------------------------------
# Reset to False by clear_or_initialize_globals(); not read anywhere in the
# visible portion of this file.
my_terminate_early = False
#----------------------------------------------
# Module-level queue.  main_js_ot_t_extract() and the functions it calls take a
# parameter that is also named `notifications`, which shadows this object.
notifications = Queue()
#----------------------------------------------
my_current_book_id = "000000"
#----------------------------------------------
# Sorted deep copy of the job's book ids and shallow copy of its parameter
# dict; both are assigned in main_js_ot_t_extract().
my_book_ids = []
my_param_dict = {}
#----------------------------------------------
# Source-language names, per book language, that analyze_text() strips from a
# translator credit.  Filled by clear_or_initialize_globals().
my_spanish_languages_list = []
my_english_languages_list = []
my_french_languages_list = []
my_german_languages_list = []
#----------------------------------------------
# Translation keywords, per book language, used by analyze_text() to decide
# which language-specific clean-up rules apply.
my_spanish_keywords_set = set()
my_english_keywords_set = set()
my_french_keywords_set = set()
my_german_keywords_set = set()
#----------------------------------------------
# The `guidb` object passed to the job (its `library_path` attribute is read)
# and the plugin path; both are set in main_js_ot_t_extract().
my_guidb = ""
my_plugin_path = ""
#----------------------------------------------
# NOTE(review): this initial value lists 'TXT', but clear_or_initialize_globals()
# resets the list to ['EPUB'] before each job, the display string below says
# "EPUB (Only)", and format_stats_dict only has an 'EPUB' counter -- confirm
# whether 'TXT' is a leftover.
SUPPORTED_BOOK_FORMATS = ['TXT','EPUB']
#----------------------------------------------
SUPPORTED_BOOK_FORMATS_STRING =  "EPUB (Only)"
#----------------------------------------------
#----------------------------------------------
# Heading lines handed to log_heading_common().  Only s1-s3 are assigned a
# value in main_js_ot_t_extract(); s4 and s5 stay None in the visible code.
header_s1 = None
header_s2 = None
header_s3 = None
header_s4 = None
header_s5 = None
#----------------------------------------------
#----------------------------------------------
# Per-format book counters ('EPUB', 'UNSUPPORTED'); rebuilt by JS_Control().
format_stats_dict = {}
#--------------------------------------------------------------
#--------------------------------------------------------------
#--------------------------------------------------------------
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------
def main_js_ot_t_extract(self,guidb, plugin_path, book_ids, param_dict, log=None, abort=None, notifications=True):
    """Job entry point for 'Extract Original Title/Translator'.

    Copies the job arguments into the module globals, opens the library's
    metadata.db through APSW, logs the heading and hands control to
    JS_Control().

    Parameters:
        self, abort: accepted but not used by this function.
        guidb:       object whose `library_path` attribute locates the library.
        plugin_path: stored in the `my_plugin_path` global.
        book_ids:    ids of the books to process; a sorted deep copy is kept.
        param_dict:  job parameters; a shallow copy is kept.
        log:         callable used for all job logging.  It is called
                     unconditionally, so the None default is not usable.
        notifications: object with a `put((fraction, message))` method.  It is
                     used unconditionally, so the True default is not usable.

    Raises:
        Whatever apsw.Connection() raises when the database cannot be opened
        (logged first), and anything raised while the job runs.
    """

    clear_or_initialize_globals()

    global header_s1
    global header_s2
    global header_s3
    global header_s4
    global header_s5

    global my_book_ids
    global my_param_dict

    global my_guidb

    global my_plugin_path

    log("Starting 'Extract Original Title/Translator'")
    notifications.put((0.002, 'Beginning Extract Original Title/Translator'))
    #----------------------------------------------------------------------------------------------------------------
    # Work on private copies so the caller's objects are never mutated.
    my_book_ids = deepcopy(book_ids)
    del book_ids
    my_book_ids.sort()

    my_param_dict =  param_dict.copy()
    del param_dict

    my_plugin_path = plugin_path

    #----------------------------------------------------------------------------------------------------------------
    my_guidb = guidb
    path = as_unicode(my_guidb.library_path)
    path = path.replace(os.sep, '/')
    path = os.path.join(path, 'metadata.db')
    path = path.replace(os.sep, '/')

    log("Library DB: " + path)
    try:
        my_db = apsw.Connection(path)
    except Exception as e:
        log(as_unicode(e))
        raise   # bare raise keeps the original traceback

    # From here on the connection is open: always close it, even if the job fails.
    try:
        my_cursor = my_db.cursor()

        header_s1 =  as_unicode("SQLite Version: " + as_unicode(apsw.SQLITE_VERSION_NUMBER) + "    [APSW]")
        mysql = as_unicode("PRAGMA main.busy_timeout = 2000")         #milliseconds
        my_cursor.execute(mysql)
        header_s2 = mysql

        header_s3 = "Beginning 'Extract Original Title/Translator' Processing"

        log_heading_common(log,header_s1,header_s2,header_s3,header_s4,header_s5)

        #----------------------------------------------------------------------------------------------------------------
        log(" ")

        JS_Control(my_cursor,my_db,log,notifications)

        log(" ")
        #----------------------------------------------------------------------------------------------------------------
    finally:
        my_db.close()

    log(" ")
    log(" ")
    log("Job complete.")

    clear_or_initialize_globals()

    return
#----------------------------------------------------------------------------------------------------------------
def clear_or_initialize_globals():
    """Reset every module-level global to its start-of-job state.

    Called both at the beginning and at the end of a job, because the module
    (and therefore its globals) persists from one job to the next unless
    calibre is exited first.

    The keyword sets and the book-id list are emptied in place; the language
    lists are rebound to freshly sorted lists.
    """

    if DEBUG: print("Clearing or initializing globals")

    global my_book_ids
    global  my_terminate_early
    global  my_current_book_id
    global  my_guidb
    global  my_plugin_path
    global  SUPPORTED_BOOK_FORMATS
    global  SUPPORTED_BOOK_FORMATS_STRING
    global  header_s1
    global  header_s2
    global  header_s3
    global  header_s4
    global  header_s5
    global my_english_keywords_set
    global my_spanish_keywords_set
    global my_french_keywords_set
    global my_german_keywords_set
    global my_spanish_languages_list
    global my_english_languages_list
    global my_french_languages_list
    global my_german_languages_list

    # clear the list left over from the previous job (in place)
    my_book_ids[:] = []

    # initialize all other scalar globals
    my_terminate_early = False
    my_current_book_id = "000000"
    my_guidb = ""
    my_plugin_path = ""
    SUPPORTED_BOOK_FORMATS = ['EPUB']
    SUPPORTED_BOOK_FORMATS_STRING =  "EPUB (Only)"
    header_s1 = None
    header_s2 = None
    header_s3 = None
    header_s4 = None
    header_s5 = None

    #----------------------------------------------
    # keyword sets (matching is case-sensitive, so both capitalizations are listed)
    #----------------------------------------------
    my_english_keywords_set.clear()
    my_english_keywords_set.update((
        "Translated", "Translator", "Translation",
        "translated", "translator", "translation",
    ))

    my_spanish_keywords_set.clear()
    my_spanish_keywords_set.update((
        "Traducción", "Traductor", "Traduccion", "Traducido",
        "traducción", "traductor", "traduccion", "traducido",
        "Del Inglés", "Del inglés", "del Inglés", "del inglés",
    ))

    my_french_keywords_set.clear()
    my_french_keywords_set.update((
        "Traducteur", "traducteur",
        "Traduction", "traduction",
        "Traduit", "traduit",
    ))

    my_german_keywords_set.clear()
    my_german_keywords_set.update(("Übersetz", "übersetz"))

    #----------------------------------------------
    # language-name lists (built from sets to drop duplicates, then sorted)
    #----------------------------------------------
    my_spanish_languages_list = sorted({
        "albanés", "árabe", "bosnio", "búlgaro", "chino", "croata", "checo",
        "danés", "holandés", "inglés", "estonio", "finés", "francés", "alemán",
        "griego", "hebreo", "húngaro", "irlandés", "italiano", "japonés",
        "letón", "lituano", "noruego", "polaco", "portugués", "rumano", "ruso",
        "escocés", "serbio", "eslovaco", "esloveno", "sueco", "turco",
        "ucraniano",
    })

    # "Traducción del <language>" is itself treated as a Spanish keyword.
    for language in my_spanish_languages_list:
        my_spanish_keywords_set.add("Traducción del " + language)

    my_english_languages_list = sorted({
        "Albanian", "Arabian", "Bosnian", "Bulgarian", "Chinese", "Croatian",
        "Czech", "Danish", "Dutch", "English", "Estonian", "Finnish", "French",
        "German", "Greek", "Hebrew", "Hungarian", "Irish", "Italian",
        "Japanese", "Latvian", "Lithuanian", "Norwegian", "Polish",
        "Portuguese", "Romanian", "Russian", "Scottish", "Serbian",
        "Slovakian", "Slovenian", "Spanish", "Swedish", "Turkish", "Ukrainian",
    })

    # Previously these two sets were built but never stored, which left the
    # French and German lists permanently empty.
    my_french_languages_list = sorted({
        "anglais", "allemand", "espagnol", "italien",
    })

    my_german_languages_list = sorted({
        "Englisch", "Französisch", "Spanisch", "Italienisch",
    })
#----------------------------------------------------------------------------------------------------------------
def JS_Control(my_cursor,my_db,log,notifications):
    """Process every selected book and log a per-format summary.

    For each id in the `my_book_ids` global the book is analysed with
    extract_data_single_book(); when that reports no error and found an
    original title or a translator, update_custom_columns() stores the result.
    Progress is reported after each book through `notifications.put()`.

    Parameters:
        my_cursor, my_db: open database cursor/connection, passed through.
        log:              callable used for job logging.
        notifications:    object with a `put((fraction, message))` method.
    """

    if DEBUG: print("JS Control")

    global my_book_ids
    global my_param_dict
    global my_guidb
    global format_stats_dict

    double_rule = "═════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════"
    single_rule = "_______________________________________________________________________________________________"

    #-----------------------------------------------------------------
    library_path = my_guidb.library_path

    #-----------------------------------------------------------------
    # Reset the per-format counters (the old dict is emptied in place before
    # the global is rebound to an ordered copy).
    format_stats_dict.clear()
    format_stats_dict['EPUB'] = 0
    format_stats_dict['UNSUPPORTED'] = 0
    format_stats_dict = collections.OrderedDict(sorted(format_stats_dict.items()))
    #-----------------------------------------------------------------
    cc_list = build_custom_column_list(my_cursor,my_db,log,notifications)
    build_param_dict(cc_list)
    #-----------------------------------------------------------------
    n_total = len(my_book_ids)
    if n_total == 0:
        log(" ")
        log("No Books Found by Job.  Terminating.")
        return
    log("-------------------------------------------")
    log(" ")
    log("Number of selected books for which to Extract Original Title/Translator:     " + as_unicode('{:,}'.format(n_total)) )
    log(" ")
    log("Priority sequence in which book formats will be searched until one is found to use:     " + SUPPORTED_BOOK_FORMATS_STRING)
    log(" ")
    log(double_rule)
    #-----------------------------------------------------------------
    for n_done, book in enumerate(my_book_ids, 1):
        current_book = as_unicode(book)
        original_title,translator,errors,full_book_path = extract_data_single_book(my_cursor,my_db,log,notifications,current_book,library_path)
        if errors == "":
            if original_title or translator:
                update_custom_columns(my_cursor,my_db,log,notifications,current_book,original_title,translator)
        else:
            log("ERRORS for current book: " + as_unicode(current_book) + "  >>>" + errors)

        # Convert before dividing so the fraction is also right under
        # integer division (Python 2).
        n_percent = float(n_done) / n_total

        # Show only the file name; anything shorter than 6 characters means
        # no usable path was built for this book.
        book_file_name = full_book_path.rsplit("/", 1)[-1]
        if len(book_file_name) < 6:  #no path to show due to format issue
            book_file_name = "[A Book That Has No Supported Format]"
        notifications.put((n_percent, ("Finished: " + as_unicode(book_file_name) ) ) )
        log(" ")
        if n_done != n_total:
            log(single_rule)
        log(" ")
        sleep(0)
    #-------------------------------------
    #END FOR
    #-------------------------------------

    log(double_rule)

    #-----------------------------------------------------------------
    log(single_rule)
    log(" ")
    for k,v in iteritems(format_stats_dict):
        label = k.ljust(16)[0:16]   # fixed-width format column
        log("Format: " + label + " Books: " + as_unicode('{:,}'.format(v)))
    log(" ")
    log(single_rule)
    log(" ")
    log("'Extract Original Title/Translator' has completed.")
    log(" ")

    format_stats_dict.clear()
#----------------------------------------------------------------------------------------------------------------
def extract_data_single_book(my_cursor,my_db,log,notifications,current_book,library_path):
    """Locate, load and analyse one book.

    Parameters:
        my_cursor, my_db: open database cursor/connection, passed through.
        log:              callable used for job logging.
        notifications:    passed through to the helpers.
        current_book:     id of the book to process.
        library_path:     root path of the library.

    Returns:
        (original_title, translator, errors, full_book_path).  `errors` is ""
        on success; the first two items are None when nothing was found.
        Exceptions raised by the helpers are caught and reported through
        `errors`, never propagated.
    """

    if DEBUG: print("extract_data_single_book")

    global format_stats_dict

    original_title = None
    translator = None

    errors = ""
    # Bound up front so the return below works even when build_book_path() raises.
    full_book_path = ""

    try:
        full_book_path,format_to_use,errors = build_book_path(my_cursor,my_db,log,notifications,current_book,library_path)
        if errors == "" and format_to_use != "" :
            log("Book: " + as_unicode(full_book_path))
            format_stats_dict[format_to_use] = format_stats_dict[format_to_use] + 1
            my_html_text,errors = load_book_file(log,notifications,full_book_path,format_to_use)
            if errors == "":
                sleep(0)
                original_title,translator = analyze_text(my_html_text,log)
        else:
            errors = ("No Supported Format Found for Current Book: " + as_unicode(current_book))
            format_stats_dict['UNSUPPORTED'] = format_stats_dict['UNSUPPORTED'] + 1
    except Exception as e:
        # An exception with an empty message must still count as an error;
        # the caller treats errors == "" as success.
        errors = as_unicode(e) or e.__class__.__name__

    return original_title,translator,errors,full_book_path
#----------------------------------------------------------------------------------------------------------------
def update_custom_columns(my_cursor,my_db,log,notifications,current_book,original_title,translator):
    """Store the extracted original title and/or translator for one book.

    The target tables come from the `my_param_dict` global (keys
    'original_title_cc_table' and 'translator_cc_table'); the value "none"
    disables that column.  A value is written only when it is non-empty and
    sorts after a single space.

    Parameters:
        my_cursor:      database cursor used for the writes.
        my_db, notifications: accepted but not used here.
        log:            callable used for job logging.
        current_book:   id of the book being updated.
        original_title: extracted original title, or None/"".
        translator:     extracted translator, or None/"".

    Raises:
        Whatever the cursor raises; a failed write is rolled back first.
    """

    global my_param_dict

    original_title_cc_table = my_param_dict['original_title_cc_table']
    translator_cc_table = my_param_dict['translator_cc_table']

    if DEBUG: print("These MUST be 'Long Text, like Comments (NOT shown in the Tag Browser) - Interpret this Column as Short Text, Like a Title':")
    if DEBUG: print("original_title_cc_table: " + original_title_cc_table + "     " + "translator_cc_table: " + translator_cc_table)

    # The truthiness test comes first so None is never compared with a string.
    update_ot = original_title_cc_table != "none" and bool(original_title) and original_title > " "
    update_t = translator_cc_table != "none" and bool(translator) and translator > " "

    if not update_ot and not update_t:
        log("No keywords found in this book.  Nothing done.")
        return

    if update_ot:
        _write_custom_column_value(my_cursor,original_title_cc_table,current_book,original_title)
        log("Original Title changed to: ", original_title)

    if update_t:
        _write_custom_column_value(my_cursor,translator_cc_table,current_book,translator)
        log("Translator changed to:     ", translator)


def _write_custom_column_value(my_cursor,cc_table,current_book,value):
    """Insert or replace one book's value in a custom-column table, in its own transaction."""
    # The table name cannot be a bound parameter, so it is substituted into the text.
    mysql = "INSERT OR REPLACE INTO [CC_TABLE] (id,book,value) VALUES (NULL,?,?) "
    mysql = mysql.replace("[CC_TABLE]",cc_table)
    my_cursor.execute("begin")
    try:
        my_cursor.execute(mysql,(current_book,value))
    except Exception:
        # Do not leave the transaction open: the next "begin" would fail.
        my_cursor.execute("rollback")
        raise
    my_cursor.execute("commit")
#----------------------------------------------------------------------------------------------------------------
def analyze_text(my_html_text,log):
    """Search extracted book text for the original title and the translator.

    The text is split on the "<$STARTDATA$>" marker into rows.  Each row is
    tested against the user's keywords (the '|'-separated preferences
    GUI_TOOLS_EXTRACT_ORIGINAL_TITLE_KEYWORD and
    GUI_TOOLS_EXTRACT_TRANSLATOR_KEYWORD), longest keyword first.  The value is
    taken from the rest of the matching row, or from the next row when the
    matching row holds little more than the keyword.  A later matching row
    overwrites an earlier result.

    Parameters:
        my_html_text: the extracted text, as one string or a list of strings
                      (a list is joined without a separator).
        log:          accepted but not used here.

    Returns:
        (original_title, translator); each is a title-cased string, or None
        when nothing was found.
    """

    #~ ----------------------------------------------------------------
    global my_spanish_languages_list
    global my_english_languages_list
    global my_french_languages_list
    global my_german_languages_list
    #----------------------------------------------
    global my_spanish_keywords_set
    global my_english_keywords_set
    global my_french_keywords_set
    global my_german_keywords_set
    #----------------------------------------------
    # Once set by a matching keyword these flags stay set for all later rows.
    book_is_english = False
    book_is_spanish = False
    book_is_french = False
    book_is_german = False
    book_is_other = False
    #~ ----------------------------------------------------------------

    original_title_keyword_list = _split_keyword_pref(prefs['GUI_TOOLS_EXTRACT_ORIGINAL_TITLE_KEYWORD'])
    translator_keyword_list = _split_keyword_pref(prefs['GUI_TOOLS_EXTRACT_TRANSLATOR_KEYWORD'])

    original_title = ""
    translator = ""
    #---------------------------------------------------------

    if DEBUG: print("===============>>>rows in html: " + as_unicode(len(my_html_text)))

    if isinstance(my_html_text,list):
        my_html_text = "".join(my_html_text)

    if DEBUG: print("===============>>>my_html_text: ", as_unicode(my_html_text))

    html_list = my_html_text.split("<$STARTDATA$>")

    for r, row in enumerate(html_list):
        original_row = row
        if DEBUG: print(row)
        # A keyword on the very last row has no following row to take data from.
        has_next_row = (r + 1) < len(html_list)
        #~ -----------------------------------
        for kw in original_title_keyword_list:          # several variations:  (1) kw on first row, and the desired data on the next row; (2) both kw and data on the first row.
            kw = kw.replace(":","").strip()
            kw = kw.replace(".","").strip()
            if kw in row:
                row = row.strip()                                      # <$ENDDATA$><p>[]T<p>Título original: Helliconia Summer</p><$ENDDATA$><p>
                row = row.replace("<$ENDDATA$>"," ")
                row = row.replace(":"," ")
                row = row.replace("."," ")
                row = row.replace("  "," ")
                # keep only the text that precedes the first markup character
                n = row.find("<")
                if n > 0:
                    row = row[0:n].strip()
                n = row.find(">")
                if n > 0:
                    row = row[0:n].strip()
                max_len = len(kw) + 3  #tolerance
                if len(row) <= max_len:                          # row 0:  Título original:              row 1:   Helliconia Summer
                    if has_next_row:
                        original_title = html_list[r+1]
                        original_title = original_title.replace("<$ENDDATA$>"," ")
                        original_title = original_title.replace("."," ")
                        original_title = original_title.replace("  "," ")
                    else:
                        original_title = ""
                else:
                    original_title = row                              # Título original: Helliconia Summer
                original_title = original_title.replace(kw,"").strip()                     # Helliconia Summer
                if original_title.startswith(":"):
                    original_title = original_title[1: ].strip()
                if original_title.startswith(","):
                    original_title = original_title[1: ].strip()
                original_title = original_title.title()
                if DEBUG: print("Found---Original Title:", original_title)
                break
        #END FOR
        #~ -----------------------------------
        row = original_row
        for kw in translator_keyword_list:                   # several variations:  (1) kw on first row, and the desired data on the next row; (2) both kw and data on the first row.
            kw = kw.replace(":","").strip()
            kw = kw.replace(".","").strip()
            kw = kw.replace(",","").strip()
            if kw in row:
                if kw in my_spanish_keywords_set:
                    book_is_spanish = True
                elif kw in my_english_keywords_set:
                    book_is_english = True
                elif kw in my_french_keywords_set:
                    book_is_french = True
                elif kw in my_german_keywords_set:
                    book_is_german = True
                else:
                    book_is_other = True
                for item in translator_keyword_list:
                    if kw != item:
                        row = row.replace(item,"")  #for complex search terms, such as:  "xxxxx del ingles por" where it also matches another keyword of just xxxxx
                row = row.strip()                                      # <$ENDDATA$><p>[]Traducción: Joan Josep Mussarra.<$ENDDATA$><p>
                row = row.replace("<$ENDDATA$>","")
                row = row.replace(":"," ")
                row = row.replace("."," ")
                row = row.replace("  "," ")
                n = row.find("<")
                if n > 0:
                    row = row[0:n].strip()
                n = row.find(">")
                if n > 0:
                    row = row[0:n].strip()
                max_len = len(kw) + 3  #tolerance
                if len(row) <= max_len:                          # row 0:  Traducción:              row 1:   Joan Josep Mussarra
                    if has_next_row:
                        translator = html_list[r+1]
                        translator = translator.replace("<$ENDDATA$>","")
                        translator = translator.replace(":","")
                        translator = translator.replace(".","")
                    else:
                        translator = ""
                else:
                    translator = row                              # Traducción: Joan Josep Mussarra
                translator = translator.replace(kw,"").strip()                     # Joan Josep Mussarra
                #~----------------------------------------------------------------
                # strip "<preposition> <source language>" phrases and leftover keywords
                if book_is_spanish:
                    for language in my_spanish_languages_list:
                        s = "del " + language
                        translator = translator.replace(s,"")
                        translator = translator.replace(language,"")
                    #END FOR
                    for item in my_spanish_keywords_set:
                        translator = translator.replace(item,"")
                elif book_is_french:
                    for language in my_french_languages_list:
                        s = "de l'" + language
                        translator = translator.replace(s,"")
                        translator = translator.replace(language,"")
                    #END FOR
                    for item in my_french_keywords_set:
                        translator = translator.replace(item,"")
                elif book_is_german:
                    for language in my_german_languages_list:
                        s = "von " + language
                        translator = translator.replace(s,"")
                        translator = translator.replace(language,"")
                    #END FOR
                    for item in my_german_keywords_set:
                        translator = translator.replace(item,"")
                elif book_is_english:
                    for language in my_english_languages_list:
                        s = "from " + language
                        translator = translator.replace(s,"")
                        translator = translator.replace(language,"")
                    #END FOR
                    for item in my_english_keywords_set:
                        translator = translator.replace(item,"")
                else:
                    pass
                #~----------------------------------------------------------------
                translator = translator.replace("©","").strip()
                if translator.startswith(","):
                    translator = translator[1: ].strip()
                #~----------------------------------------------------------------
                if book_is_spanish:
                    # parenthesized forms first, so no empty "()" is left behind
                    translator = translator.replace("(Nota Del )","")
                    translator = translator.replace("(Nota De los es )","")
                    translator = translator.replace("(De Translación)","")
                    translator = translator.replace("Nota Del","")
                    translator = translator.replace("Nota De los es ","")
                    translator = translator.replace("De Translación","")
                    translator = translator.replace("(nota del )","")
                    translator = translator.replace("(nota de los es )","")
                    translator = translator.replace("(de translación)","")
                    translator = translator.replace("nota del","")
                    translator = translator.replace("nota de los es ","")
                    translator = translator.replace("de translación","")
                    if translator.count("¿") > 0 or translator.count(" ! ") > 0 or translator.count(" con ") > 0:  #corrupt data
                        translator = ""
                elif book_is_french:
                    translator = translator.replace("de l'","")
                    translator = translator.replace(" par","")
                elif book_is_german:
                    translator = translator.replace("aus dem","")
                elif book_is_english:
                    translator = translator.replace("by ","")
                else:
                    pass
                #~----------------------------------------------------------------
                if translator.startswith("-") or translator.startswith("—") :
                    translator = translator[1: ].strip()
                if translator.startswith("("):                   #"( De Daniel Najmías)"
                    if translator.endswith(")"):
                        translator = translator[1:-1].strip()
                #~----------------------------------------------------------------
                # drop a leading preposition ("del", "de", "par", "von", "by")
                if book_is_spanish:
                    if translator.startswith("del ") or translator.startswith("Del "):
                        translator = translator[3: ].strip()      # "del" is three characters long
                    if translator.startswith("de ") or translator.startswith("De "):
                        translator = translator[2: ].strip()
                elif book_is_french:
                    if translator.startswith("par ") or translator.startswith("Par "):
                        translator = translator[3: ].strip()
                elif book_is_german:
                    if translator.startswith("von ") or translator.startswith("Von "):
                        translator = translator[3: ].strip()
                elif book_is_english:
                    if translator.startswith("by ") or translator.startswith("By "):
                        translator = translator[2: ].strip()
                #~----------------------------------------------------------------
                translator = translator.title().strip()
                if len(translator) > 500:    #corrupt data
                    translator = ""
                if DEBUG: print("Found---Translator:", translator)
                break
        #END FOR
    #END FOR

    if original_title == "":
        original_title = None
    if translator == "":
        translator = None

    return original_title,translator


def _split_keyword_pref(pref_value):
    """Split a '|'-separated keyword preference into a list, longest keyword first."""
    keywords = []
    for row in pref_value.split("|"):
        row = row.strip()
        if row > " ":
            keywords.append(row)
    # Longer keywords must be tried before the shorter ones, because a shorter
    # keyword may be a substring of a longer one.
    keywords.sort(key=len, reverse=True)
    return keywords
#-------------------------------------------------------------------------------------------------------------------------------------
#-------------------------------------------------------------------------------------------------------------------------------------
def load_book_file(log,notifications,path,format_to_use):
    """Load the text of one book file.

    Only the "EPUB" format is handled; any other format yields an empty
    result without touching the file.

    Returns:
        (text, errors) -- for "EPUB" whatever load_epub_file() reports,
        otherwise ("", "").
    """

    if DEBUG: print("Loading book file: ", path)

    # Guard clause: nothing to load for a format other than EPUB.
    if format_to_use != "EPUB":
        return "", ""

    book_text, load_errors = load_epub_file(log,notifications,path)
    return book_text, load_errors
#-------------------------------------------------------------------------------------------------------------------------------------
def load_epub_file(log,notifications,path):
    """Extract the text of an EPUB file.

    The path is normalized to forward slashes and converted with
    as_unicode() before it is handed to extract_epub_text().

    Returns:
        (text, errors) exactly as reported by extract_epub_text().
    """

    if DEBUG: print("Loading epub file: ", path)

    # Separators are replaced first, then the result is converted.
    epub_path = as_unicode(path.replace(os.sep,"/"))

    book_text, load_errors = extract_epub_text(epub_path,log)
    return book_text, load_errors
#-------------------------------------------------------------------------------------------------------------------------------------
#-------------------------------------------------------------------------------------------------------------------------------------
def build_book_path(my_cursor,my_db,log,notifications,current_book,library_path):
    """Build the full path of a book's EPUB file from the library database.

    The book folder is read from books.path and the file name from data.name,
    for example:
        books.path:  Bella Andre/I Love How You Love Me_ The Sulliva (3295)
        data row:    "EPUB", "I Love How You Love Me_ The Sul - Bella Andre"
        result:      "S:/Calibre/QS/QuarantineAndScrub_Test8/Bella Andre/I Love How You Love Me_ The Sulliva (3295)/I Love How You Love Me_ The Sul - Bella Andre.epub"

    Returns a tuple (full_book_path, format_to_use, errors):
      * no books row:  ("", "", "NO BOOKS.PATH FOUND")
      * no data row:   errors is "NO DATA.FORMAT FOUND", format_to_use is ""
                       and the path is still assembled from an empty name
      * otherwise:     the "/"-separated path, the matched format, and ""
    """
    if DEBUG: print("Building book path")

    my_cursor.execute('SELECT path FROM books WHERE id = ?', [current_book])
    book_rows = my_cursor.fetchall()
    if not book_rows:
        return "", "", "NO BOOKS.PATH FOUND"

    # The folder is the last column of the last row returned.
    path_to_use = ""
    path_values = [value for book_row in book_rows for value in book_row]
    if path_values:
        path_to_use = path_values[-1]

    errors = ""
    format_to_use = ""
    name = ""

    my_cursor.execute("SELECT format,name FROM data WHERE book = ? AND format = 'EPUB' ", [current_book])
    data_rows = my_cursor.fetchall()
    if data_rows:
        # Stop at the first supported format; `name` keeps the value of the
        # last row examined even when no format matched.
        for book_format, name in data_rows:
            if book_format in SUPPORTED_BOOK_FORMATS:
                format_to_use = book_format
                break
    else:
        errors = "NO DATA.FORMAT FOUND"

    file_name = name + "." + format_to_use.lower()
    relative_path = os.path.join(path_to_use, file_name)
    full_book_path = os.path.join(library_path, relative_path).replace(os.sep,"/")

    return full_book_path,format_to_use, errors
#-------------------------------------------------------------------------------------------------------------------------------------
def extract_epub_text(epub_path,log):
    """Collect the parsed content of every HTML file inside an EPUB archive.

    Every archive member whose name ends with "html" is read in the order
    given by sort_with_embedded_digits(), converted with as_unicode() and fed
    to a single JSHTMLGetContent instance.  The instance's `content` attribute
    is returned as a list.

    Parameters:
        epub_path: path of the EPUB (zip) file to open.
        log:       callable used to report failures.

    Returns a tuple (my_list, errors).  my_list is empty when the archive has
    no HTML members or could not be processed; errors is "" on success,
    otherwise the text of the most recent exception.  This function does not
    raise: failures are logged and reported through `errors`.

    Example of the kind of page being harvested:
        <body>
          <div class="info">
            <p>Título original: <em>Rivers of London</em></p>
            <p>Ben Aaronovitch, 2012.</p>
            <p>Traducción: Joan Josep Mussarra.</p>
            <p class="salto10">Editor original: zxcvb66 (v1.0)</p>
            <p>ePub base v2.0</p>
          </div>
        </body>
    """
    if DEBUG: print("Extracting epub text: ", epub_path)

    errors = ""
    my_list = []
    zip_file = None

    try:
        zip_file = ZipFile(epub_path)
        html_list = [zip_file.getinfo(name) for name in zip_file.namelist() if name.endswith("html")]
        if DEBUG: print("number of html files within epub zip: ", as_unicode(len(html_list)))
        if html_list:
            html_list = sort_with_embedded_digits(html_list)
            my_html_content = JSHTMLGetContent()
            for html_ in html_list:
                # A failure in one member is recorded but must not stop the
                # remaining members from being processed.
                try:
                    data = as_unicode(zip_file.read(html_))
                    my_html_content.feed(data)
                except Exception as e:
                    log("[0] extract_epub_text: " + as_unicode(e))
                    errors = as_unicode(e)
            #END FOR
            my_list = list(my_html_content.content)
    except Exception as e:
        log("[1] extract_epub_text: " + as_unicode(e))
        errors = as_unicode(e)
    finally:
        # Close the archive explicitly so the underlying file handle is
        # released even when extraction failed part-way through.
        if zip_file is not None:
            try:
                zip_file.close()
            except Exception as e:
                log("[2] extract_epub_text: " + as_unicode(e))

    return my_list, errors
#-------------------------------------------------------------------------------------------------------------------------------------
def build_custom_column_list(my_cursor,my_db,log,notifications):
    """Return all rows of the custom_columns table as a new list.

    Each row holds (id, label, datatype, is_multiple, normalized) in the
    order of the SELECT.  A falsy fetch result (None or empty) yields [].
    """
    my_cursor.execute("SELECT id,label,datatype,is_multiple,normalized FROM custom_columns")
    fetched_rows = my_cursor.fetchall()
    # Always hand back a fresh list, never the cursor's own object.
    return list(fetched_rows) if fetched_rows else []
#-------------------------------------------------------------------------------------------------------------------------------------
def build_param_dict(cc_list):
    """Fill the module-level my_param_dict with the target custom columns.

    The two column labels are read from prefs, stripped of "#" and
    surrounding whitespace, and stored under their prefs keys.  cc_list rows
    of the form (id, label, datatype, is_multiple, normalized) are then
    scanned: only rows whose datatype is "comments" are considered, and a row
    whose label matches the original-title label is not also tested against
    the translator label.  A later matching row overrides an earlier one.

    For each of 'original_title_cc_table' and 'translator_cc_table' the stored
    value is "custom_column_<id>" when a numeric id was found, else "none".

    Returns my_param_dict (the same global object that was updated).
    """
    global my_param_dict

    title_pref_key = 'GUI_TOOLS_EXTRACT_ORIGINAL_TITLE_CUSTOM_COLUMN'
    translator_pref_key = 'GUI_TOOLS_EXTRACT_TRANSLATOR_CUSTOM_COLUMN'

    for pref_key in (title_pref_key, translator_pref_key):
        my_param_dict[pref_key] = prefs[pref_key].replace("#","").strip()

    title_column_id = ""
    translator_column_id = ""

    for column_id, label, datatype, is_multiple, normalized in cc_list:
        if DEBUG: print(label,datatype)
        if datatype != "comments":
            continue
        if label == my_param_dict[title_pref_key]:
            title_column_id = as_unicode(column_id)
        elif label == my_param_dict[translator_pref_key]:
            translator_column_id = as_unicode(column_id)
    #END FOR

    table_targets = (
        ('original_title_cc_table', title_column_id),
        ('translator_cc_table', translator_column_id),
    )
    for table_key, found_id in table_targets:
        if found_id.isdigit():
            my_param_dict[table_key] = "custom_column_[N]".replace("[N]",found_id)
        else:
            my_param_dict[table_key] = "none"

    if DEBUG:
        print("original_title_cc_table: " + my_param_dict['original_title_cc_table'])
        print("translator_cc_table: " + my_param_dict['translator_cc_table'])

    return my_param_dict
#-------------------------------------------------------------------------------------------------------------------------------------
#-------------------------------------------------------------------------------------------------------------------------------------
re_digits = re.compile(r'(\d+)')
def embedded_digits(html_list):
    """Split a name into a natural-sort key of text and integer pieces.

    Despite its name, html_list is a single string (a file name).  It is
    split on runs of digits; the odd-indexed pieces (the digit runs) are
    converted to int, so "ch10.html" becomes ["ch", 10, ".html"].
    """
    html_parts = re_digits.split(html_list)
    # With one capture group, re.split puts the captured digit runs at the
    # odd indexes; the even indexes are always (possibly empty) text.
    for index in range(1, len(html_parts), 2):
        html_parts[index] = int(html_parts[index])
    return html_parts
#-------------------------------------------------------------------------------------------------------------------------------------
def sort_with_embedded_digits(zipinfo_list):
    """Return the zip entries ordered naturally by file name.

    Entries are ordered by embedded_digits(entry.filename), so "ch2.html"
    sorts before "ch10.html".  Only the key is compared: entries whose keys
    are equal (e.g. "ch1.html" and "ch01.html", or duplicate names) keep
    their input order instead of falling back to comparing the entry objects
    themselves, which raises TypeError for objects with no ordering.

    Returns a new list; zipinfo_list is not modified.
    """
    return sorted(zipinfo_list, key=lambda zipinfo: embedded_digits(zipinfo.filename))
#-------------------------------------------------------------------------------------------------------------------------------------
#-------------------------------------------------------------------------------------------------------------------------------------
#-------------------------------------------------------------------------------------------------------------------------------------
#-------------------------------------------------------------------------------------------------------------------------------------
#-------------------------------------------------------------------------------------------------------------------------------------
#END of job