# -*- coding: utf-8 -*-
__license__   = 'GPL v3'
__copyright__ = '2023 DaltonST'
__my_version__ = "1.0.0"  #New

import apsw, os
from copy import deepcopy
from queue import Queue
import re, sre_constants

from calibre import isbytestring
from calibre.constants import filesystem_encoding, DEBUG
from calibre.gui2.ui import get_gui
from calibre.utils.html2text import html2text
from calibre.utils.logging import Log as log
from calibre.utils.zipfile import is_zipfile as zipfile_is_zipfile

from polyglot.builtins import iteritems

from calibre_plugins.extract_people_other_metadata.heading import log_heading_common

#~ Heading lines for the job log.  epom_main() assigns header_s1..header_s3 and then passes
#~ all five values to log_heading_common().
header_s1 = None
header_s2 = None
header_s3 = None
header_s4 = None
header_s5 = None

#~ Tweak Keys
#~ Each constant is the text expected on the left of '=' in a tweak line.  do_single_tweak()
#~ compares the (upper-cased) key against these names to choose the handler to run.
REMOVE_CHARACTERS =  "REMOVE_CHARACTERS"
REMOVE_CITATION =  "REMOVE_CITATION"
REMOVE_NUMBERS =  "REMOVE_NUMBERS"
REMOVE_LETTERS = "REMOVE_LETTERS"
REMOVE_PUNCTUATION =  "REMOVE_PUNCTUATION"
REMOVE_UNICODE_DASHES = "REMOVE_UNICODE_DASHES"
REMOVE_UNICODE_DOTS = "REMOVE_UNICODE_DOTS"
REMOVE_STARTSWITH = "REMOVE_STARTSWITH"
REMOVE_ENDSWITH = "REMOVE_ENDSWITH"
INSERT_SEPARATOR_BEFORE = "INSERT_SEPARATOR_BEFORE"
MINIMUM_LENGTH_TO_ACCEPT = "MINIMUM_LENGTH_TO_ACCEPT"
MAXIMUM_LENGTH_TO_ACCEPT = "MAXIMUM_LENGTH_TO_ACCEPT"
ADDITIONAL_SEARCH_REGEX =  "ADDITIONAL_SEARCH_REGEX"
ADDITIONAL_FINDITER_REGEX = "ADDITIONAL_FINDITER_REGEX"
CALIBRE_TEMPLATE_LANGUAGE_BUILTIN = "CALIBRE_TEMPLATE_LANGUAGE_BUILTIN"

#~ Digit characters that do_remove_numbers() deletes from the text.
NUMERIC_VALUES = ['0','1','2','3','4','5','6','7','8','9']

#~ Evaluated once, when this module is imported.
#~ NOTE(review): presumably the running calibre main window -- confirm what get_gui() returns in the job context.
maingui = get_gui()

#---------------------------------------------------------------------------------------------------------------------------------------
def epom_main(guidb, plugin_path, book_ids_list, param_dict, active_cc_list, template_functions, log=None, abort=None, notifications=True):
    """Entry point of the extraction job.

    Verifies that the library's metadata.db can be opened read-only, writes the
    log heading, then hands a sorted copy of the book ids to epom_control().

    Parameters:
        guidb: object exposing ``library_path`` (str or bytes).
        plugin_path, abort: accepted but not used by this function.
        book_ids_list: iterable of book ids; the caller's object is not modified.
        param_dict: parameter mapping; a shallow copy is passed on.
        active_cc_list, template_functions: passed through to epom_control().
        log: callable taking one message; ``None`` disables logging.
        notifications: object with ``put((fraction, message))``; anything
            without a ``put`` method is replaced by a private Queue.

    Returns:
        ``(None, None)`` when there are no book ids, otherwise
        ``(sorted_book_ids, total_results_list)``.

    Raises:
        Whatever apsw.Connection raises when metadata.db cannot be opened
        (the error text is logged first).
    """
    #~ The declared defaults (log=None, notifications=True) cannot be called/used as-is;
    #~ substitute harmless stand-ins so a call relying on the defaults does not crash.
    if log is None:
        log = lambda *args, **kwargs: None
    if not hasattr(notifications, 'put'):
        notifications = Queue()

    #--------------------------------------------------
    log("Starting 'Extra People & Other Metadata'")
    notifications.put((0.002, 'Beginning Extra People & Other Metadata'))
    #--------------------------------------------------
    path = guidb.library_path
    if isbytestring(path):
        path = path.decode(filesystem_encoding)
    path = path.replace(os.sep, '/')
    path = os.path.join(path, 'metadata.db')
    path = path.replace(os.sep, '/')

    log("Library DB: " + path)

    #~ Open and immediately close: this only proves the database is reachable.
    try:
        my_db = apsw.Connection(path, flags=apsw.SQLITE_OPEN_READONLY)
    except Exception as e:
        log(str(e))
        raise   # bare raise keeps the original traceback
    my_db.close()
    #--------------------------------------------------
    global header_s1, header_s2, header_s3, header_s4, header_s5
    header_s1 = "SQLite Version: " + str(apsw.SQLITE_VERSION_NUMBER) + "    [APSW]"
    header_s2 = "SQLite is Read-Only"
    header_s3 = "Beginning 'Extract People & Other Metadata' Processing"
    log_heading_common(log,header_s1,header_s2,header_s3,header_s4,header_s5)
    #--------------------------------------------------
    #~ sorted() builds a new list, so the caller's list is left untouched.
    my_book_ids_list = sorted(book_ids_list)

    if len(my_book_ids_list) == 0:
        return None,None

    my_param_dict = param_dict.copy()  #still ordered dict

    #--------------------------------------------------
    total_results_list = epom_control(guidb,my_param_dict,active_cc_list,my_book_ids_list,template_functions,log,notifications)
    #--------------------------------------------------
    return (my_book_ids_list,total_results_list)
#---------------------------------------------------------------------------------------------------------------------------------------
def epom_control(guidb,my_param_dict,active_cc_list,my_book_ids_list,template_functions,log,notifications):
    """Set up the optional full-text-search database, process all books, clean up.

    The FTS database is used only when ``my_param_dict['EPOM_USE_FTS_INDEX']``
    is exactly ``True`` and apsw_connect_to_current_ftsdb() reports a usable
    connection and cursor; in every other case ``use_ftsdb`` is forced to
    ``False`` so later stages never touch a missing cursor.

    Returns:
        The list produced by book_control().
    """
    my_ftsdb = None
    my_ftscursor = None

    #~ Normalise to a real bool: a truthy value other than True must not leave
    #~ use_ftsdb "on" while no connection was ever opened.
    use_ftsdb = my_param_dict['EPOM_USE_FTS_INDEX'] is True

    if use_ftsdb:
        my_ftsdb,my_ftscursor,db_exist_error = apsw_connect_to_current_ftsdb(guidb)
        if my_ftsdb is None or my_ftscursor is None or db_exist_error is True:
            use_ftsdb = False

    try:
        book_path_dict = get_all_book_paths(guidb,my_book_ids_list,log)

        #--------------------------------------------------
        total_results_list = book_control(guidb,use_ftsdb,my_ftsdb,my_ftscursor,my_param_dict,active_cc_list,my_book_ids_list,book_path_dict,template_functions,log,notifications)
        #--------------------------------------------------
        log(" ")
        log(" ")
        log("Job complete.")
    finally:
        #~ Close whatever connection object was handed back, even after a partial
        #~ connect failure or an exception while processing the books.
        if my_ftsdb is not None:
            my_ftsdb.close()

    #--------------------------------------------------
    return total_results_list
#---------------------------------------------------------------------------------------------------------------------------------------
def book_control(guidb,use_ftsdb,my_ftsdb,my_ftscursor,my_param_dict,active_cc_list,my_book_ids_list,book_path_dict,template_functions,log,notifications):
    """Process every book id in order and collect all extraction rows.

    For each book the text is fetched with get_book_text(), run through
    extract_metadata_from_text_control(), and a ``(fraction_done, message)``
    tuple is put on ``notifications``.

    Returns:
        One flat list holding the rows of every book.
    """
    book_count = len(my_book_ids_list)
    collected = []
    for done, book_id in enumerate(my_book_ids_list, start=1):
        book_id = int(book_id)
        text, origin = get_book_text(guidb,use_ftsdb,my_ftsdb,my_ftscursor,book_id,book_path_dict,my_param_dict,log)
        collected.extend(
            extract_metadata_from_text_control(guidb,my_param_dict,active_cc_list,book_id,text,origin,template_functions,log)
        )
        progress = float(done/book_count)
        notifications.put((progress, 'Extracting Metadata from Books'))
    return collected
#---------------------------------------------------------------------------------------------------------------------------------------
def get_book_text(guidb,use_ftsdb,my_ftsdb,my_ftscursor,current_book,book_path_dict,my_param_dict,log):
    """Return ``(raw_text, source)`` for one book.

    The FTS database is asked first when ``use_ftsdb`` is truthy; if that gives
    no source (empty string), the book's format file is read instead.
    """
    if use_ftsdb:
        text, origin = get_ftsdb_raw_text(my_ftsdb,my_ftscursor,current_book,log)
    else:
        text, origin = "", ""

    if origin == "":
        #~ nothing from the FTS index: fall back to reading the file on disk
        text, origin = get_format_raw_text(guidb,current_book,book_path_dict,my_param_dict,log)

    return text, origin
#---------------------------------------------------------------------------------------------------------------------------------------
def get_ftsdb_raw_text(my_ftsdb,my_ftscursor,current_book,log):
    """Fetch a book's searchable text from the ``books_text`` table.

    Only EPUB and TXT rows are queried, ordered by format name, and the first
    row whose text is not NULL wins.

    Returns:
        ``(text, "ftsdb")`` on success, ``("", "")`` when no usable row exists.
    """
    mysql = "SELECT book,format,searchable_text FROM books_text WHERE book = ? AND (format = 'EPUB' OR format = 'TXT')  ORDER BY format ASC"
    rows = my_ftscursor.execute(mysql, [int(current_book)])
    for _book, _fmt, text in rows:
        if text is not None:
            return text, "ftsdb"
    return "", ""
#---------------------------------------------------------------------------------------------------------------------------------------
def get_format_raw_text(guidb,current_book,book_path_dict,my_param_dict,log):
    """Read a book's text from its TXT or EPUB file inside the library folder.

    Parameters:
        guidb: object exposing ``library_path``.
        current_book: key looked up in ``book_path_dict``.
        book_path_dict: maps a book id to a ``(path, name, format)`` triple;
            an entry that does not unpack into three items is treated as
            "no supported format".
        my_param_dict: accepted but not used here.
        log: callable taking one message.

    Returns:
        ``(raw_text, source)``; both are empty strings when nothing was read.
    """
    raw_text = ""
    source = ""

    if current_book not in book_path_dict:  #no formats...
        return raw_text,source

    entry = book_path_dict[current_book]
    try:
        path,name,book_format = entry
    except (TypeError, ValueError): #no supported format (e.g. HTMLZ), but in dict..
        #~ only unpacking failures are expected here; anything else should propagate
        if DEBUG: print("No supported format found for current book: ", str(entry))
        log("No supported format found for current book: " + str(entry))
        return raw_text,source

    book_format = book_format.lower().strip()
    book_path = path.replace('/', os.sep)+os.sep + name + "." + book_format
    lib_path = guidb.library_path.replace('/', os.sep)+os.sep
    full_book_path = os.path.join(lib_path,book_path)
    full_book_path = full_book_path.replace(os.sep,"/").strip()

    if full_book_path.endswith(".txt"):
        raw_text,source = extract_text_from_txt(full_book_path,log)
    elif full_book_path.endswith(".epub"):
        if zipfile_is_zipfile(full_book_path):
            raw_text,source = extract_text_from_epub(full_book_path,log)
        else:
            log("invalid epub zipfile:  " + full_book_path)
    else:
        msg = "[2] Warning:  " + str(current_book) + "    " + full_book_path + "  has neither EPUB nor TXT format; nothing to do. "
        log(msg)
        log(" ")
        if DEBUG: print(msg)

    return raw_text,source
#---------------------------------------------------------------------------------------------------------------------------------------
def extract_text_from_txt(full_book_path,log):
    """Read a TXT book and return ``(raw_text, "txt")``.

    Every line read keeps its own line ending and gets one more newline
    appended, so the returned text has an extra newline after each line
    (unchanged from the previous behaviour).  On any read error the message
    is logged and ``("", "txt")`` is returned.

    NOTE(review): the file is opened with the platform default encoding --
    confirm whether an explicit encoding should be used for library TXT files.
    """
    raw_text = ""
    source = "txt"

    try:
        #~ the with-block closes the file; no explicit close() is needed
        with open(full_book_path, 'r') as f:
            lines = f.readlines()
        #~ join() also copes with an empty file, which has no lines at all
        raw_text = "".join(line + "\n" for line in lines)
    except Exception as e:
        msg = "TXT File Open/Read: ERROR: " + full_book_path + "    " + str(e)
        log(msg)
        if DEBUG: print(msg)

    return raw_text,source
#---------------------------------------------------------------------------------------------------------------------------------------
def extract_text_from_epub(full_book_path,log):
    """Convert every document item of an EPUB to text and concatenate the results.

    Each item's content goes through html2text() and is stripped before being
    appended; no separator is inserted between items.  If an error occurs, it
    is logged and whatever text was gathered up to that point is returned.

    Returns:
        ``(raw_text, "epub")``.
    """
    from calibre_plugins.extract_people_other_metadata import epub  # https://pypi.org/project/EbookLib

    source = "epub"
    pieces = []
    try:
        book = epub.read_epub(full_book_path)
        items = list(book.get_items_of_type(epub.ITEM_DOCUMENT)) # list of ebooklib.epub.EpubHtml objects
        for doc in items:
            contents = epub.EpubItem.get_content(doc)
            pieces.append(html2text(contents).strip())
        #END FOR
    except Exception as e:
        if DEBUG: print("ERROR:  extract_text_from_epub: " + str(e))
        log("ERROR:  extract_text_from_epub: " + str(e))

    #~ joined outside the try so text gathered before a failure is still returned
    raw_text = "".join(pieces)

    return raw_text,source
#---------------------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------------------
def extract_metadata_from_text_control(guidb,my_param_dict,active_cc_list,current_book,raw_text,source,template_functions,log):
    """Run every active custom-column rule against one book's text.

    Each entry of ``active_cc_list`` is unpacked as
    ``(custom_column, keywords, regex, tweaks)``.  The text is searched with
    search_raw_text(), the match is post-processed by apply_tweaks(), and one
    ``(current_book, custom_column, tweaked_text, source)`` row is recorded.
    A preview of the result (cut to 200 characters) is written to the log.

    Returns:
        The list of rows; empty when ``raw_text`` is the empty string.
    """
    results_list = []    #  row = current_book,custom_column,matched_text,source

    if raw_text == "":
        return results_list

    pristine_text = deepcopy(raw_text)

    for custom_column, keywords, regex, tweaks in active_cc_list:
        if DEBUG: print(custom_column, keywords, regex)
        matched_text, source = search_raw_text(my_param_dict,custom_column,keywords,regex,pristine_text,source,log)
        tweaked_text = apply_tweaks(guidb,current_book,custom_column,keywords,tweaks,matched_text,template_functions,log)
        results_list.append((current_book, custom_column, tweaked_text, source))

        #~ the stored row keeps the full value; only the logged preview is shortened
        preview = str(tweaked_text)
        if len(preview) > 200:
            preview = preview[0:200] + "........."
        info = str(current_book) + "  " + "Source: " + source.upper() + "  " + custom_column + "   "  + preview
        log(" ")
        log(info)
        log(" ")
        if DEBUG: print(info)
    #END FOR

    return results_list
#---------------------------------------------------------------------------------------------------------------------------------------
def search_raw_text(my_param_dict,custom_column,keywords,regex,orig_raw_text,source,log):
    """Find all keyword matches in the book text, then optionally narrow them with ``regex``.

    ``keywords`` is used as a regular expression (one trailing '|' is dropped
    first).  All non-blank matches are concatenated without a separator.  If
    that result is non-blank and ``regex`` is non-blank, the first ``regex``
    match inside it replaces the result; when ``regex`` does not match, the
    concatenated keyword matches are kept.

    Returns:
        ``(matched_text, source)``; ``source`` is passed through unchanged.
        Regular-expression errors are logged, not raised.
    """
    #~ By default the '^' and '$' symbol match the beginning and end, respectively, of a string.
    #~ By default, '^' and '$' would consider the whole text of a book as a single string and completely defeat their purpose.
    #~ re.MULTILINE changes that behavior so each line with an "end of line" (CRLF/newline) character is treated individually.
    #~ re.DOTALL makes the '.' operator (Wildcard operator) match all characters, including newline. By default, the '.' operator does not match newline characters.

    if keywords.endswith("|"):
        keywords = keywords[0:-1]

    flags = get_search_raw_text_flags(my_param_dict,custom_column)

    if DEBUG: print("==========>>>search_raw_text    flags: " + str(flags) )

    matched_text = ""
    try:
        kw = keywords.strip()

        if DEBUG: print("Keyword: " + kw )

        if not kw > " ":
            if DEBUG: print(" if not kw >  " )
            return matched_text,source

        #~ flags == 0 means "no flags", so a single call covers both cases;
        #~ finditer() always returns an iterator and only yields match objects.
        pieces = []
        for match_obj in re.finditer(kw,orig_raw_text,flags):
            s = str(match_obj.group())
            if s > " ":
                pieces.append(s)
        #END FOR
        matched_text = "".join(pieces).strip()
        if DEBUG: print("KEYWORDS FINDITER MATCHED...", matched_text)

        if matched_text > " ":
            regex = regex.strip()
            if regex > " ":
                match = re.compile(regex,flags).search(matched_text)
                if match is not None:
                    matched_text = match.group().strip()
                else:
                    if DEBUG: print("no match for regex: ", regex)
    except Exception as e:
        msg = "ERROR: in Keywords matching: " + keywords + "    Regular Expression Compilation: " + str(e)
        if DEBUG: print(msg)
        log(msg)

    return matched_text,source
#---------------------------------------------------------------------------------------------------------------------------------------
def get_search_raw_text_flags(my_param_dict,custom_column):
    """Return the combined ``re`` flags configured for ``custom_column``.

    The six ``EPOM_EXTRACT_PEOPLE_<n>_CUSTOM_COLUMN`` entries are checked in
    order; the first one equal to ``custom_column`` selects which
    ``..._IGNORECASE`` / ``..._MULTILINE`` / ``..._DOTALL`` settings apply.

    Returns:
        The OR of the enabled flags, or 0 when no slot matches or none is enabled.
    """
    for slot in range(1, 7):
        prefix = "EPOM_EXTRACT_PEOPLE_%d_" % slot
        if custom_column == my_param_dict[prefix + "CUSTOM_COLUMN"]:
            break
    else:
        #~ no slot is configured for this column
        return 0

    #~ sequence will always be:  I,M,D
    options = (
        ("IGNORECASE", re.IGNORECASE),
        ("MULTILINE", re.MULTILINE),
        ("DOTALL", re.DOTALL),
    )
    flags = 0
    for suffix, flag in options:
        if my_param_dict[prefix + suffix]:
            flags |= flag
    return flags
#---------------------------------------------------------------------------------------------------------------------------------------
def apply_tweaks(guidb,current_book,custom_column,keywords,tweaks,matched_text,template_functions,log):
    """Apply the user's tweak lines, in their given order, to ``matched_text``.

    Lines starting with '#' or lacking '=' are skipped.  A tweak whose value
    is blank is parsed but not executed.  Processing stops as soon as the text
    becomes blank.

    Returns:
        The text after the last executed tweak.
    """
    tweaked_text = matched_text
    for raw_line in create_tweaks_list(tweaks):
        tweak = raw_line.strip()
        if tweak.startswith("#") or "=" not in tweak:
            continue
        tweak_key, tweak_value, flags = get_tweak_key_value_from_tweak(tweak,log)
        #~ could be active but has no specified action due to user error...
        if str(tweak_value) > " ":
            tweaked_text = do_single_tweak(guidb,current_book,keywords,tweak,tweak_key,tweak_value,flags,tweaked_text,template_functions,log)
        if DEBUG: print("applying tweaks:  currently tweaked_text is: " + tweaked_text)
        if not tweaked_text > " ":
            break
    #END FOR
    return tweaked_text
#---------------------------------------------------------------------------------------------------------------------------------------
def create_tweaks_list(tweaks):
    """Split the tweaks text into stripped lines, dropping '#' comment lines.

    Each line is stripped before the comment test, so an indented comment
    (``"   # note"``) is dropped too.  Blank lines are kept as empty strings.

    Returns:
        The remaining lines, in their original order.
    """
    tweaks_list = []
    for row in tweaks.split("\n"):
        row = row.strip()
        if not row.startswith("#"):
            tweaks_list.append(row)
    #END FOR
    #~ Caution: do not sort the list; tweaks must be executed in the order received as set by the user.
    return tweaks_list
#---------------------------------------------------------------------------------------------------------------------------------------
def get_tweak_key_value_from_tweak(tweak,log):
    """Split one tweak line into ``(key, value, flags)``.

    The key is the text before the first '='; the value is the text after it,
    cut at the first ",flags=" if present.  The flag words IGNORECASE,
    MULTILINE and DOTALL are looked for anywhere in the whole tweak line.
    The key "REMOVE_(.......)" is mapped to REMOVE_CITATION, and the key is
    upper-cased last.

    Returns:
        ``(tweak_key, tweak_value, flags)``, or ``("", "", 0)`` if the line
        could not be split.
    """
    try:
        key_part, _separator, value_part = tweak.partition("=")
        tweak_key = key_part.strip()
        # ='pattern',flags=IGNORECASE,MULTILINE,DOTALL   ->   'pattern'
        tweak_value = value_part.strip().partition(",flags=")[0].strip()
    except Exception as e:
        if DEBUG: print("def get_tweak_key_value_from_tweak(tweak,log): ", tweak, "   ", str(e))
        return "","",0

    flag_words = (
        ("IGNORECASE", re.IGNORECASE),
        ("MULTILINE", re.MULTILINE),
        ("DOTALL", re.DOTALL),
    )
    flags = 0
    for word, flag in flag_words:
        if word in tweak:
            flags |= flag

    #~ the alias is compared before upper-casing, exactly as written
    if tweak_key == "REMOVE_(.......)":
        tweak_key = REMOVE_CITATION

    tweak_key = tweak_key.upper()

    if DEBUG: print(tweak_key + "  " + tweak_value + "  " + str(flags))

    return tweak_key,tweak_value,flags
#---------------------------------------------------------------------------------------------------------------------------------------
def do_single_tweak(guidb,current_book,keywords,tweak,tweak_key,tweak_value,flags,tweaked_text,template_functions,log):
    """Run the handler that belongs to ``tweak_key`` and return its result.

    An unknown key is logged and the text is returned unchanged.
    """
    if tweak_key == REMOVE_CHARACTERS:
        tweaked_text = do_remove_characters(keywords,tweak,tweak_key,tweak_value,tweaked_text,log)
    elif tweak_key == REMOVE_CITATION:
        tweaked_text = do_remove_citation(keywords,tweak,tweak_key,tweak_value,tweaked_text,log)
    elif tweak_key == REMOVE_NUMBERS:
        tweaked_text = do_remove_numbers(keywords,tweak,tweak_key,tweak_value,tweaked_text,log)
    elif tweak_key == REMOVE_LETTERS:
        tweaked_text = do_remove_letters(keywords,tweak,tweak_key,tweak_value,tweaked_text,log)
    elif tweak_key == REMOVE_PUNCTUATION:
        tweaked_text = do_remove_punctuation(keywords,tweak,tweak_key,tweak_value,tweaked_text,log)
    elif tweak_key == REMOVE_UNICODE_DASHES:
        tweaked_text = do_remove_unicode_dashes(keywords,tweak,tweak_key,tweak_value,tweaked_text,log)
    elif tweak_key == REMOVE_UNICODE_DOTS:
        tweaked_text = do_remove_unicode_dots(keywords,tweak,tweak_key,tweak_value,tweaked_text,log)
    elif tweak_key == REMOVE_STARTSWITH:
        tweaked_text = do_remove_startswith(keywords,tweak,tweak_key,tweak_value,tweaked_text,log)
    elif tweak_key == REMOVE_ENDSWITH:
        tweaked_text = do_remove_endswith(keywords,tweak,tweak_key,tweak_value,tweaked_text,log)
    elif tweak_key == INSERT_SEPARATOR_BEFORE:
        tweaked_text = do_insert_separator_before(keywords,tweak,tweak_key,tweak_value,tweaked_text,log)
    elif tweak_key == MINIMUM_LENGTH_TO_ACCEPT:
        tweaked_text = do_minimum_length_to_accept(keywords,tweak,tweak_key,tweak_value,tweaked_text,log)
    elif tweak_key == MAXIMUM_LENGTH_TO_ACCEPT:
        tweaked_text = do_maximum_length_to_accept(keywords,tweak,tweak_key,tweak_value,tweaked_text,log)
    elif tweak_key == ADDITIONAL_SEARCH_REGEX:
        tweaked_text = do_additional_search_regex(keywords,tweak,tweak_key,tweak_value,flags,tweaked_text,log)
    elif tweak_key == ADDITIONAL_FINDITER_REGEX:
        tweaked_text = do_additional_finditer_regex(keywords,tweak,tweak_key,tweak_value,flags,tweaked_text,log)
    elif tweak_key == CALIBRE_TEMPLATE_LANGUAGE_BUILTIN:
        tweaked_text = do_calibre_template_language_builtin(guidb,keywords,tweak,tweak_key,tweak_value,flags,tweaked_text,template_functions,log)
    else:
        #~ flags is a number, so it must be converted before being joined to the message
        msg = "Unknown Tweak Key: " + tweak_key + " in tweak: " + tweak + " with flags: " + str(flags)
        log(msg)

    return tweaked_text
#---------------------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------------------
def do_remove_characters(keywords,tweak,tweak_key,tweak_value,tweaked_text,log):
    """Delete every character listed in ``tweak_value`` from the text.

    Whitespace characters in ``tweak_value`` strip down to an empty string and
    therefore remove nothing.  The result is stripped before it is returned.
    """
    if DEBUG: print("do_remove_characters: ", tweak_value, "  old tweaked_text: ", tweaked_text)
    for raw_char in tweak_value:
        target = str(raw_char).strip()
        if target in tweaked_text:
            tweaked_text = tweaked_text.replace(target,"")
    #END FOR
    return tweaked_text.strip()
#---------------------------------------------------------------------------------------------------------------------------------------
def do_remove_citation(keywords,tweak,tweak_key,tweak_value,tweaked_text,log):
    """Remove parenthesised passages from the text when the tweak value is "True".

    Each match runs from an opening parenthesis to the nearest closing one and
    must contain at least one character.  Any other tweak value leaves the
    text unchanged.
    """
    if tweak_value == "True":
        #~ raw string: the backslashes belong to the regex, not to Python string escapes
        pattern = r"\(.+?\)"
        tweaked_text = re.sub(pattern,"",tweaked_text)
    return tweaked_text
#---------------------------------------------------------------------------------------------------------------------------------------
def do_remove_numbers(keywords,tweak,tweak_key,tweak_value,tweaked_text,log):
    """Delete every character listed in NUMERIC_VALUES when the tweak value is "True".

    Any other tweak value returns the text untouched.
    """
    if tweak_value != "True":
        return tweaked_text

    stripped = tweaked_text
    for digit in NUMERIC_VALUES:
        stripped = stripped.replace(digit,"")
    if stripped != tweaked_text:
        if DEBUG: print("executed: do_remove_numbers: ", tweak_value)
    return stripped
#---------------------------------------------------------------------------------------------------------------------------------------
def do_remove_letters(keywords,tweak,tweak_key,tweak_value,tweaked_text,log):
    """Delete the upper- and lower-case form of each character in ``tweak_value``.

    Commas in ``tweak_value`` are skipped.  A blank ``tweak_value`` leaves the
    text unchanged.
    """
    result = tweaked_text
    if tweak_value > " ":
        for letter in tweak_value:
            if letter == ',':
                continue
            for variant in (letter.upper(), letter.lower()):
                result = result.replace(variant,"")
    if result != tweaked_text:
        if DEBUG: print("executed:  do_remove_letters: ", tweak_value)
    return result
#---------------------------------------------------------------------------------------------------------------------------------------
def do_remove_punctuation(keywords,tweak,tweak_key,tweak_value,tweaked_text,log):
    """Delete ASCII punctuation from the text when the tweak value is "True".

    The characters removed are those in ``string.punctuation``.  The previous
    code looped over the characters of the value itself, so it deleted the
    letters of "True" from the text instead of punctuation.
    Any other tweak value leaves the text unchanged.
    """
    import string   # local import keeps this fix self-contained

    if tweak_value != "True":
        return tweaked_text

    orig_text = tweaked_text
    #~ one pass that drops every punctuation character
    tweaked_text = tweaked_text.translate(str.maketrans("", "", string.punctuation))
    if tweaked_text != orig_text:
        if DEBUG: print("executed: do_remove_punctuation: ", tweak_value)
    return tweaked_text
#---------------------------------------------------------------------------------------------------------------------------------------
def do_remove_unicode_dashes(keywords,tweak,tweak_key,tweak_value,tweaked_text,log):
    """Delete from the text every character that appears in ``tweak_value``.

    An empty ``tweak_value`` returns the text unchanged.
    """
    if not tweak_value:
        return tweaked_text

    #~ mapping a code point to None makes translate() drop that character
    cleaned = tweaked_text.translate(dict.fromkeys(map(ord, tweak_value)))
    if cleaned != tweaked_text:
        if DEBUG: print("executed: do_remove_unicode_dashes: ", tweak_value)
    return cleaned
#---------------------------------------------------------------------------------------------------------------------------------------
def do_remove_unicode_dots(keywords,tweak,tweak_key,tweak_value,tweaked_text,log):
    """Delete from the text every character that appears in ``tweak_value``.

    An empty ``tweak_value`` returns the text unchanged.
    """
    if not tweak_value:
        return tweaked_text

    #~ mapping a code point to None makes translate() drop that character
    cleaned = tweaked_text.translate(dict.fromkeys(map(ord, tweak_value)))
    if cleaned != tweaked_text:
        if DEBUG: print("executed: do_remove_unicode_dots: ", tweak_value)
    return cleaned
#---------------------------------------------------------------------------------------------------------------------------------------
def do_remove_startswith(keywords,tweak,tweak_key,tweak_value,tweaked_text,log):
    """Cut ``tweak_value`` off the front of the text when the text starts with it.

    A value wrapped in single quotes has the quotes removed first, which lets
    the user specify leading or trailing spaces.
    """
    prefix = tweak_value
    if prefix.startswith("'") and prefix.endswith("'"):
        prefix = prefix[1:-1]
    if prefix > "" and tweaked_text.startswith(prefix):
        tweaked_text = tweaked_text[len(prefix):]
        if DEBUG: print("executed:  do_remove_startswith: ", prefix)
    return tweaked_text
#---------------------------------------------------------------------------------------------------------------------------------------
def do_remove_endswith(keywords,tweak,tweak_key,tweak_value,tweaked_text,log):
    """Cut ``tweak_value`` off the end of the text when the text ends with it.

    A value wrapped in single quotes has the quotes removed first, which lets
    the user specify leading or trailing spaces.
    """
    suffix = tweak_value
    if suffix.startswith("'") and suffix.endswith("'"):
        suffix = suffix[1:-1]
    if suffix > "" and tweaked_text.endswith(suffix):
        tweaked_text = tweaked_text[:len(tweaked_text) - len(suffix)]
        if DEBUG: print("executed:  do_remove_endswith: ", suffix)
    return tweaked_text
#---------------------------------------------------------------------------------------------------------------------------------------
def do_insert_separator_before(keywords,tweak,tweak_key,tweak_value,tweaked_text,log):
    """Put a separator in front of every occurrence of a target string.

    ``tweak_value`` is parsed by get_non_func_args(2, ...); the first item of
    the result is the target and the second is the separator placed before it.
    A blank ``tweak_value`` leaves the text unchanged.
    """
    result = tweaked_text
    if tweak_value > " ":
        args = get_non_func_args(2,tweak_value)
        if DEBUG: print(str(args))
        separator = args[1]
        target = args[0]
        result = result.replace(target, separator + target)
    if result != tweaked_text:
        if DEBUG: print("executed: do_insert_separator_before: ", tweak_value)
    return result
#---------------------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------------------
def do_minimum_length_to_accept(keywords,tweak,tweak_key,tweak_value,tweaked_text,log):
    """Blank out the text unless its stripped length exceeds ``tweak_value``.

    A ``tweak_value`` that is not all digits returns the text untouched (not
    even stripped).  Otherwise the text is stripped, and it is replaced by ""
    when its length is less than or equal to the given number.

    NOTE(review): a text exactly as long as the stated minimum is rejected --
    confirm that this is the intended meaning of "minimum".
    """
    if not tweak_value.isdigit():
        return tweaked_text

    threshold = int(tweak_value)
    candidate = tweaked_text.strip()
    if len(candidate) <= threshold:
        if DEBUG: print("executed:  do_minimum_length_to_accept: ", tweak_value)
        return ""
    return candidate
#---------------------------------------------------------------------------------------------------------------------------------------
def do_maximum_length_to_accept(keywords,tweak,tweak_key,tweak_value,tweaked_text,log):
    """Blank out the text when its stripped length exceeds ``tweak_value``.

    A ``tweak_value`` that is not all digits returns the text untouched (not
    even stripped).  Otherwise the text is stripped, and it is replaced by ""
    when it is longer than the given number.
    """
    if not tweak_value.isdigit():
        return tweaked_text

    limit = int(tweak_value)
    candidate = tweaked_text.strip()
    if len(candidate) > limit:
        if DEBUG: print("executed:  do_maximum_length_to_accept: ", tweak_value)
        return ""
    return candidate
#---------------------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------------------
def do_additional_search_regex(keywords,tweak,tweak_key,tweak_value,flags,tweaked_text,log):
    """Replace the text with the first match of the regex in ``tweak_value``.

    One leading and one trailing single quote are removed from the pattern.
    (The previous code took those slices from the text instead of the pattern,
    which overwrote the pattern with the book text.)  When the pattern does
    not match, or cannot be compiled, the text is returned unchanged; errors
    are logged.

    Parameters:
        flags: ``re`` flags to compile with; 0 means none.
    """
    try:
        if tweak_value.startswith("'"):
            tweak_value = tweak_value[1:]
        if tweak_value.endswith("'"):
            tweak_value = tweak_value[:-1]
        tweak_value = str(tweak_value)
        #~ flags == 0 is the same as passing no flags
        match = re.compile(tweak_value, flags).search(tweaked_text)
        if match is not None:
            tweaked_text = match.group().strip()
    except Exception as e:
        msg = "ERROR: in additional_search_regex matching: " + tweak_value + "    Regular Expression Compilation: " + str(e)
        if DEBUG: print(msg)
        log(msg)
    return tweaked_text
#---------------------------------------------------------------------------------------------------------------------------------------
def do_additional_finditer_regex(keywords,tweak,tweak_key,tweak_value,flags,tweaked_text,log):
    """Append every match of the regex in ``tweak_value`` to the end of the text.

    One leading and one trailing single quote are removed from the pattern.
    (The previous code took those slices from the text instead of the pattern.)
    Matches are searched in the text as it was on entry and appended in order,
    without a separator.  Errors are logged and the text is returned as it
    stands at that point.

    NOTE(review): the matches are appended to the existing text rather than
    replacing it -- confirm that this is the intended result.

    Parameters:
        flags: ``re`` flags to search with; 0 means none.
    """
    try:
        if tweak_value.startswith("'"):
            tweak_value = tweak_value[1:]
        if tweak_value.endswith("'"):
            tweak_value = tweak_value[:-1]
        tweak_value = str(tweak_value)
        try:
            #~ flags == 0 is the same as passing no flags
            result = re.finditer(tweak_value,tweaked_text,flags)
        except Exception as e:
            #~ flags is a number, so it must be converted before being joined to the message
            msg = "ERROR: in additional_finditer_regex execution: " + tweak_value + str(flags) + "    Regular Expression Compilation: " + str(e)
            if DEBUG: print(msg)
            log(msg)
            return tweaked_text

        #~ the iterator keeps scanning the original string object; rebinding
        #~ tweaked_text below does not feed the appended text back into the search
        for match_obj in result:
            tweaked_text = tweaked_text + match_obj.group()
        #END FOR
    except Exception as e:
        msg = "ERROR: in additional_finditer_regex matching: " + tweak_value + "    Regular Expression Compilation: " + str(e)
        if DEBUG: print(msg)
        log(msg)

    return tweaked_text
#---------------------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------------------
def do_calibre_template_language_builtin(guidb,keywords,tweak,tweak_key,tweak_value,flags,tweaked_text,template_functions,log):
    """Run the Calibre template built-in named by ``tweak_value`` against ``tweaked_text``.

    ``template_functions`` maps function names to function objects exposing
    ``category``, ``arg_count`` and ``evaluate``.  Only functions whose
    category starts with 'String' are executed.  One-argument functions go
    through evaluate_simple_builtin(); everything else, including names that
    are not a plain key of ``template_functions`` (e.g. ``re(val, a, b)``),
    goes through evaluate_complex_builtin().

    Returns the resulting text, always as ``str`` unless ``tweak_value`` is
    empty, in which case ``tweaked_text`` is returned untouched.  Errors are
    logged and leave the text as it was.
    """
    if tweak_value == "":
        return tweaked_text

    orig_text = tweaked_text
    # Bound up-front so the debug output below cannot hit an unbound name when
    # an error occurs before any function object was resolved.
    funcobject = None

    try:
        #arguments are not needed for simple template functions
        #   =titlecase   not   =titlecase()  /  =titlecase(val)
        #   "(str)" and "(a)" are sometimes used in the Calibre Template documentation
        for implied_argument in ("()","(val)","(str)","(a)"):
            tweak_value = tweak_value.replace(implied_argument,"")

        if tweak_value in template_functions:
            if DEBUG: print("tweak_value in template_functions: ", tweak_key, "     ",tweak_value, template_functions[tweak_value])
            funcobject = template_functions[tweak_value]
            if funcobject.category.startswith('String'):      #~ Supported:   category = 'String manipulation', category = 'String case changes'
                if funcobject.arg_count == 1:
                    #e.g. titlecase, uppercase, etc. whose arguments are implied and do not vary
                    tweaked_text = evaluate_simple_builtin(guidb,funcobject,tweak_key,tweak_value,tweaked_text,template_functions,log)
                else:
                    #general principles
                    tweaked_text,funcobject = evaluate_complex_builtin(guidb,tweak,tweak_key,tweak_value,flags,tweaked_text,template_functions,log)
            else:
                log("Unsupported Built-in Template Function: " + tweak)
                if DEBUG: print("Unsupported Built-in Template Function: " + tweak)
        else:
            #e.g. re(val,pattern,replacement) where pattern & replacement are random values
            tweaked_text,funcobject = evaluate_complex_builtin(guidb,tweak,tweak_key,tweak_value,flags,tweaked_text,template_functions,log)
    except Exception as e:
        log("Error in Calibre Template Language for: " + tweak + "Msg: " + str(e))

    if not isinstance(tweaked_text,str):
        tweaked_text = str(tweaked_text)

    if tweaked_text != orig_text:
        if DEBUG: print("executed:  do_calibre_template_language_builtin: ", tweak_value)
        if DEBUG: print("tweak_value in template_functions: ", tweak_key, "     ",tweak_value, str(funcobject))

    return tweaked_text
#---------------------------------------------------------------------------------------------------------------------------------------
def evaluate_simple_builtin(guidb,funcobject,tweak_key,tweak_value,tweaked_text,template_functions,log):
    """Evaluate a one-argument template function on ``tweaked_text``.

    Returns whatever ``funcobject.evaluate`` returns when the function
    declares exactly one argument; otherwise the text is handed back as is.
    """
    if funcobject.arg_count != 1:
        return tweaked_text
    #~ def evaluate(self, formatter, kwargs, mi, locals, val):
    return funcobject.evaluate(None,None,None,None,tweaked_text)
#---------------------------------------------------------------------------------------------------------------------------------------
def evaluate_complex_builtin(guidb,tweak,tweak_key,tweak_value,flags,tweaked_text,template_functions,log):
    """Apply a template built-in that is written with explicit arguments.

    Recognised prefixes of ``tweak_value``: ``re(``, ``shorten``, ``strlen``,
    ``substr``, ``swap_around_comma`` and ``swap_around_articles``.  The value
    the function operates on is always ``tweaked_text``; the remaining
    arguments are parsed from ``tweak_value`` by get_args().

    Returns ``(text, funcobject)``.  ``text`` is the unchanged input when the
    parsed argument count does not match ``funcobject.arg_count`` (reported
    via argument_count_mismatch) or when no supported function applies
    (reported via ``log``).  ``funcobject`` is None if no function object was
    looked up in ``template_functions``.
    """
    #~ Supported:   category = 'String manipulation'

    funcobject = None

    tweak_value = tweak_value.replace("()","")
    tweak_value = tweak_value.replace("(val)","")

    if tweak_value.startswith("re("):
        name = "re"     # re(val, pattern, replacement)
        if name in template_functions:
            funcobject = template_functions[name]
            args = get_args(tweak_value,3,tweaked_text,name)
            if funcobject.arg_count != len(args):
                argument_count_mismatch(tweak,args,log)
                return tweaked_text,funcobject
            #~ def evaluate(self, formatter, kwargs, mi, locals, val, pattern, *args):
            pattern = args[1]
            replacement = args[2]
            tweaked_text = funcobject.evaluate(None,None,None,None,tweaked_text,pattern,replacement)
            # A comma replacement can leave the comma as the very first character; drop it.
            if replacement == ',' and tweaked_text.startswith(replacement):
                tweaked_text = tweaked_text[1: ].strip()
            return tweaked_text,funcobject

    if tweak_value.startswith("shorten"):
        name = "shorten"    # shorten(val, left chars, middle text, right chars)
        if name in template_functions:
            funcobject = template_functions[name]
            args = get_args(tweak_value,4,tweaked_text,name)
            if funcobject.arg_count != len(args):
                argument_count_mismatch(tweak,args,log)
                return tweaked_text,funcobject
            #~ def evaluate(self, formatter, kwargs, mi, locals,val, leading, center_string, trailing):
            leading = args[1]
            center_string = args[2]
            trailing = args[3]
            tweaked_text = funcobject.evaluate(None,None,None,None,tweaked_text,leading,center_string,trailing)
            return tweaked_text,funcobject

    if tweak_value.startswith("strlen"):
        name = 'strlen'     # strlen(val)
        if name in template_functions:
            funcobject = template_functions[name]
            args = get_args(tweak_value,1,tweaked_text,name)
            if funcobject.arg_count != len(args):
                argument_count_mismatch(tweak,args,log)
                return tweaked_text,funcobject
            #~ def evaluate(self, formatter, kwargs, mi, locals, val):
            tweaked_text = str(funcobject.evaluate(None,None,None,None,tweaked_text))
            return tweaked_text,funcobject

    if tweak_value.startswith("substr"):
        name = "substr"
        func = "substr(str, start, end)"
        if name in template_functions:
            funcobject = template_functions[name]
            args = get_args(tweak_value,3,tweaked_text,name)  # start & end are integers, not text...
            if DEBUG: print(func, "  len(args): ", str(len(args)))
            if funcobject.arg_count != len(args):
                argument_count_mismatch(tweak,args,log)
                return tweaked_text,funcobject
            #~ def evaluate(self, formatter, kwargs, mi, locals, str_, start_, end_)
            start = args[1]
            end = args[2]
            tweaked_text = funcobject.evaluate(None,None,None,None,tweaked_text,start,end)
            return tweaked_text,funcobject

    if tweak_value.startswith("swap_around_comma"):
        name = 'swap_around_comma'      # swap_around_comma(val)
        if name in template_functions:
            funcobject = template_functions[name]
            args = get_args(tweak_value,1,tweaked_text,name)
            if funcobject.arg_count != len(args):
                argument_count_mismatch(tweak,args,log)
                return tweaked_text,funcobject
            #~ def evaluate(self, formatter, kwargs, mi, locals, val):
            tweaked_text = funcobject.evaluate(None,None,None,None,tweaked_text)
            return tweaked_text,funcobject

    if tweak_value.startswith("swap_around_articles"):
        name = 'swap_around_articles'   # swap_around_articles(val)
        #only single value, not lists, supported, so no 'separator'...
        #the argument count would therefore always mismatch and is deliberately not checked.
        if name in template_functions:
            funcobject = template_functions[name]
            # Parsed only for its DEBUG trace; the result is intentionally ignored.
            get_args(tweak_value,1,tweaked_text,name)
            #~ def evaluate(self, formatter, kwargs, mi, locals, val, separator)
            separator = None
            tweaked_text = funcobject.evaluate(None,None,None,None,tweaked_text,separator)
            return tweaked_text,funcobject

    msg = "Unsupported Built-in Function encountered.  Ignoring:  " + tweak
    log(msg)
    if DEBUG: print(msg)

    return tweaked_text,funcobject
#---------------------------------------------------------------------------------------------------------------------------------------
def get_args(func,nargs,tweaked_text,name):
    r"""Split the argument list of a template-function call written in a Tweak.

    String Manipulation builtin functions may require an "escaped" comma
    in the values to differentiate them from the commas separating each argument.
    Example:    Wrong:  re(val, '^.+$', 'x,y,z')     Correct:  re(val, '^.+$', 'x\,y\,z')

    ``func`` is the call text, e.g. ``re(val, 'ISBN', ' \, ')``.  Each
    argument is returned stripped of surrounding blanks and of all single and
    double quotes, so the example yields ``['val', 'ISBN', ',']``.  For
    ``name == 'substr'`` with three arguments, the 2nd and 3rd are converted
    to ``int``; both become 0 unless both consist of digits only.

    An empty list is returned when the parentheses are missing, unbalanced or
    nested (more than one pair).  ``tweaked_text`` is not used.
    """
    func = func.strip()
    orig_func = func
    val = "val"

    if "(val," in func:
        func = func.replace("val,", "'" + val + "'" + ",")
    elif "('val'," in func:
        func = func.replace("'val'", val)

    args = []
    nl = func.count("(")
    nr = func.count(")")
    if not (nl > 0 and nr > 0 and nl == nr):
        return args

    if nl == 1:                                   #~ example:  "re(val, '^.+$', 'x\,y\,z')"
        s = func.split("(")[1].strip()            # val, '^.+$', 'x\,y\,z')
        s = s[0:-1]                               # drop the closing parenthesis
        s = s.replace("\\,","~~~~~")              #in case an escaped comma is in a value itself
        for r in s.split(","):
            r = r.strip()
            r = r.replace("~~~~~", ",")
            r = r.replace("'","").strip()
            r = r.replace('"','').strip()
            args.append(r)
        #END FOR
    else:
        # The module-level ``log`` name is the Log class, not a logger instance,
        # so calling it would report nothing; print the diagnostic instead.
        if DEBUG: print("function either not supported (e.g. re_group), or its format in Tweaks is invalid:  " + orig_func)

    if name == 'substr' and nargs == 3 and len(args) == 3:
        n1 = args[1]
        n2 = args[2]
        try:
            if n1.isdigit() and n2.isdigit():
                n1 = int(n1)
                n2 = int(n2)
                args[1] = n1
                args[2] = n2
            else:
                args[1] = 0
                args[2] = 0
        except Exception as e:
            if DEBUG: print("Exception in  get_args:  ",name, str(args), str(e))
            args[1] = 0
            args[2] = 0

    if DEBUG: print("get_args:  final args:  ", str(args))   #  ['ISBN 978–0–19–968477–9ISBN 978–0–19–150767–0', 'ISBN', ',']

    return args
#---------------------------------------------------------------------------------------------------------------------------------------
def argument_count_mismatch(tweak,args,log):
    """Report through ``log`` (and on stdout when DEBUG is set) that ``tweak`` was written with the wrong number of arguments."""
    prefix = "Built-in argument count mismatch.  Error in (and ignoring):  "
    suffix = "  with extracted args:  "
    report = prefix + tweak + suffix + str(args)
    log(report)
    if DEBUG:
        print(report)
#---------------------------------------------------------------------------------------------------------------------------------------
def get_non_func_args(nargs,tweak_value):
    r"""Split a comma separated Tweak value into its individual arguments.

    A comma that belongs to a value must be escaped as ``\,``.  Every argument
    is returned without surrounding blanks and without any single or double
    quotes.  ``nargs`` is accepted but not used.
    """
    placeholder = "~~~~~"     #stands in for an escaped comma while splitting
    args = []
    for r in tweak_value.replace("\\,",placeholder).split(","):
        r = r.strip()
        r = r.replace(placeholder, ",")
        r = r.replace("'","").strip()
        r = r.replace('"','').strip()
        args.append(r)
    #END FOR
    return args
#---------------------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------------------
def get_all_book_paths(guidb,my_book_ids_list,log):
    """Look up the library-relative path and one text format for each book id.

    Opens ``metadata.db`` under ``guidb.library_path`` read-only.  For every id
    in ``my_book_ids_list`` found in the ``books`` table the result maps
    ``book_id -> (path, None)``.  When the book also has a TXT or EPUB entry in
    the ``data`` table, the value becomes ``(path, name, format)`` for the
    first row in descending format order (TXT before EPUB).

    Returns an empty dict, after logging the error, when the database cannot
    be opened.
    """
    book_path_dict = {}

    db_path = guidb.library_path
    db_path = db_path.replace(os.sep, '/')
    db_path = os.path.join(db_path, 'metadata.db')
    db_path = db_path.replace(os.sep, '/')

    try:
        my_db = apsw.Connection(db_path, flags=apsw.SQLITE_OPEN_READONLY)
    except Exception as e:
        log(str(e))
        return book_path_dict

    # Two distinct statements: reusing a single variable for both made every
    # book after the first run the format query in place of the path query.
    path_sql = 'SELECT path FROM books WHERE id = ?'
    format_sql = "SELECT format,name FROM data WHERE book = ? AND (format = 'TXT' OR format = 'EPUB') ORDER BY format DESC"

    try:
        my_cursor = my_db.cursor()
        for current_book in my_book_ids_list:
            my_cursor.execute(path_sql,([current_book]))
            for row in my_cursor.fetchall():
                for col in row:
                    book_path_dict[current_book] = col,None

            my_cursor.execute(format_sql,([current_book]))
            format_rows = my_cursor.fetchall()
            # Only the first (preferred) format is recorded, and only for known books.
            if format_rows and current_book in book_path_dict:
                book_format,name = format_rows[0]
                book_path = book_path_dict[current_book][0]
                book_path_dict[current_book] = book_path,name,book_format
        #END FOR
    finally:
        # Release the connection even if a query fails.
        my_db.close()

    return book_path_dict
#---------------------------------------------------------------------------------------------------------------------------------------
def apsw_connect_to_current_ftsdb(guidb):
    """Open the library's ``full-text-search.db`` read-only.

    Returns ``(connection, cursor, db_exist_error)``.  ``db_exist_error`` is
    True, and the other two are None, when the file does not exist or exists
    without a ``books_text`` table.  On success a busy timeout of 10000 ms is
    set on the connection, which the caller is responsible for closing.
    """
    #-----------------------------
    path = guidb.library_path
    if isbytestring(path):
        path = path.decode(filesystem_encoding)
    path = path.replace(os.sep, '/')
    path = os.path.join(path, 'full-text-search.db')
    path = path.replace(os.sep, '/')

    missing_msg = str("The Calibre FTS DB for this Library is Missing:" + str(path))

    #Calibre does NOT automatically create it unless it needs to use it for the very first time.
    try:
        fts_file_exists = os.path.isfile(path)
    except Exception:
        fts_file_exists = False
    if not fts_file_exists:
        if DEBUG: print(missing_msg)
        return None,None,True
    if DEBUG: print(str("The Calibre FTS DB for this Library Exists:" + str(path)))

    my_db = apsw.Connection(path, flags=apsw.SQLITE_OPEN_READONLY)
    try:
        my_cursor = my_db.cursor()

        #BUT: Calibre sometimes creates an empty .db, so the db file exists, but has NO tables yet...
        mysql = "SELECT type,name FROM sqlite_master WHERE type='table' AND name='books_text' "
        my_cursor.execute(mysql)
        tmp_rows = my_cursor.fetchall()
    except Exception:
        # Do not leak the connection when the probe query fails.
        my_db.close()
        raise

    if not tmp_rows:
        # Nothing usable in there: close the connection instead of abandoning it.
        my_db.close()
        if DEBUG: print(missing_msg)
        return None,None,True

    mysql_pragma = "PRAGMA main.busy_timeout = 10000;"
    my_cursor.execute(mysql_pragma)

    return my_db,my_cursor,False
#---------------------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------------------
#END of epom_main_job.py