# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
__license__   = 'GPL v3'
__copyright__ = '2015,2016,2017,2018,2019,2020  DaltonST <DaltonShiTzu@outlook.com>'
__my_version__ = "1.0.82"   #Python 3 regression errors fixed

import os, sys, apsw, csv, datetime, re

from calibre import isbytestring
from calibre.constants import filesystem_encoding
from calibre.gui2.threaded_jobs import ThreadedJob
from calibre.utils.logging import Log

from polyglot.builtins import as_unicode, unicode_type
from polyglot.queue import Queue

#---------------------------------------------------------------------------------------------------
from calibre_plugins.multi_column_search.heading import log_heading_common
from calibre_plugins.multi_column_search.config import prefs
#---------------------------------------------------------------------------------------------------


# Module-level state shared between the functions below through 'global' statements.

# Not referenced by any function in this file.
mynothing = ""
# Id of the 'mcs_was_indexed' custom column, kept as text; refreshed by get_custom_column_id().
custom_column_id = as_unicode(0)
# Set to True by index_selected_books_control() when the requested custom column does not exist.
my_terminate_early = False

# Module-level defaults; the functions in this file receive their own
# 'notifications' and 'log' objects as parameters, which shadow these names.
notifications = Queue()
log = Log()

# Heading lines handed to log_heading_common(); s1..s3 are filled in by
# mcs_update_word_book_index(), s4 and s5 are never assigned in this file.
header_s1 = None
header_s2 = None
header_s3 = None
header_s4 = None
header_s5 = None

# Library folder with '/' separators; set by mcs_update_word_book_index() and read by build_book_path().
lib_path = ""
# Copy of the 'mwicc' job argument; get_custom_column_id() switches it off when the column is missing.
update_mcs_was_indexed_custom_column = False
# Copy of the 'wdcl' job argument: the characters treated as word delimiters while indexing.
word_delimiter_characters_list = ""

#---------------------------------------------------------------------------------------------------------------------------------------------
def mcs_update_word_book_index(self,my_guidb,sel_type,selected_books_list,mwicc,wdcl,log=None, abort=None, notifications=True):
    """Job entry point: build or refresh the MCS word-by-book index of a library.

    Opens the library's metadata.db directly through APSW, runs the indexing
    action selected by sel_type, records the resulting index row count through
    count_records_in_index() and logs a completion message.

    Parameters:
        self                -- not used by this function.
        my_guidb            -- object exposing the library folder as .library_path.
        sel_type            -- indexing action; forwarded to mcs_index_word_by_book_control().
        selected_books_list -- book ids the action applies to.
        mwicc               -- truthy when the 'mcs_was_indexed' custom column should be updated.
        wdcl                -- word delimiter characters used when splitting book text.
        log                 -- callable receiving the text to log; it is called
                               unconditionally, so the None default is not usable.
        abort               -- not used by this function.
        notifications       -- object with put((fraction, message)); it is used
                               unconditionally, so the True default is not usable.

    Returns None.  If the database cannot be opened the error is logged and
    the function returns without doing anything else.
    """

    #----------------------------------------------------------------------------------------------------------------
    global header_s1
    global header_s2
    global header_s3
    global header_s4
    global header_s5
    global lib_path
    global update_mcs_was_indexed_custom_column
    global word_delimiter_characters_list
    global my_terminate_early
    #----------------------------------------------------------------------------------------------------------------

    notifications.put((0.01, 'MCS Indexing Words by Book'))
    log(' ')

    path = my_guidb.library_path
    if isbytestring(path):
        path = path.decode(filesystem_encoding)
    path = path.replace(os.sep, '/')
    lib_path = path
    path = os.path.join(path, 'metadata.db')
    path = path.replace(os.sep, '/')

    log(path)

    try:
        my_db = apsw.Connection(path)
    except Exception as e:
        log(as_unicode(e))
        return

    # Everything that touches the connection runs inside try/finally so the
    # database is closed even when one of the indexing steps raises.
    try:
        my_cursor = my_db.cursor()

        mysql = "PRAGMA main.busy_timeout = 15000;"      #PRAGMA busy_timeout = milliseconds;
        my_cursor.execute(mysql)

        #----------------------------------------------------------------------------------------------------------------
        header_s1 =  as_unicode("SQLite Version: " + as_unicode(apsw.SQLITE_VERSION_NUMBER) + "    [APSW]")
        header_s2 = mysql
        header_s3 = "Beginning 'Index Words by Book' "
        log_heading_common(log,header_s1,header_s2,header_s3,header_s4,header_s5)
        #----------------------------------------------------------------------------------------------------------------

        # Publish the job options to the module-level names read by the helper functions.
        update_mcs_was_indexed_custom_column = mwicc
        word_delimiter_characters_list = wdcl

        log(" ")

        mcs_index_word_by_book_control(my_db,my_cursor,log,notifications,sel_type,selected_books_list)

        if not my_terminate_early:
            notifications.put((0.99, 'Indexing Words by Book'))

        count_records_in_index(my_db,my_cursor)
    finally:
        my_db.close()

    log(' ')
    log(' ')
    log("You should now defragment/vacuum this library's metadata.db by invoking Calibre menu path: Library > Library Maintenance > Check Library.")
    log(' ')
    log(' ')
    log('MCS Word by Book Indexing is Complete. ')
    log(' ')
    log(' ')
#---------------------------------------------------------------------------------------------------------------------------------------------
def mcs_index_word_by_book_control(my_db,my_cursor,log,notifications,sel_type,selected_books_list):
    """Log the chosen indexing action, then run the indexing pipeline.

    sel_type is one of 'selected', 'all' or 'all_unindexed'; any other value is
    logged as invalid, after which the remaining steps still run.  For
    'all_unindexed' the list is first reduced by trim_selected_books_list().

    Steps, in order: index the books (only when the list is non-empty), add
    words from the CSV file when the WORD_INDEX_TRIM_CSV pref equals "True",
    and finally remove index rows whose book no longer exists.
    """
    blank = " "

    def book_count():
        # Looked up at logging time, so it reflects the list as it is at that moment.
        return as_unicode(len(selected_books_list))

    if sel_type == 'selected' :
        log("Indexing Action chosen: Index/Reindex Selected Books [TXT Only]")
        log(blank)
        log("Number of books to be indexed/reindexed (potentially): " + book_count() )
    elif sel_type == 'all':
        log("Indexing Action chosen: Index/Reindex All Books [TXT Only]")
        log(blank)
        log("Number of books to be indexed/reindexed: " + book_count() )
    elif sel_type == 'all_unindexed':
        log("Indexing Action chosen: Index All Unindexed Books [TXT Only]")
        log(blank)
        log(blank)
        log("Number of books available to be indexed if needed and possible:    " + book_count() )
        selected_books_list = trim_selected_books_list(my_db,my_cursor,log,selected_books_list)
        log("Number of unindexed books that need to be indexed if possible: " + book_count() )
    else:
        log("Invalid Indexing Action")

    log(blank)

    has_books = len(selected_books_list) > 0
    if has_books:
        index_selected_books_control(my_db,my_cursor,log,notifications,selected_books_list)

    csv_requested = prefs['WORD_INDEX_TRIM_CSV'] == unicode_type("True")
    if csv_requested:
        add_csv_words_control(my_db,my_cursor,log,notifications)

    remove_deleted_books_from_index(my_db,my_cursor,log)
#---------------------------------------------------------------------------------------------------------------------------------------------
def index_selected_books_control(my_db,my_cursor,log,notifications,selected_books_list):
    """Index the TXT format of every book in selected_books_list.

    For each book the TXT file path is resolved with build_book_path(); books
    without a TXT format are skipped.  Progress is reported through
    notifications.put((fraction, message)).  When updating the
    'mcs_was_indexed' custom column was requested, the column is set for each
    successfully indexed book and synchronized with the index at the end.

    A book is counted (and flagged) as indexed only when
    mcs_update_txt_format_word_index_table() reports success.  If the custom
    column was requested but does not exist, the module-level
    my_terminate_early flag is set and no book is indexed.

    Returns None.  Resets the module-level total_words_added_to_index counter.
    """

    global my_terminate_early
    global update_mcs_was_indexed_custom_column
    global total_words_added_to_index

    if update_mcs_was_indexed_custom_column:
        get_custom_column_id(my_db,my_cursor)
        # get_custom_column_id() switches the flag off when the column is missing.
        if not update_mcs_was_indexed_custom_column:
            my_terminate_early = True
            log("MCS User Error:  the MCS special 'yes/no' (boolean) custom column 'mcs_was_indexed' does not exist, but it was requested that it be updated.")

    total_words_added_to_index = 0

    word_index_minimum_number_of_letters = int(prefs['WORD_INDEX_MINIMUM_NUMBER_OF_LETTERS'])
    word_index_maximum_number_of_letters = 50

    log("Word lengths in this min/max range will be indexed: ", as_unicode(word_index_minimum_number_of_letters) + " to " + as_unicode(word_index_maximum_number_of_letters))
    log(" ")
    log(" ")
    log(" ")

    total_books = len(selected_books_list)
    if total_books == 0:
        return

    total_indexed = 0
    for i, current_book in enumerate(selected_books_list, 1):
        # Checked before any work is done so that a terminate request stops
        # the run without indexing one more book first.
        if my_terminate_early:
            break
        full_book_path = build_book_path(my_db,my_cursor,current_book)
        if not full_book_path:
            continue
        msg = 'Indexing: ' + full_book_path
        n_progress = float(i) / total_books
        notifications.put((n_progress, msg))
        was_indexed = mcs_update_txt_format_word_index_table(my_db,my_cursor,log,current_book,full_book_path,word_index_minimum_number_of_letters,word_index_maximum_number_of_letters)
        if my_terminate_early:
            break
        if not was_indexed:
            # The indexer returns False for empty or unreadable files and on
            # database errors; such books must not be counted or flagged.
            log("Not indexed: " + full_book_path)
            continue
        if update_mcs_was_indexed_custom_column:
            set_mcs_was_indexed_custom_column(my_db,my_cursor,current_book)
        total_indexed = total_indexed + 1
    #END FOR
    if update_mcs_was_indexed_custom_column:
        synchronize_set_mcs_was_indexed_custom_column(my_db,my_cursor,log)
    log(" ")
    log(" ")
    log(" ")
    log("Total number of books indexed: " + as_unicode(total_indexed))
    log(" ")
    log("Total number of words indexed: " + as_unicode(total_words_added_to_index))
    log(" ")
    log(" ")
#---------------------------------------------------------------------------------------------------------------------------------------------
def mcs_update_txt_format_word_index_table(my_db,my_cursor,log,current_book,current_book_full_path,word_index_minimum_number_of_letters,word_index_maximum_number_of_letters):
    """Replace the index rows of one book with the distinct words of its TXT file.

    The file text is cleaned (literal backslash sequences, the configured
    delimiter characters and ASCII digits become spaces), split on whitespace,
    filtered by the WORD_INDEX_SKIP_REGEX pref and by the min/max word length,
    lower-cased and de-duplicated.  The book's existing rows in
    _mcs_word_book_index are then deleted and the new words inserted in a
    single transaction, so a failure leaves the previous index rows intact.

    Parameters:
        my_db                  -- database connection (not used directly).
        my_cursor              -- cursor used for all SQL statements.
        log                    -- callable receiving the per-book summary line.
        current_book           -- id of the book being indexed.
        current_book_full_path -- path of the book's TXT file.
        word_index_minimum_number_of_letters / word_index_maximum_number_of_letters
                               -- inclusive word length limits.

    Returns True when the book was indexed, False when the file is empty or
    any error occurred (the error is printed, never raised).  Adds the number
    of inserted words to the module-level total_words_added_to_index.
    """
    # this function is used in ui.py too.

    global word_delimiter_characters_list
    global total_words_added_to_index

    in_transaction = False
    try:
        # NOTE(review): the file is decoded with the platform default encoding,
        # exactly as before -- confirm whether TXT books are always UTF-8.
        with open(current_book_full_path, "r") as current_txt_format_file:
            long_string = current_txt_format_file.read()

        if not long_string:
            return False

        # Literal (two-backslash) escape sequences found in the text become spaces.
        # https://docs.python.org/2.0/ref/strings.html
        for literal in (r'\\a', r'\\b', r'\\f', r'\\n', r'\\r', r'\\t', r'\\v', r'\\'):
            long_string = long_string.replace(literal,' ')

        for c in word_delimiter_characters_list:
            if not c:
                continue
            try:
                # re.escape() makes every delimiter a literal, so letters are
                # not turned into regex classes (\d, \w, ...) or invalid escapes.
                long_string = re.sub(re.escape(c)," ",long_string,flags=re.IGNORECASE)
            except (re.error, TypeError):
                # Best effort: a delimiter that cannot be used is skipped.
                continue
        #END FOR

        long_string = re.sub("[0-9]"," ",long_string)

        # Splitting on any whitespace also removes the newlines that end each
        # line of the file and never yields empty words.
        words_list = long_string.split()
        del long_string

        re_skip = None
        re_skip_regex = (prefs['WORD_INDEX_SKIP_REGEX'] or "").strip()
        if re_skip_regex:
            try:
                re_skip = re.compile(re_skip_regex, re.IGNORECASE)
            except Exception as e:
                # An invalid user regex disables skipping instead of failing the book.
                print("re_skip re.compile exception: ", as_unicode(e))

        word_occurrences_set = set()

        for word in words_list:
            try:
                if not isinstance(word,unicode_type):
                    word = unicode_type(word)
                if re_skip is not None and re_skip.search(word):
                    continue
                n = len(word)
                if word_index_minimum_number_of_letters <= n <= word_index_maximum_number_of_letters:
                    word_occurrences_set.add(word.lower())
            except Exception as e:
                print("[2]", as_unicode(e))
                continue
        #END FOR
        del words_list

        word_occurrences_list = list(word_occurrences_set)
        del word_occurrences_set

        # Delete and insert inside one transaction: either the book's index is
        # fully replaced or, on error, the old rows are kept.
        my_cursor.execute("begin")
        in_transaction = True
        mysql = "DELETE FROM _mcs_word_book_index WHERE book = ?"
        my_cursor.execute(mysql,([current_book]))
        mysql = "INSERT OR IGNORE INTO _mcs_word_book_index  (book,word) VALUES(?,?) "
        for word in word_occurrences_list:
            my_cursor.execute(mysql,(current_book,word))
        #END FOR
        my_cursor.execute("commit")
        in_transaction = False

        n_words = len(word_occurrences_list)
        try:
            total_words_added_to_index = total_words_added_to_index + n_words
        except NameError:
            # The counter is only created by index_selected_books_control();
            # other callers may get here before it exists.
            total_words_added_to_index = n_words

        log(current_book_full_path + "   [" + as_unicode(n_words) + " words indexed]")
        return True
    except Exception as e:
        if in_transaction:
            try:
                my_cursor.execute("rollback")
            except Exception:
                # Nothing more can be done if the transaction is already gone.
                pass
        print("[END] Exception in 'def mcs_update_txt_format_word_index_table': ", as_unicode(e))
        return False
#-----------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------------------------
def build_book_path(my_db,my_cursor,current_book):
    """Return the full path of the TXT format file of a book, or None.

    The book's folder comes from books.path and the file name from the book's
    'TXT' row in the data table; both are joined below the module-level
    lib_path and returned with '/' as separator.  None is returned when the
    book id is unknown or the book has no TXT format.
    """
    global lib_path

    my_cursor.execute('SELECT path FROM books WHERE id = ?',[current_book])
    book_rows = my_cursor.fetchall()
    if not book_rows:
        return None
    # Single-column query; the value kept is the last one returned.
    relative_dir = book_rows[-1][-1]

    my_cursor.execute("SELECT format,name FROM data WHERE book = ? AND format = 'TXT' ",[current_book])
    format_rows = my_cursor.fetchall()
    if not format_rows:
        return None

    # Only the first matching format row is used.
    format_to_use, name = format_rows[0]
    if format_to_use != "TXT":
        return None

    file_name = name + "." + format_to_use.lower()
    relative_path = os.path.join(relative_dir,file_name)
    absolute_path = os.path.join(lib_path,relative_path)
    return absolute_path.replace(os.sep,"/")
#-----------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------------------------
def set_mcs_was_indexed_custom_column(my_db,my_cursor,current_book):
    """Flag one book as indexed in the 'mcs_was_indexed' custom column.

    Runs the statement prepared by get_custom_column_id() (module-level
    mysql_set_mcs_was_indexed) for current_book inside its own transaction.
    """
    global mysql_set_mcs_was_indexed

    run = my_cursor.execute
    run("begin")
    run(mysql_set_mcs_was_indexed,[current_book])
    run("commit")
#---------------------------------------------------------------------------------------------------------------------------------------------
def get_custom_column_id(my_db,my_cursor):
    """Look up the 'mcs_was_indexed' boolean custom column and prepare its update SQL.

    On success the module-level custom_column_id holds the column id as text
    and mysql_set_mcs_was_indexed holds the INSERT OR REPLACE statement for
    that column's table; a column that is still editable is made non-editable.
    When no such column exists, update_mcs_was_indexed_custom_column is set to
    False and custom_column_id is left at "0".
    """
    global update_mcs_was_indexed_custom_column
    global custom_column_id
    global mysql_set_mcs_was_indexed

    custom_column_id = as_unicode(0)

    my_cursor.execute("SELECT id,editable FROM custom_columns WHERE label = 'mcs_was_indexed' AND datatype = 'bool' AND normalized = 0   ")
    matching_columns = my_cursor.fetchall()
    if not matching_columns:
        update_mcs_was_indexed_custom_column = False
        return

    # If several rows match, the last one wins.
    custom_column_id, editable = matching_columns[-1]

    if editable == 1:
        # Lock the column against manual edits in the GUI.
        my_cursor.execute("begin")
        my_cursor.execute("UPDATE custom_columns SET editable = 0 WHERE id = ?",[custom_column_id])
        my_cursor.execute("commit")

    custom_column_id = as_unicode(custom_column_id)

    statement_template = as_unicode("INSERT OR REPLACE INTO custom_column_[N] (id,book,value) VALUES(NULL,?,1) ")
    mysql_set_mcs_was_indexed = statement_template.replace("[N]",as_unicode(custom_column_id))
#---------------------------------------------------------------------------------------------------------------------------------------------
def trim_selected_books_list(my_db,my_cursor,log,selected_books_list):
    """Return the books from selected_books_list that have no rows in the word index.

    Each book id is checked with a count(*) query against
    _mcs_word_book_index; the order of selected_books_list is preserved.

    When every book is already indexed, "No unindexed books were found" is
    logged and an empty list is returned, so that the caller does not reindex
    books that are already indexed.
    """
    mysql = "SELECT count(*) FROM _mcs_word_book_index WHERE book = ?"

    unindexed_selected_book_list = []
    for current_book in selected_books_list:
        my_cursor.execute(mysql,[current_book])
        tmp_rows = my_cursor.fetchall()
        # A missing result row is treated the same as a count of zero.
        if not tmp_rows or tmp_rows[0][0] == 0:
            unindexed_selected_book_list.append(current_book)
    #END FOR

    if not unindexed_selected_book_list:
        log("No unindexed books were found")
    return unindexed_selected_book_list
#---------------------------------------------------------------------------------------------------------------------------------------------
def remove_deleted_books_from_index(my_db,my_cursor,log):
    """Delete index rows whose book id no longer exists in the books table.

    Runs in its own transaction.  This is a best-effort cleanup: a failure is
    logged and the transaction rolled back, but no exception is raised.
    """
    mysql = "DELETE FROM _mcs_word_book_index WHERE book NOT IN(SELECT id FROM books WHERE id = _mcs_word_book_index.book)"
    try:
        my_cursor.execute("begin")
        my_cursor.execute(mysql)
        my_cursor.execute("commit")
    except Exception as e:
        log("Error while removing deleted books from the Index: %s" % (e,))
        try:
            # Close the transaction so that later "begin" statements still work.
            my_cursor.execute("rollback")
        except Exception:
            # No transaction was open (the "begin" itself failed); nothing to undo.
            pass
        return
    log("Deleted Books or Invalid Books added via a CSV file, if any, have been removed from the Index.")
#---------------------------------------------------------------------------------------------------------------------------------------------
def synchronize_set_mcs_was_indexed_custom_column(my_db,my_cursor,log):
    """Reset 'mcs_was_indexed' to 0 for books that have no rows in the word index.

    The custom column table name is derived from the module-level
    custom_column_id.  Errors are printed, the open transaction is rolled
    back, and nothing is raised.
    """
    global custom_column_id

    # Bound before the try block so the error handler can always print it.
    mysql = ""
    in_transaction = False
    try:
        table = "custom_column_[N]"
        table = table.replace("[N]",as_unicode(custom_column_id))
        mysql = "UPDATE or IGNORE " + table + " SET value = 0 WHERE book NOT IN(SELECT DISTINCT book FROM _mcs_word_book_index WHERE book = " + table + ".book )"
        my_cursor.execute("begin")
        in_transaction = True
        my_cursor.execute(mysql)
        my_cursor.execute("commit")
        in_transaction = False
        log(" ")
        log(" ")
        log("Custom Column #mcs_was_indexed has been synchronized with the word-book index for non-MCS (i.e., user) index deletions or insertions, if any.")
    except Exception as e:
        if in_transaction:
            try:
                # Leave no transaction open, otherwise the next "begin" would fail.
                my_cursor.execute("rollback")
            except Exception:
                pass
        print(mysql)
        print("Error in synchronize_set_mcs_was_indexed_custom_column: ", as_unicode(e))
#---------------------------------------------------------------------------------------------------------------------------------------------
def count_records_in_index(my_db,my_cursor):
    """Store the number of rows in _mcs_word_book_index in the plugin prefs.

    The count is saved as text under 'WORD_INDEX_LATEST_RECORD_COUNT'; when
    the query yields no rows, 0 is stored.
    """
    my_cursor.execute("SELECT count(*) FROM _mcs_word_book_index ")
    count_rows = my_cursor.fetchall()
    # count(*) gives a single value; take it from the last row returned.
    n_records = count_rows[-1][-1] if count_rows else 0
    prefs['WORD_INDEX_LATEST_RECORD_COUNT'] = unicode_type(n_records)
#---------------------------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------------------------
def add_csv_words_control(my_db,my_cursor,log,notifications):
    """Load the user's CSV file and add its book/word pairs to the index.

    Logs a separator line, reads the rows with upload_csv_file() and passes
    them to add_csv_words().  When no rows were read, a message is logged and
    nothing else is done (the closing separator line is not written).
    """
    separator = "--------------------------------------------------------------"

    log(separator)

    csv_word_list = upload_csv_file(my_db,my_cursor,log,notifications)
    if len(csv_word_list) > 0:
        add_csv_words(my_db,my_cursor,log,notifications,csv_word_list)
        log(separator)
    else:
        log("CSV File was empty.  Nothing done.")
#---------------------------------------------------------------------------------------------------------------------------------------------
def upload_csv_file(my_db,my_cursor,log,notifications):
    """Read the CSV file named by the WORD_INDEX_ADD_CSV_CHOSEN_FILE_PATH pref.

    Returns a list of rows, each row being the list of column values produced
    by csv.reader (dialect 'excel').  Blank lines are skipped.  An empty list
    is returned when no path is configured or when the file cannot be read or
    parsed; in the latter case the error is logged.
    """
    csv_word_list  = []

    csv_path = prefs['WORD_INDEX_ADD_CSV_CHOSEN_FILE_PATH']

    log(" ")
    log("CSV File path: " + csv_path)

    if csv_path == unicode_type(""):
        return csv_word_list

    try:
        if sys.version_info[0] >= 3:
            # Python 3's csv module needs a text-mode file opened with
            # newline=''.  'utf-8-sig' also accepts files that start with a BOM.
            csvfile = open(csv_path, 'r', encoding='utf-8-sig', errors='replace', newline='')
        else:
            # Python 2's csv module works on byte strings.
            csvfile = open(csv_path, 'rb')
        with csvfile:
            rows = [row for row in csv.reader(csvfile, dialect='excel') if row]
        # Assigned only after the whole file was read, so a file that fails
        # half-way contributes nothing.
        csv_word_list = rows
    except Exception as e:
        log("CSV File Error: " + as_unicode(e))

    return csv_word_list
#---------------------------------------------------------------------------------------------------------------------------------------------
def add_csv_words(my_db,my_cursor,log,notifications,csv_word_list):
    """Insert the book/word pairs of a parsed CSV file into the word index.

    csv_word_list is a list of lists.  each appended list is a row from the csv.
    each row has 2 columns:  book; word.  the columns must be comma-separated,
    not tab or space separated.

    pre-existing words in the index are ignored (INSERT OR IGNORE) so that this
    function can be used to 'enhance' a book's index as the user wishes.  For
    example, user-defined 'tags' could be added to the index.

    Rows whose book column is empty or not an integer, and rows with an empty
    word, are skipped; empty rows are skipped too.  A row that does not have
    exactly two columns, or a database error, stops the import.  Inserts are
    committed in batches of 100 and progress is reported through
    notifications.put((fraction, message)).  Returns None.
    """
    n_total_csv_words_to_add = len(csv_word_list)

    log(" ")
    log("Number of book/word combinations in the specified CSV file to be added to the index: " + as_unicode(n_total_csv_words_to_add))
    log(" ")

    notifications.put((0.01, 'MCS Adding CSV Words to the Index'))

    mysql = "INSERT OR IGNORE INTO _mcs_word_book_index (book,word) VALUES (?,?)"

    my_cursor.execute("begin")
    n_in_batch = 0
    n_total = 0
    for row in csv_word_list:        # row = ['1,"abalanzaba"']
        if not row:
            continue
        try:
            book,word = row
        except Exception as e:
            log("book,word = row in csv file error.  invalid csv file format and/or data: " + as_unicode(e))
            break
        try:
            if not book:
                continue
            book = as_unicode(book).replace('"',"").replace("'","").strip()
            try:
                book = int(book)
            except (TypeError, ValueError):
                # e.g. a header row; not a book id, so skip it.
                continue
            if not word:
                continue
            if isinstance(word,bytes):
                # Python 2 csv rows are byte strings; index text, not bytes.
                word = word.decode('utf-8', 'replace')
            word = word.replace('"',"").replace("'","").strip()
            if not word:
                continue
            my_cursor.execute(mysql,(book,word))
            n_total = n_total + 1
            n_in_batch = n_in_batch + 1
            if n_in_batch >= 100:
                # Commit in batches and report progress once per batch.
                my_cursor.execute("commit")
                n_progress = float(n_total) / n_total_csv_words_to_add
                notifications.put((n_progress, 'MCS Adding CSV Words'))
                my_cursor.execute("begin")
                n_in_batch = 0
        except Exception as e:
            log("CSV data error for book/word: " + as_unicode(book) + " --- " + as_unicode(word) + " with the reason: " + as_unicode(e) )
            break
    #END FOR
    try:
        my_cursor.execute("commit")
    except Exception as e:
        log("CSV final commit error: " + as_unicode(e))

    log("Number of CSV words processed: " + as_unicode(n_total))
    log(" ")
    log("CSV words, if any, have been added to the index")

    notifications.put((0.99, 'CSV words, if any, have been added to the index'))
#---------------------------------------------------------------------------------------------------------------------------------------------
#END of main.py
