# -*- coding: utf-8 -*-
__license__   = 'GPL v3'
__copyright__ = '2014,2015,2016,2017,2018,2019,2020,2021,2022,2023 DaltonST'
__my_version__ = "3.6.122"    # Qt.core

import ast,sys
import unicodedata
from calibre.constants import DEBUG
from polyglot.builtins import as_bytes, as_unicode, only_unicode_recursive, unicode_type

# Memo used by convert_types_to_bytestrings(): maps a previously seen input
# value (and each produced result) to its scrubbed bytestring.
mystring_input_output_dict = {}

# Single-character byte constants that convert_types_to_bytestrings() strips
# from the ends of a converted value: parentheses and the single quote.
LP = as_bytes("(")
RP = as_bytes(")")
SQ = as_bytes("'")

# Byte prefixes that mark text looking like the repr of a tuple whose first
# item is a u-prefixed string literal, e.g. (u'Alexander Aaronsohn',)
USQ = as_bytes("(u'")
UDQ = as_bytes('(u"')
#----------------------------------------------------------------------------------------------------------------
def convert_types_to_bytestrings(mystring):
    """Return *mystring* as a scrubbed, ASCII-only bytestring.

    Warning: this *will* strip/replace diacritics.  Example: "não-ficção"
    will become "nao-ficcao".

    Handling by input type:
      * unicode text: NFKD-normalized, then every non-ASCII character dropped.
      * bytes: decoded as UTF-8 and treated like text.  Bytes that look like
        the repr of a tuple, e.g. b"(u'Alexander Aaronsohn',)", are parsed with
        ast.literal_eval and the tuple's items are joined; if parsing fails the
        bytes are treated as ordinary text.
      * int / float / bool: their text form (bool is a subclass of int).
      * flat list or tuple of scalars: every item is converted and the pieces
        are concatenated.  An empty sequence, or one whose first item is None
        or "None", yields b"".
      * list or tuple containing nested lists/tuples (e.g. rows returned by
        cursor.fetchall()): callers are not supposed to send these here.
        One row with one column returns that value as bytes; one row with
        several columns returns only_unicode_recursive() of the first column;
        several rows return only_unicode_recursive() of the whole input.
        These three results are returned as-is: NOT scrubbed, NOT memoized
        and (for the last two) NOT necessarily bytes.
      * anything else: its text form.

    Scrubbing strips surrounding whitespace, one leading "(" and one trailing
    ")" when the parentheses are unbalanced, and one leading/trailing single
    quote.

    Results are memoized in the module-level mystring_input_output_dict.
    Lists (unhashable) and numbers are never memoized: 1, 1.0 and True hash
    and compare equal, so they would return each other's cached text.
    """
    if DEBUG: print("current function being executed: ", sys._getframe(0).f_code.co_name)

    origin = mystring
    is_sequence = isinstance(mystring, (list, tuple))
    is_number = isinstance(mystring, (int, float))    # also covers bool

    if not is_sequence and not is_number:
        try:
            already_converted = mystring in mystring_input_output_dict
        except TypeError:
            # unhashable input (dict, set, ...) can never be in the memo
            already_converted = False
        if already_converted:
            if DEBUG: print("previous conversion saved - in:", mystring, "    -  out: ", mystring_input_output_dict[mystring])
            return mystring_input_output_dict[mystring]

    if DEBUG: print("original: ", as_unicode(mystring), "     type: ", as_unicode(type(mystring)))

    if is_sequence:
        if DEBUG: print("[f.1] isinstance(mystring,tuple or list): ", as_bytes(mystring), "   length: ", as_bytes(len(mystring)))
        if _is_blank_sequence(mystring):
            mystring = as_bytes("")
            _memoize_conversion(origin, mystring)
            return mystring
        if any(isinstance(item, (tuple, list)) for item in mystring):
            return _convert_nested_rows(mystring)
        mystring = _ascii_fold(_join_items_as_text(mystring))
        if DEBUG: print("[f.2]  tuple mystring is now bytes: ", mystring)

    elif isinstance(mystring, unicode_type):
        mystring = _ascii_fold(mystring)

    elif isinstance(mystring, bytes):
        text = as_unicode(mystring)
        if mystring.startswith((USQ, UDQ)):    # (u'Alexander Aaronsohn',)
            try:
                parsed = ast.literal_eval(text)
            except (ValueError, TypeError, SyntaxError):
                # merely starts like a tuple repr; keep it as ordinary text
                parsed = text
            if isinstance(parsed, tuple):
                if _is_blank_sequence(parsed):
                    mystring = as_bytes("")
                    _memoize_conversion(origin, mystring)
                    return mystring
                text = _join_items_as_text(parsed)
            else:
                text = as_unicode(parsed)
        mystring = _ascii_fold(text)

    elif is_number:   #Derive Genres...
        mystring = as_bytes(mystring)

    else:
        # None, bytearray, dict, ... : fall back to the value's text form
        mystring = _ascii_fold(as_unicode(as_bytes(mystring)))
        if DEBUG: print("[i] ", mystring)

    mystring = _scrub_wrapping_chars(mystring)    #   *always* return a scrubbed bytestring

    if not is_number and not isinstance(origin, list):  #lists are not hashable, so cannot be a key to a dict...
        _memoize_conversion(origin, mystring)
        _memoize_conversion(mystring, mystring)

    return mystring


def _ascii_fold(text):
    """Decompose unicode *text* (NFKD) and return only its ASCII bytes."""
    return unicodedata.normalize('NFKD', text).encode('ascii', 'ignore')


def _join_items_as_text(items):
    """Convert every item of *items* to text and concatenate the pieces."""
    return "".join(as_bytes(item).decode('utf-8', 'replace') for item in items)


def _is_blank_sequence(items):
    """Return True when *items* is empty or its first item is None / "None"."""
    if len(items) == 0:
        return True
    first = items[0]
    return first is None or as_bytes(first) == as_bytes("None")


def _convert_nested_rows(rows):
    """Handle a non-empty sequence that contains nested lists/tuples (misuse by the caller)."""
    if len(rows) > 1:
        #should not be allowed to get here in the first place...why is a list of multiple values being sent to this function? should be using "only_unicode_recursive" instead.
        converted = only_unicode_recursive(rows)
        if DEBUG: print("*** ERROR[2] ***is_list_of_tuples: ", as_unicode(converted))
        return converted

    row = rows[0]   # 1 row  (e.g. from a cursor.fetchall() of several values into a returned list...)
    column_count = len(row)
    if column_count == 0:
        return as_bytes("")
    if column_count == 1:
        # 1 row with 1 column, e.g. SELECT COUNT(*) FROM ...
        value = as_bytes(row[0])
        if DEBUG: print("*** ERROR[0] ***is_list_of_tuples: ", as_unicode(rows))
        return value
    #should not be allowed to get here in the first case...SQL construction needs to add a second returned value so can simply unpack a row: SELECT Count(*),NULL FROM ...
    value = only_unicode_recursive(row[0])
    if DEBUG: print("*** ERROR[1] ***is_list_of_tuples: ", as_unicode(rows))
    return value


def _scrub_wrapping_chars(value):
    """Strip whitespace, an unbalanced outer parenthesis pair and outer single quotes from bytes *value*."""
    value = value.strip()

    if value.count(LP) != value.count(RP):
        if value.startswith(LP):
            value = value[1:]
        if value.endswith(RP):
            value = value[:-1]

    if value.startswith(SQ):
        value = value[1:]
    if value.endswith(SQ):
        value = value[:-1]

    return value.strip()


def _memoize_conversion(key, value):
    """Remember *key* -> *value* in the module memo; unhashable keys are skipped."""
    try:
        mystring_input_output_dict[key] = value
    except TypeError:
        pass
#----------------------------------------------------------------------------------------------------------------
def convert_types_to_unicode(mystring):
    """Return *mystring* as unicode text.

    Unicode input is handed back untouched.  Anything else is first run
    through convert_types_to_bytestrings() and that result is then converted
    with as_unicode().
    """
    if isinstance(mystring, unicode_type):
        return mystring
    return as_unicode(convert_types_to_bytestrings(mystring))
#----------------------------------------------------------------------------------------------------------------
def qs_standardize_any_string(mystring):
    """Return *mystring* converted by convert_types_to_unicode() with surrounding whitespace stripped."""
    return convert_types_to_unicode(mystring).strip()
#----------------------------------------------------------------------------------------------------------------
#----------------------------------------------------------------------------------------------------------------
def qs_convert_list_of_nominal_book_ids_to_integers(in_list):
    """Return the book ids found in *in_list* as a new, ascending list of integers.

    *in_list* must be a list.  Its first element decides how it is read:
    when that element is a dict, every element is indexed with 'calibre_id';
    otherwise every element is passed to int().  An empty list is returned
    for an empty list, for a non-list argument, and when the first dict has
    no 'calibre_id' key.
    """
    if not isinstance(in_list,list):
        if DEBUG: print("[1] qs_convert_list_of_nominal_book_ids_to_integers:  bad type encountered; empty list returned for: ", as_unicode(in_list))
        return []
    if not in_list:
        return []

    head = in_list[0]
    if not isinstance(head,dict):  # not: int = row['calibre_id']
        return sorted(int(entry) for entry in in_list)

    if 'calibre_id' not in head:
        if DEBUG: print("[2] qs_convert_list_of_nominal_book_ids_to_integers:  bad dict encountered; empty list returned for: ", as_unicode(in_list))
        return []

    ids = []
    for record in in_list:
        book_id = record['calibre_id']
        ids.append(book_id if isinstance(book_id,int) else int(book_id))
    return sorted(ids)
 #----------------------------------------------------------------------------------------------------------------
 #----------------------------------------------------------------------------------------------------------------
def qs_standardize_string_numerics(s,return_integer=False,return_float=False):
    """Normalize a numeric-looking value to text, or to a number on request.

    Parameters:
        s: None, an int/float, or anything qs_standardize_any_string() accepts.
        return_integer: when true, return a number instead of text.
        return_float: when true, return a number and keep fractional values
            as float (takes precedence over return_integer for such values).

    Returns:
        With neither flag set: unicode text.  None becomes "0"; int/float
        become their text form; other input is standardized and has commas,
        parentheses and spaces removed, with an empty result becoming "0".
        With a flag set: an int, or a float when the value is fractional and
        return_float is true.  A value without a fractional part is returned
        as int even when return_float is true.

    Raises:
        ValueError: when a number is requested and the value cannot be
            converted to one.

    int/float inputs are converted directly rather than through their text
    form, because floats such as 1e-07 or 1e+16 have no "." in their text
    form and used to fail in int().  A bool input with a number requested
    therefore yields 0 or 1.
    """
    want_number = return_integer or return_float

    if s is None:
        s = "0"
    elif isinstance(s,(int,float)):
        if want_number:
            if not isinstance(s,float):
                return int(s)
            if return_float:
                return float(s)
            try:
                return int(s)
            except OverflowError as err:
                # int(inf): keep the ValueError callers already expect from this function
                raise ValueError("cannot convert %r to an integer" % (s,)) from err
        s = as_unicode(s)
    else:
        s = qs_standardize_any_string(s)
        for unwanted in (",", "(", ")", " "):
            s = s.replace(unwanted,"")
        s = s.strip()
        if not s:
            s = "0"

    if not want_number:
        return s

    if "." in s:
        number = float(s)
        return number if return_float else int(number)
    return int(s)
 #----------------------------------------------------------------------------------------------------------------
#end of convert_types_to_other_types