# -*- coding: utf-8 -*-
__license__   = 'GPL v3'
__my_version__ = "1.0.205"  # BIB Catalog to RIS Converter/Exploder to Auto-Add

import os
import re
from time import sleep
from qt.core import QTimer
from calibre.constants import DEBUG
from polyglot.builtins import as_unicode, iteritems

#~ article: Refers to an article from a journal or magazine.
#~ book: Refers to a book with an explicit publisher.
#~ inbook: Refers to an untitled part (chapter, section, certain pages) of a book.
#~ incollection: Refers to a titled part (chapter, section, certain pages) of a book.
#~ inproceedings: Refers to an article in a conference proceedings.
#~ electronic: Refers to the digital contents of a website, internet resources, etc.
#~ phdthesis: Refers to a Ph.D. thesis.
#~ mastersthesis: Refers to a Master’s thesis.
#~ manual: Refers to technical documentation.
#~ unpublished: Refers to a document with an author and title that has not been formally published.
#~ techreport: Refers to a report published by an institution.
#~ misc: Refers to documents for which nothing else fits.

# BIB entry type (the word that follows "@") --> value written on the RIS 'TY' row.
# Only 'article' is renamed ('JOUR'); every other type is its own name upper-cased.
# NOTE: none of these keys may also exist as a key in bib_keys_mapping_dict.
bib_item_type_mapping_dict = {
    'article': 'JOUR',
    'book': 'BOOK',
    'booklet': 'BOOKLET',
    'electronic': 'ELECTRONIC',
    'inbook': 'INBOOK',
    'incollection': 'INCOLLECTION',
    'inpress': 'INPRESS',
    'inproceedings': 'INPROCEEDINGS',
    'manual': 'MANUAL',
    'mastersthesis': 'MASTERSTHESIS',
    'misc': 'MISC',
    'phdthesis': 'PHDTHESIS',
    'techreport': 'TECHREPORT',
    'unpublished': 'UNPUBLISHED',
}

# BIB field name --> two-letter RIS tag.
# Several BIB fields intentionally share one RIS tag (e.g. many map to 'M1').
# BIB fields that are not listed here are written with tag 'M1' by
# convert_bib_items_dict_to_ris().
bib_keys_mapping_dict = {
    'abstract': 'AB',
    'address': 'PP',
    'author': 'AU',
    'authors': 'AU',
    'bibsource': 'DB',
    'biburl': 'UR',
    'booktitle': 'T1',
    'caption': 'CA',
    'citekey': 'ID',
    'crossref': 'M1',
    'database': 'DB',
    'defaultntwo': 'N2',
    'doi': 'DO',
    'edition': 'M1',
    'editor': 'A2',
    'end_page': 'EP',
    'entrytype': 'TY',
    'file': 'L1',
    'howpublished': 'M1',
    'id': 'ID',
    'image': 'L4',
    'images': 'L4',
    'institution': 'M1',
    'isbn': 'SN',
    'issn': 'SN',
    'journal': 'JO',
    'key': 'M1',
    'keywords': 'KW',
    'language': 'LA',
    'miscellaneous': 'M2',
    'month': 'M2',
    'note': 'M1',
    'notes': 'N1',
    'number': 'IS',
    'organization': 'M1',
    'originalbibfile': 'L3',
    'pages': 'SP',
    'pmid': 'CP',
    'publisher': 'PB',
    'school': 'M1',
    'section': 'SE',
    'series': 'M1',
    'shorttitle': 'ST',
    'start_page': 'SP',
    'timestamp': 'DA',
    'title': 'T1',
    'type': 'M1',
    'url': 'UR',
    'urldate': 'DA',
    'volume': 'VL',
    'year': 'PY',
}

# All known BIB field names, in the same order as the mapping above.
# list(dict) yields the keys directly and leaves no stray loop variables
# behind in the module namespace.
bib_keys_list = list(bib_keys_mapping_dict)

# Three-letter lower-case month abbreviation --> full English month name.
month_short_to_full_map = {
    'jan': 'January',
    'feb': 'February',
    'mar': 'March',
    'apr': 'April',
    'may': 'May',
    'jun': 'June',
    'jul': 'July',
    'aug': 'August',
    'sep': 'September',
    'oct': 'October',
    'nov': 'November',
    'dec': 'December',
}

# Three-letter lower-case month abbreviation --> two-digit month number.
month_short_to_digits_map = {
    'jan': '01',
    'feb': '02',
    'mar': '03',
    'apr': '04',
    'may': '05',
    'jun': '06',
    'jul': '07',
    'aug': '08',
    'sep': '09',
    'oct': '10',
    'nov': '11',
    'dec': '12',
}

# Full month name --> two-digit month number, stored under BOTH spellings:
# January = 01  &  january = 01
month_full_to_digits_map = {}
for _short, _digits in month_short_to_digits_map.items():
    _full = month_short_to_full_map.get(_short)
    if _full is not None:
        month_full_to_digits_map[_full] = _digits
        month_full_to_digits_map[_full.lower()] = _digits
#END FOR
del _short, _digits, _full  # loop temporaries are not part of the module's namespace

# Lower-case file-name extensions (leading dot included) that mark a 'file'
# attachment as an image when the RIS link tag is chosen.
IMAGE_FILE_EXTENSIONS = {
    '.bmp',
    '.eps',
    '.gif',
    '.jpeg',
    '.jpg',
    '.png',
    '.psd',
    '.raw',
    '.svg',
    '.tiff',
}
#------------------------------------------------------------------------------------------------------
def convert_bib_to_ris_control(qappcurrent, maingui, bib_set_list_dict, n_bib_sets, converted_bib_to_ris_auto_add_path):
    """Convert every BIB entry to RIS and write one .ris file per entry.

    Phase 1 parses and converts each bib_set and queues (path, RIS lines)
    pairs.  Phase 2 writes the queued files, pausing after every second file
    so the GUI can process events while the new files are picked up.

    Parameters:
        qappcurrent: object whose ``processEvents()`` is called around each pause.
        maingui: object whose ``status_bar.show_message(msg)`` shows progress.
        bib_set_list_dict: mapping ``unique_name -> list of bib_sets``; each
            bib_set is a list of text lines belonging to one BIB entry.
        n_bib_sets: number of non-empty bib_sets the caller expects to be processed.
        converted_bib_to_ris_auto_add_path: directory the .ris files are written to.

    Returns:
        ``(is_valid, msg)`` on every path.  ``is_valid`` is False when there
        was no input, when a file could not be written, or when the number of
        processed bib_sets differs from ``n_bib_sets``.  ``msg`` is the last
        message produced (writer message or progress text), or None when no
        file was queued.
    """
    FILES_PER_BATCH = 2   # pause after this many files have been handed to the writer
    PAUSE_SECONDS = 4     # assumes Calibre can easily add (without fully hanging the GUI) ~1 new book via auto-add per 2 seconds

    if DEBUG: print("\n\nconvert_bib_to_ris_control\n\n")

    if not bib_set_list_dict:
        msg = "Error: len(bib_set_list_dict) == 0:"
        if DEBUG: print(msg)
        # Same (is_valid, msg) shape as the normal return, so a caller that
        # unpacks the result does not fail on the empty-input path.
        return False, msg

    final_is_valid = True
    msg = None
    n_processed = 0

    # Collected first and written afterwards so the writes can be staggered;
    # otherwise Calibre may hang when many files land in the Auto-Add directory at once.
    output_files_to_write_list = []

    for unique_name, bib_set_list in bib_set_list_dict.items():    # each value == a "group_list" created in ui.py...

        #unique_name is unique only for the entire bib_set_list, not each output .ris file from each bib_set in bib_set_list

        if DEBUG: print("type of bib_set_list: ", type(bib_set_list))  # <class 'list'>

        for bib_set in bib_set_list:

            if DEBUG: print("type of bib_set: ", type(bib_set))   # <class 'list'>

            if not bib_set:
                if DEBUG: print("empty bib_set")
                continue

            n_processed += 1

            bib_items_dict = parse_bib_set_list(bib_set)
            ris_string_list = convert_bib_items_dict_to_ris(bib_items_dict)

            if not ris_string_list:
                if DEBUG: print("ERROR: ris_string_list is empty...")
                continue

            # A running number across ALL queued files makes each file name unique.
            n_uniqueness_counter = len(output_files_to_write_list) + 1
            file_name = str(unique_name) + str(n_uniqueness_counter) + ".ris"

            if DEBUG: print("Unique in Auto-Add directory, RIS .ris file name will be: ", file_name)
            output_ris_full_path = os.path.join(converted_bib_to_ris_auto_add_path, file_name)
            output_ris_full_path = output_ris_full_path.replace(os.sep, "/")
            if DEBUG: print(">>>>>auto-add:  output_ris_full_path: ", output_ris_full_path)
            output_files_to_write_list.append((output_ris_full_path, ris_string_list))
    #END FOR

    n_total = len(output_files_to_write_list)
    for n_total_done, (output_ris_full_path, ris_string_list) in enumerate(output_files_to_write_list, start=1):
        is_valid, msg = write_new_ris_file_to_calibre_autoadd_directory(output_ris_full_path, ris_string_list)
        if not is_valid:
            final_is_valid = False
            if DEBUG: print("NOT is_valid; breaking & returning: ", output_ris_full_path, str(ris_string_list))
            break
        if n_total_done % FILES_PER_BATCH == 0:
            n_percent_finished = round(100 * (n_total_done / n_total))
            qappcurrent.processEvents()
            sleep(PAUSE_SECONDS)
            msg = "JS: New RIS 'Books' are being Auto-Added (two at a time): Progress: " + str(n_percent_finished) + "%"
            maingui.status_bar.show_message(msg)
            if DEBUG: print(msg)
            #~ Add progress bar with option to cancel?
            qappcurrent.processEvents()
    #END FOR

    if n_bib_sets != n_processed:
        if DEBUG: print("n_bib_sets != n_processed: ", str(n_bib_sets), " != ", str(n_processed))
        final_is_valid = False
    else:
        if DEBUG: print("n_bib_sets == n_processed: ", str(n_bib_sets), " == ", str(n_processed))

    return final_is_valid, msg
#------------------------------------------------------------------------------------------------------
def parse_bib_set_list(bib_set):
    """Parse the text lines of one BIB entry into a ``{bib_key: bib_value}`` dict.

    Expected line shapes:
        ``@type{citekey,``  -> stored as 'entrytype' and 'citekey'
        ``key={value},``    -> stored as key/value (exactly one "={" per line)
        ``}``               -> end of the entry; parsing stops
    Empty lines are ignored.  Lines without "={" (continuation lines) and
    lines with more than one "={" are skipped with a DEBUG message.

    The characters ``@ { } ,`` are deleted from every value and from the citekey.

    Special handling:
        issn:  when the value is two ISSNs separated by one space, the
               original text goes to 'defaultntwo' and the lowest ISSN is kept.
        isbn:  hyphens and spaces are removed.
        title: the first double space is collapsed to a single space.
        A missing or empty title falls back to the keywords, then the citekey.

    Parameters:
        bib_set: iterable of text lines for a single BIB entry.

    Returns:
        dict of the parsed items (may be empty).
    """
    AT_SIGN = "@"
    RIGHT_BRACE = "}"
    FIELD_DELIMITER = "={"

    # Deleting "}" and "," one character at a time also removes every "}," pair,
    # which delimits the end of a logical line of a multi-line bib_value.
    symbols_to_remove = str.maketrans("", "", "@{},")

    # e.g. '1740-8849 1740-8857'; compiled once instead of once per line
    issn_pair_pattern = re.compile("^[0-9a-z]{4}[-][0-9a-z]{4}[ ][0-9a-z]{4}[-][0-9a-z]{4}", re.IGNORECASE)

    bib_items_dict = {}

    for line in bib_set:
        if DEBUG: print("line in bib_set_list: ", line)

        if not line:
            continue

        if line.startswith(AT_SIGN):   # assumes the first 2 lines in the raw bib_string split by "}," were not concatenated erroneously...input file data was fixed in ui.py...
            if "= {" in line or "={" in line:
                if DEBUG: print("Error: first 2 lines in the raw bib_string split by '},' were still concatenated...", line)
            s_list = line.split("{")
            bib_items_dict['entrytype'] = s_list[0].replace(AT_SIGN, "").strip()   # BIB:  'entrytype' : 'TY'  --->>>  'TY': 'type_of_reference'    :RIS
            if len(s_list) > 1:
                bib_items_dict['citekey'] = s_list[1].strip().translate(symbols_to_remove)   # BIB:  'citekey' : 'ID'  --->>>  'ID': 'reference_id'  :RIS
            else:
                # An "@" line without "{" has no citekey; do not crash on it.
                if DEBUG: print("Error:  Data Error: no '{' in the @ line: ", line)
            continue

        if line.startswith(RIGHT_BRACE):
            break  #end of a group

        n_fields = line.count(FIELD_DELIMITER)
        if n_fields == 0:
            if DEBUG: print("Error:  Continuation Line??: ", line)  # error: continuation line for multi-line bib_values, such as Abstracts, Notes, and other long texts...
            continue
        if n_fields > 1:
            if DEBUG: print("Error:  Data Error: more than one '={' in the line: ", line)
            continue

        # Split at the one real "={" delimiter, so "=" characters inside the
        # value (e.g. URL query strings) no longer cause the field to be dropped.
        bib_key, _, bib_value = line.partition(FIELD_DELIMITER)
        bib_key = bib_key.strip()
        bib_value = bib_value.translate(symbols_to_remove).strip()

        if bib_key == "issn":
            if issn_pair_pattern.search(bib_value):
                bib_items_dict['defaultntwo'] = "Multiple ISSNs: " + bib_value  #original multiple issns e.g. '1740-8849 1740-8857' is defaulted to Tag N2 since N2 is otherwise never used
                bib_value = sorted(bib_value.split(" "))[0].strip()  #single issn after sort ascending e.g. '1740-8849'
        elif bib_key == "isbn":
            bib_value = bib_value.replace("-", "").replace(" ", "")
        elif bib_key == "title":
            bib_value = bib_value.replace("  ", " ")

        bib_items_dict[bib_key] = bib_value
    #END FOR (line in bib_set)

    if not bib_items_dict.get('title'):
        #default title to keywords or citekey; this happens more than you would think.  BIB in the wild is unreliable (a gross understatement).
        fallback_title = bib_items_dict.get('keywords') or bib_items_dict.get('citekey')
        if fallback_title:
            bib_items_dict['title'] = fallback_title

    if DEBUG:
        print("\n\nListing of contents of bib_items_dict:  k,v:  ")
        for k, v in bib_items_dict.items():
            print(k, v)
        #END FOR
        print("\n\n")

    return bib_items_dict
#------------------------------------------------------------------------------------------------------
def convert_bib_items_dict_to_ris(bib_items_dict):
    """Convert one parsed BIB entry into the text lines of one RIS record.

    Every line has the form ``XX  - value`` (tag, two spaces, hyphen, space,
    value).  The 'TY' line is written first, the remaining lines follow in
    sorted (tag, value) order, and ``ER  -`` closes the record.

    Conversions applied:
        entrytype: translated through bib_item_type_mapping_dict when known,
                   otherwise written unchanged.
        unknown BIB keys: written with tag 'M1'.
        AU: ' and ' becomes ' & ' so each author gets its own Calibre author-id.
        PY: replaced by ``year-MM-01``; MM comes from the 'month' item and
            defaults to '01' when the month is missing or not recognised.
        L1 ('file'): stays L1 for .pdf; .epub/.txt/.html become L2; image
            extensions become L4; anything else becomes L2.
        L3: kept only for .bib files, otherwise L2.
        Title-like tags are title-cased with str.title().
    File-extension tests ignore upper/lower case.

    Parameters:
        bib_items_dict: dict as returned by parse_bib_set_list().

    Returns:
        list of RIS text lines (without line terminators); an empty list when
        ``bib_items_dict`` is empty, so the caller can skip the entry.
    """
    if not bib_items_dict:
        return []

    TAGS_TO_TITLECASE = ('TI','T1','T2','T3','TT','ST','J2','JF','CT','CY')
    NON_PDF_ATTACHMENT_EXTENSIONS = ('.epub', '.txt', '.html')   # 'L2': 'file_attachments2'

    ris_tag_value_list = []
    for bib_key, bib_value in bib_items_dict.items():
        if DEBUG: print(bib_key, bib_value)
        if bib_key == 'entrytype':                       # BIB:  'entrytype' : 'TY'  --->>>  'TY': 'type_of_reference'    :RIS
            # 'article' -> 'JOUR'; a non-standard entry type is passed through unchanged
            bib_value = bib_item_type_mapping_dict.get(bib_value, bib_value)
        # miscellaneous -- unknown keys default to 'M1' so they end up in Comments
        ris_tag = bib_keys_mapping_dict.get(bib_key, "M1")
        ris_tag_value_list.append((ris_tag, bib_value))
    #END FOR

    # Always bound, so the 'PY' branch below can never hit an unbound name.
    new_py_pubdate_tag = None
    year = bib_items_dict.get('year')
    if year is not None:
        month = bib_items_dict.get('month')
        num = None if month is None else convert_month_to_digits(month)
        if num is None:
            num = '01'
        new_py_pubdate_tag = f'{year}-{num}-01'

    ris_tag_value_list.sort()

    ris_string_list = []

    for ris_tag, bib_value in ris_tag_value_list:
        if ris_tag == 'TY':  # must be the very first row...
            ris_string_list.append(f'{ris_tag}  - {bib_value}')    # format is:  XX__-_xxxx; otherwise, considered corrupt .ris file.
            break
    #END FOR

    for ris_tag, bib_value in ris_tag_value_list:

        if ris_tag == 'TY':
            continue

        if ris_tag == 'AU':  #authors, who if have ' and ' must change to the Calibre standard ' & ' separator so each author gets its own Calibre author-id.
            bib_value = bib_value.replace(' and ', ' & ')
        elif ris_tag == 'PY':
            if new_py_pubdate_tag is not None:
                bib_value = new_py_pubdate_tag
        elif ris_tag == 'L1':
            lowered = bib_value.lower()   # ".PDF" is as much a pdf as ".pdf"
            if ".pdf" in lowered:
                pass                      # 'L1': 'file_attachments1/pdf',
            elif any(ext in lowered for ext in NON_PDF_ATTACHMENT_EXTENSIONS):
                ris_tag = 'L2'            # 'L2': 'file_attachments2',
            elif any(ext in lowered for ext in IMAGE_FILE_EXTENSIONS):
                ris_tag = 'L4'            # 'L4': 'image(s)',
            else:
                ris_tag = 'L2'            # 'L2': 'file_attachments2',
        elif ris_tag == 'L3':             # Note:  'L3': 'related_records', is RESERVED for the original .bib file name being converted to .ris
            if '.bib' not in bib_value.lower():
                ris_tag = 'L2'
        #END IF

        if ris_tag.startswith('L'):
            bib_value = bib_value.strip()

        if ris_tag in TAGS_TO_TITLECASE:
            bib_value = bib_value.title()
            if DEBUG: print("ris_tag titlecased: ", ris_tag, bib_value)

        ris_string_list.append(f'{ris_tag}  - {bib_value}')
    #END FOR

    ris_string_list.append('ER  -')   # very last, and *must* have 2 spaces after ER and before the - ...

    if DEBUG: print("".join(ris_string_list))

    return ris_string_list
#------------------------------------------------------------------------------------------------------
def convert_month_to_digits(month):
    """Return the two-digit month number ('01'..'12') for a BIB month value.

    Accepted spellings, with or without dots and surrounding spaces, in any
    letter case:
        * a month number 1..12 ('7', '07')
        * a three-letter abbreviation ('jan', 'Jan.')
        * a full month name ('January', 'JANUARY')
        * a longer abbreviation that is a prefix of a full name ('Sept.')

    Parameters:
        month: the raw month text.

    Returns:
        the two-digit string, or None when the value is empty or not recognised.
    """
    if not month:
        return None

    cleaned = str(month).replace(".", "").strip()
    if not cleaned:
        return None

    if cleaned.isdecimal():
        number = int(cleaned)
        return f"{number:02d}" if 1 <= number <= 12 else None

    # exact spellings first
    if cleaned in month_short_to_digits_map:
        return month_short_to_digits_map[cleaned]
    if cleaned in month_full_to_digits_map:
        return month_full_to_digits_map[cleaned]

    # then ignore letter case
    lowered = cleaned.lower()
    if lowered in month_short_to_digits_map:
        return month_short_to_digits_map[lowered]

    # At least three letters are required so the prefix cannot be ambiguous.
    if len(lowered) >= 3:
        for full_name, num in month_full_to_digits_map.items():
            if full_name.lower().startswith(lowered):
                return num

    return None
#------------------------------------------------------------------------------------------------------
def write_new_ris_file_to_calibre_autoadd_directory(output_ris_full_path,ris_string_list):
    """Create the .ris file and schedule its lines to be written by a zero-delay timer.

    The file is opened here; the actual writing (and closing) is done later by
    ris_write_line_qtimer(), which is queued with QTimer.singleShot(0, ...).
    A True result therefore means "file created and write scheduled", not
    "write finished".

    Parameters:
        output_ris_full_path: full path of the .ris file to create.
        ris_string_list: RIS text lines (without line terminators).

    Returns:
        ``(is_valid, msg)``: ``(True, "Not available")`` when the write was
        scheduled, otherwise ``(False, <error text>)``.
    """
    msg = "Not available"

    # Opening is part of the guarded work: an unwritable path must be reported
    # through (False, msg) like every other failure instead of raising.
    try:
        f = open(output_ris_full_path, 'wt', encoding='utf-8')
    except (OSError, ValueError) as e:
        msg = "ERROR:   could not create the RIS file: " + str(output_ris_full_path) + " " + str(e)
        if DEBUG: print(msg)
        return False, msg

    try:
        QTimer.singleShot(0, lambda: ris_write_line_qtimer(f,ris_string_list))
    except Exception as e:
        f.close()   # nothing will ever write to (or close) the handle now
        msg = "ERROR:   Qtimer.singleshot(0, lambda: ris_write_line_qtimer(f,ris_string_list)) " + str(e)
        if DEBUG: print(msg)
        return False, msg

    if DEBUG: print("f.write(ris_string_list)....is_valid = True")
    return True, msg
#------------------------------------------------------------------------------------------------------
def ris_write_line_qtimer(f,ris_string_list):
    """Write the non-empty RIS lines to the open file ``f`` and close it.

    Each line is written followed by a newline; empty lines are skipped.
    The file is closed on every exit path.  Errors (including errors raised
    while closing) are reported only when DEBUG is set and are never
    propagated, so a failure cannot escape into the timer callback.

    Parameters:
        f: writable text file object; it is closed by this function.
        ris_string_list: RIS text lines (without line terminators).
    """
    try:
        with f:   # guarantees f.close(), whatever happens while writing
            f.writelines(line + "\n" for line in ris_string_list if line)
        if DEBUG: print("f.write(ris_string_list)....was successful...")
    except Exception as e:
        if DEBUG: print("write_new_ris_file_qtimer: Error: ", str(e))
#------------------------------------------------------------------------------------------------------
#------------------------------------------------------------------------------------------------------