# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
__license__   = 'GPL v3'
__copyright__ = '2020 DaltonST <DaltonShiTzu@outlook.com>'
__my_version__ = "1.0.3"  #Miscellany

"""
RIS is a standardized tag format developed by Research Information Systems, Incorporated to enable citation programs to exchange data.
Filename extension:ris
Internet media type:application/x-research-info-systems
Type of format:Bibliography
"""

from calibre.constants import DEBUG
from calibre.customize import FileTypePlugin

from polyglot.builtins import as_unicode, iteritems

DOI_ORG_URL = "https://www.doi.org/"

#--------------------------------------------------------------------------------------------
class Extract_RIS_Citations(FileTypePlugin):
    """calibre file-type plugin for books added in the RIS citation format.

    After a book with an 'ris' format has been added, postadd() reads the RIS
    file, maps its tags to calibre columns and updates the book's metadata.
    run() and postimport() are pass-through/no-op hooks.
    """

    # Plugin registration attributes.
    name                    = 'Extract RIS Citations'
    description           = "Extract a Citation from RIS tags then update the new book's bibliographic metadata."
    supported_platforms     = ['windows', 'osx', 'linux']
    author                  = 'DaltonST'
    version                 = (1, 0, 3)
    minimum_calibre_version = (5, 7, 0)

    # Only files with the 'ris' extension are handled by this plugin.
    file_types     = set(['ris'])

    on_postimport  = True  # this plugin is run after books are added to the database; implies that both the postimport and postadd methods of the plugin are to be called.

    #--------------------------------------------------------------------------------------------
    def initialize(self):
        return
    #--------------------------------------------------------------------------------------------
    def is_customizable(self):
        return True
    #--------------------------------------------------------------------------------------------
    def config_widget(self):
        """Build and return a new instance of the plugin's ConfigWidget.

        The import is done lazily, inside the method, exactly as before.
        """
        from calibre_plugins.extract_ris_citations.config import ConfigWidget
        widget = ConfigWidget()
        return widget
    #--------------------------------------------------------------------------------------------
    def save_settings(self, config_widget):
        return True
    #--------------------------------------------------------------------------------------------
    def run(self, path_to_ebook):
        return path_to_ebook
    #--------------------------------------------------------------------------------------------
    def postimport(self, book_id, book_format, db):
        return
    #--------------------------------------------------------------------------------------------
    def postadd(self, book_id, fmt_map, db):
        if isinstance(fmt_map,dict):
            if 'ris' in fmt_map:
                path = db.format_path(book_id, 'RIS', index_is_id=True)
                if path is None:
                    return
                if not path.endswith(".ris"):
                    return
                if DEBUG:  print("ERC Plugin: path of original RIS file: ", fmt_map['ris'])
            else:
                if DEBUG: print("ERC Plugin: Not an RIS file...nothing to do.")
                return
        else:
            return

        fd = self.load_text_file_data(path)
        if not len(fd) > 2:
            return

        self.create_metadata_from_ris_file_control(db,book_id,fd)

        del fd
    #--------------------------------------------------------------------------------------------
    def create_metadata_from_ris_file_control(self,db,book_id,fd):
        """Parse the lines of an RIS file and pass the usable rows on for updating.

        Parameters:
            db:      database object, forwarded to update_metadata_from_ris_file().
            book_id: id of the book being updated.
            fd:      list of text lines read from the RIS file.

        Only the first citation is used: parsing stops at the first line that
        starts with "ER".  A line is kept when its 2-character tag is both a
        legal RIS tag and mapped to a calibre column, and its value is not empty.

        The collected (tag, column, value) rows are ordered by tag with a stable
        sort, so repeated tags (e.g. several AU lines) keep the order they have
        in the file instead of being alphabetised by value.

        Returns None.
        """
        ris_tag_desc_mapping_dict = self.get_ris_tag_desc_mapping_dict()  #all legal tag values
        ris_tag_column_mapping_dict = self.get_ris_tag_column_mapping_dict()  #ris tag-to-calibre column mapping
        fd_tags_col_count_dict = {}  # column -> number of rows mapped to it

        md_list = []  #new metadata

        for r in fd:
            if DEBUG: print("r: ", r)
            if "  -" not in r:  #  TY  - JOUR
                continue
            # A byte-order mark on the first line would otherwise become part of the tag.
            r = r.lstrip("\ufeff").strip()
            if r.startswith("ER"):  # RIS file may have multiple sets of citation tags; only the first will be used since a single citation can use only one set of RIS tags.
                break
            if not len(r) > 5:  #  TY  -
                continue
            tag = r[0:2].strip()
            if tag not in ris_tag_desc_mapping_dict:  #only legal tag values
                continue
            if tag not in ris_tag_column_mapping_dict:  #only usable tag values
                continue
            value = r[6:].strip()
            if not value:
                continue
            col = ris_tag_column_mapping_dict[tag]
            md_list.append((tag, col, value))
            fd_tags_col_count_dict[col] = fd_tags_col_count_dict.get(col, 0) + 1
        #END FOR

        if md_list:
            # Sort on the tag only.  list.sort() is stable, so rows sharing a tag
            # stay in file order; sorting whole tuples would reorder authors.
            md_list.sort(key=lambda md: md[0])
            self.update_metadata_from_ris_file(db,book_id,md_list,fd_tags_col_count_dict,ris_tag_desc_mapping_dict)
    #--------------------------------------------------------------------------------------------
    def update_metadata_from_ris_file(self,db,book_id,md_list,fd_tags_col_count_dict,ris_tag_desc_mapping_dict):
        """Write the parsed RIS rows into the book's calibre metadata.

        Parameters:
            db:      database object; its set_authors/set_tags/set_title/
                     set_publisher/set_pubdate/set_series methods and its
                     field_metadata are used.
            book_id: id of the book to update.
            md_list: list of (ris_tag, calibre_column, value) tuples.
            fd_tags_col_count_dict: per-column row counts produced by the parser.
                     Kept for interface compatibility; repeated values are now
                     grouped directly from md_list.
            ris_tag_desc_mapping_dict: RIS tag -> human readable description.

        self.ercprefs must already be set (get_customized_ris_tag_column_mapping_dict()
        does that); it is deleted again at the end, as is self.mydatetime.

        Authors, tags, title, publisher, pubdate and series are written through
        the db setters.  Comments, languages, identifiers and custom columns are
        collected in a Metadata object and applied through the GUI's
        'Edit Metadata' action.  Optionally a DOI URL is opened afterwards.

        Returns None.
        """
        import datetime
        self.mydatetime = datetime.datetime  # read by format_datetime()

        def parse_date(text):
            # One malformed date must not abort the whole metadata update.
            try:
                return self.format_datetime(text)
            except ValueError:
                return None

        # RIS tags the user additionally wants to be turned into calibre tags.
        also_make_tags_set = set()
        for candidate in self.ercprefs['ERC_ALSO_CREATE_TAGS_FOR_ITEMIZED_RIS_TAGS'].split(","):
            candidate = candidate.strip().upper()
            if candidate and candidate in ris_tag_desc_mapping_dict:  #all legal tags
                also_make_tags_set.add(candidate)
        #END FOR

        finished_tag_set = set()
        finished_col_set = set()

        authors_list = []
        tags_list = []
        comments_list = []
        identifiers_dict = {}  # identifiers[type] = value

        for tag, col, value in md_list:
            if col == "tags":
                tags_list.append(value)
                finished_tag_set.add(tag)
            elif tag in also_make_tags_set and value not in tags_list:
                # de-duplicate on the value being added (not on the RIS tag)
                tags_list.append(value)

            if col == "authors":
                authors_list.append(value)
                finished_tag_set.add(tag)
            elif col == "comments":
                comments_list.append("<br><br>" + ris_tag_desc_mapping_dict[tag] + ": " + value)
                finished_tag_set.add(tag)
            elif col == 'identifiers' or col == 'identifier':
                if tag == "DO":
                    identifiers_dict['doi'] = value
                elif tag == "SN" and value.startswith("97") and len(value) > 12:
                    identifiers_dict['isbn'] = value
                elif tag == "SN" and "-" in value and len(value) < 13:     # 0732-183X
                    identifiers_dict['issn'] = value
                else:
                    identifiers_dict[ris_tag_desc_mapping_dict[tag]] = value
                finished_tag_set.add(tag)
        #END FOR

        new_authors = " & ".join(authors_list)
        new_tags = ", ".join(tags_list)
        new_comments = "".join(comments_list)

        new_title = None
        new_publisher = None
        new_pubdate = None
        new_series = None
        new_languages = []
        other_columns_list = []

        for r in md_list:
            tag, col, value = r
            if tag in finished_tag_set:
                continue
            if col in finished_col_set:  #disallow multiple RIS titles, publishers, pubdate, series, languages for a single citation
                continue
            if col == "title":
                new_title = value
            elif col == "publisher":
                new_publisher = value
            elif col == "pubdate" or col == "published":
                col = "pubdate"
                new_pubdate = value
            elif col == "series":
                new_series = value
            elif col == "languages" or col == "language":
                col = "languages"
                value = self.format_languages(value)
                if value is None:
                    continue
                new_languages.append(value)
            else:
                other_columns_list.append(r)  # custom columns
                continue
            finished_tag_set.add(tag)
            finished_col_set.add(col)
        #END FOR

        # The joined strings are never None; skip the write when the RIS file
        # supplied nothing so existing authors/tags are not replaced by "".
        if new_authors:
            db.set_authors(book_id, new_authors, notify=True)
        if new_tags:
            db.set_tags(book_id, new_tags, notify=True)
        if new_title is not None:
            db.set_title(book_id, new_title, notify=True)
        if new_publisher is not None:
            db.set_publisher(book_id, new_publisher, notify=True)
        if new_pubdate is not None:
            new_pubdate = parse_date(new_pubdate)
            if new_pubdate is not None:
                db.set_pubdate(book_id, new_pubdate, notify=True)
        if new_series is not None:
            db.set_series(book_id, new_series, notify=True)

        from calibre.ebooks.metadata.book.base import Metadata

        custom_columns = db.field_metadata.custom_field_metadata()

        from calibre.gui2.ui import get_gui
        maingui = get_gui()

        mi = Metadata(_('Unknown'))
        mi.comments = new_comments + "<br><br>--------------------------"

        edit_metadata_action = maingui.iactions['Edit Metadata']

        url_list = []
        for tag, col, value in md_list:
            if tag == "UR" or tag == "DO" or DOI_ORG_URL in value or "doi" in value:
                url_list.append((tag, value))
        #END FOR

        # Group the remaining rows by target column, keeping first-seen column
        # order and the order of the values within each column.
        rows_by_col = {}
        for tag, col, value in other_columns_list:
            rows_by_col.setdefault(col, []).append((tag, value))
        #END FOR

        for col, rows in rows_by_col.items():
            if not col.startswith("#"):
                #unsupported standard Calibre column...not possible unless config.py has a bug...
                continue

            value_list = [v for _tag, v in rows]
            value = value_list[0]  # scalar datatypes use the first value only

            if col not in custom_columns:
                if self.ercprefs['ERC_COPY_UNKNOWN_COLUMNS_TO_COMMENTS'] == '1':
                    for row_tag, v in rows:
                        mi.comments = mi.comments + "<br><br>" + ris_tag_desc_mapping_dict[row_tag] + ": " + v
                    #END FOR
                continue

            mi.id = book_id
            # Work on a copy so '#value#' is not written into the db's own field metadata.
            custcol = dict(custom_columns[col])
            datatype = custcol['datatype']

            if datatype in ("composite", "rating", "ratings"):
                continue
            elif datatype == "int" or datatype == "float":
                value = self.format_numerics(value, datatype)
                if value is None:
                    continue
            elif datatype == "bool":
                lowered = value.lower()
                if value == "1" or value == "0":
                    value = int(value)
                elif lowered in ("true", "t", "yes"):
                    value = 1
                elif lowered in ("false", "f", "no"):
                    value = 0
                else:
                    continue
            elif datatype == "datetime":
                value = parse_date(value)
                if value is None:
                    continue
            else:
                # Truthiness accepts both a plain flag and a non-empty
                # separator mapping for multi-valued columns.
                if custcol['is_multiple']:
                    value = value_list
                elif len(value_list) > 1:
                    value = " - ".join(value_list)
                else:
                    value = value_list[0]
            custcol['#value#'] = value
            mi.set_user_metadata(col, custcol)
        #END FOR

        if len(new_languages) == 0:  #default to English if none
            new_languages.append("eng")
        mi.languages = ",".join(new_languages)

        if len(identifiers_dict) > 0:
            mi.identifiers = identifiers_dict

        id_map = {book_id: mi}
        edit_metadata_action.apply_metadata_changes(id_map, callback=None)

        if self.ercprefs['ERC_OPEN_DOI_URL_FOR_CITATION'] == '1':
            if len(url_list) > 0:
                self.erc_open_doi_url_control((url_list, book_id))

        # Per-run instance state, dropped again as before.
        del self.ercprefs
        del self.mydatetime
    #--------------------------------------------------------------------------------------------
    def load_text_file_data(self,path):
        fd = []
        try:
            with open(path, 'r') as f:
                fd = f.readlines()
            f.close()
            del path
            del f
        except Exception as e:
            if DEBUG: print(as_unicode(e))
        return fd
    #--------------------------------------------------------------------------------------------
    def get_ris_tag_column_mapping_dict(self):
        """Return the RIS tag -> calibre column mapping.

        The default mapping is fetched first and then passed through
        get_customized_ris_tag_column_mapping_dict(), whose result is returned.
        """
        defaults = self.get_default_ris_tag_column_mapping_dict()
        return self.get_customized_ris_tag_column_mapping_dict(defaults)
    #--------------------------------------------------------------------------------------------
    def get_default_ris_tag_column_mapping_dict(self):
        """Return the plugin's built-in RIS tag -> calibre column mapping.

        The dict is imported lazily from the plugin's ris_tag_mapping_dicts
        module and returned as-is (not copied).
        """
        from calibre_plugins.extract_ris_citations.ris_tag_mapping_dicts import ris_tag_column_mapping_dict as default_mapping
        return default_mapping
    #--------------------------------------------------------------------------------------------
    def get_customized_ris_tag_column_mapping_dict(self,ris_tag_column_mapping_dict):
        """Overlay the user's tag-to-column preferences on the given mapping.

        Side effects: stores the plugin prefs object in self.ercprefs and makes
        sure the three ERC_* option keys exist in it (set to '0' when missing).

        Parameters:
            ris_tag_column_mapping_dict: base RIS tag -> calibre column mapping.
                It is no longer modified; a copy is customized instead, so the
                shared default mapping is not altered between runs.

        Returns a new dict: the base mapping, overridden by every prefs entry
        whose key is a 2-character upper-case tag, with 'ER' mapped to ''.
        """
        from calibre_plugins.extract_ris_citations.config import prefs
        self.ercprefs = prefs
        #~ -------------------------------------------
        #~ Assume the user has not ever customized this file-type plugin before the first time it is executed, so prefs.defaults not yet accessed...
        #~ -------------------------------------------
        for option in ('ERC_ALSO_CREATE_TAGS_FOR_ITEMIZED_RIS_TAGS',
                       'ERC_COPY_UNKNOWN_COLUMNS_TO_COMMENTS',
                       'ERC_OPEN_DOI_URL_FOR_CITATION'):
            if option not in self.ercprefs:
                self.ercprefs[option] = '0'
        #~ -------------------------------------------
        customized = dict(ris_tag_column_mapping_dict)
        for tag, column in iteritems(self.ercprefs):
            if tag.isupper() and len(tag) == 2:
                customized[tag] = column
        customized['ER'] = ''
        return customized
    #--------------------------------------------------------------------------------------------
    def get_ris_tag_desc_mapping_dict(self):
        """Return the RIS tag -> description mapping.

        The dict is imported lazily from the plugin's ris_tag_mapping_dicts
        module and returned as-is (not copied).
        """
        from calibre_plugins.extract_ris_citations.ris_tag_mapping_dicts import ris_tag_desc_mapping_dict as tag_descriptions
        return tag_descriptions
    #--------------------------------------------------------------------------------------------
    def format_datetime(self,date):
        # 2016///    2016/02//    2016/02/28/

        year = "1901"
        month = "1"
        day = "1"

        if "/" in date:
            ssplit = date.split("/")
        elif "-" in date:
            ssplit = date.split("-")
        else:
            return None

        n = len(ssplit)
        if n > 0:
            yr = ssplit[0].strip()
            if yr.isdigit():
                year = yr
                if n > 1:
                    mo = ssplit[1].strip()
                    if mo.isdigit():
                        month = mo
                        if n > 2:
                            da = ssplit[2].strip()
                            if da.isdigit():
                                day = da

        year = int(year)
        month = int(month)
        day = int(day)

        date = self.mydatetime(year, month, day, 12, 0, 0)  # the original RIS dates have no time-zone, so use noon to avoid local-time day-changes in the GUI...for Europe & the Americas, anyway...

        return date
#--------------------------------------------------------------------------------------------
    def format_numerics(self,value,datatype):
        try:
            value = value.strip()
            value = float(value)
            if datatype == "int":
                value = int(value)
        except:
            value = None
        return value
#--------------------------------------------------------------------------------------------
    def format_languages(self,value):
        value = value.lower().strip()
        if value.startswith("eng"):
            value = "eng"
        elif value == "en" or value == "e":
            value = "eng"
        elif value > "":
            pass
        else:
            value = None
        return value
#--------------------------------------------------------------------------------------------
    def erc_open_doi_url_control(self,r):
        """Schedule erc_open_doi_url(r) after a random delay of 2000-7000 ms.

        r is passed through untouched to erc_open_doi_url().
        """
        import random
        #new tabs in browser seem to open much more reliably and smoothly when tens of .ris filetypes are being added simultaneously...
        delay_ms = random.randint(2000, 7000)
        from PyQt5.Qt import QTimer

        def open_later():
            self.erc_open_doi_url(r)

        QTimer.singleShot(delay_ms, open_later)
#--------------------------------------------------------------------------------------------
    def erc_open_doi_url(self,r):
        """Open the citation's DOI in the web browser, if one can be determined.

        Parameters:
            r: tuple (url_list, book_id); url_list holds (ris_tag, value) tuples.

        Candidates are examined in descending tag order (UR, then N1, then DO).
        The first usable one wins:
          * a value that already is a doi.org resolver URL is opened as-is;
          * a value starting with "doi:" (any letter case) has the prefix removed;
          * a DO value is taken as the bare DOI.
        Bare DOIs are appended to DOI_ORG_URL.  Only doi.org URLs are ever
        opened, so arbitrary URLs found in the RIS file are not launched.

        Returns None.
        """
        #~ International DOI Foundation:   https://www.doi.org/      To resolve a DOI Name:    https://doi.org/10.1200/JCO.20.00960    for a DOI of:   10.1200/JCO.20.00960

        #~ DO  - 10.1200/JCO.20.00960
        #~ N1  - doi: 10.1200/JCO.20.00960
        #~ UR  - https://doi.org/10.1200/JCO.20.00960

        url_list, book_id = r

        doi_url = None
        for tag, value in sorted(url_list, reverse=True):  #prefer UR if it exists...
            text = value.strip()
            lowered = text.lower()
            # Accept the resolver with or without "www."/"dx." and with http or https.
            if lowered.startswith((DOI_ORG_URL.lower(),
                                   "https://doi.org/", "http://doi.org/",
                                   "http://www.doi.org/",
                                   "https://dx.doi.org/", "http://dx.doi.org/")):
                doi_url = text
                break
            if lowered.startswith("doi:"):
                doi = text[4:].strip()
            elif tag == "DO":
                doi = text
            else:
                continue
            if doi:
                doi_url = DOI_ORG_URL + doi
                break
        #END FOR

        if doi_url is None:
            return

        import webbrowser

        webbrowser.open(doi_url, new=0)

        if DEBUG: print("webbrowser.open(doi_url, new=0): ", doi_url)
#--------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------
#END of __init__.py