# -*- coding: utf-8 -*-
__license__   = 'GPL v3'
__copyright__ = '2015,2016,2017,2018,2019,2020,2021,2022,2023 DaltonST'
__my_version__ = "1.0.64"  # Sid's Stuff

import re
import time
from time import sleep
from calibre import browser
from calibre.constants import DEBUG
from calibre.ebooks.BeautifulSoup import BeautifulSoup

from polyglot.builtins import as_unicode, iteritems

from calibre_plugins.library_codes.ui import SOURCE_TYPE_VIAF_AUTHOR_ID

#~ SOURCE_TYPE_VIAF_AUTHOR_ID = "http://viaf.org/viaf/[REFERENCENUMBER]/"                      # get isni and lccn using viaf_author_id

#--------------------------------------------------------------------------------------------
def library_codes_generic_webscraping(source_type,source_dict,final_list):
    """Scrape library codes from the web for every book in final_list.

    Only SOURCE_TYPE_VIAF_AUTHOR_ID is supported; any other source_type
    returns an empty dict without doing any work.

    source_type: which kind of reference number source_dict holds.
    source_dict: mapping of book (as text) -> reference number (currently a
                 viaf_author_id).
    final_list:  the books to process; each one is converted with as_unicode
                 before it is looked up in source_dict.

    Returns a dict keyed by reference number.  Each value is the list of
    non-empty rows that parse_html_viaf produced for that reference number;
    reference numbers that yielded no rows are left out.
    """

    final_results_dict = {}

    if source_type != SOURCE_TYPE_VIAF_AUTHOR_ID:
        return final_results_dict

    active_target_url = "http://viaf.org/viaf/[REFERENCENUMBER]/"

    #-----------------------------------------------
    # MAIN FUNCTION
    #-----------------------------------------------

    if DEBUG: print("\n\n-----------------library_codes_generic_webscraping---------------------\n\n")

    for book in final_list:

        book = as_unicode(book)
        source_v = source_dict[book] if book in source_dict else None  # viaf_author_id (currently)

        # A missing or empty reference number cannot be turned into a URL.
        if not source_v:
            if DEBUG: print("source_v == None; skipping:   book, source_type ", book, source_type)
            continue

        # Throttle: pause only when a request is really about to be sent, so
        # skipped books do not cost a second each.
        sleep(1)

        if DEBUG: print("..........library_codes_generic_webscraping:   main function:    source_dict: ", book,source_v, source_type)

        # as_unicode lets a non-text reference number (e.g. an int) be placed in the URL.
        target_url = active_target_url.replace("[REFERENCENUMBER]",as_unicode(source_v))

        if DEBUG: print("-----------------------------------------------")
        if DEBUG: print("current target_url is: ", target_url)

        if DEBUG: print("[1] SOURCE_TYPE_VIAF_AUTHOR_ID")
        html_page,soup,html_raw = download_html(source_type,target_url,br_type="novisit")
        new_row1,new_row2 = parse_html_viaf(source_v,html_page,html_raw)
        if DEBUG: print("new_row1: ", as_unicode(new_row1),"new_row2: ", as_unicode(new_row2))

        # Keep only the rows that were actually found.
        results_list = [row for row in (new_row1,new_row2) if row]

        if results_list:
            final_results_dict[source_v] = results_list

    #END FOR

    return final_results_dict
#--------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------
def download_html(source_type,target_url,br_type="novisit",timeout=5000):
    """Download target_url and return it as (html_page, soup, html_raw).

    source_type: not used by this function.
    target_url:  the URL to download.
    br_type:     how to fetch the page.  Only "novisit" (open_novisit) is
                 implemented; any other value downloads nothing.
    timeout:     handed unchanged to the browser's open call.  The default is
                 the value that used to be hard-coded here.
                 NOTE(review): mechanize timeouts appear to be in seconds, so
                 5000 is probably far longer than intended - confirm and
                 consider passing a smaller value.

    Returns a 3-tuple:
        html_page: soup.prettify() of the download, or "" on failure.
        soup:      the BeautifulSoup object built from the download, or "".
        html_raw:  the stripped data exactly as read from the response, or "".

    Download and parsing errors are caught here; they are only reported
    (printed) when DEBUG is set.
    """
    soup = ""
    html_page = ""
    html_raw = ""

    try:
        br = browser()
        if DEBUG: print(br_type)
        if br_type == "novisit":
            # A failed fetch is reported but still falls through to the
            # parsing step below with an empty html_raw.
            try:
                html_raw = br.open_novisit(target_url, timeout=timeout).read().strip()
            except Exception as e:
                if DEBUG: print("Error in br type: ", br_type, ": ", str(e))
        # Any other br_type is reserved for future use and fetches nothing.
        #~ --------------------------------------
        soup = BeautifulSoup(html_raw)
        html_page = soup.prettify()    # parse_html_viaf reads this text line by line

    except Exception as e:
        if DEBUG: print(as_unicode(e))

    # Normalise every "nothing" result to an empty string for the caller.
    if not soup:
        if DEBUG: print("not soup")
        soup = ""
    if not html_page:
        html_page = ""
    if not html_raw:
        html_raw = ""

    return html_page,soup,html_raw
#-----------------------------------------------
#-----------------------------------------------
#-----------------------------------------------
def parse_html_viaf(v,html_page,html_raw):
    """Pull the ISNI and the LCCN out of a prettified VIAF author page.

    v:         the viaf_author_id the page belongs to; copied into each row.
    html_page: the page text, scanned one line at a time.  It is passed
               through as_unicode first.
    html_raw:  not used by this function.

    Returns (new_row1, new_row2):
        new_row1: ("isni", v, <isni>) or None when no ISNI line was found.
        new_row2: ("lccn", v, <lccn>) or None when no LC line was found.

    Any exception raised while scanning is caught (and printed when DEBUG is
    set); whatever was found up to that point is still returned.
    """

    # v = viaf_author_id

    new_row1 = None
    new_row2 = None

    try:
        for r in as_unicode(html_page).split("\n"):
            # Stop scanning once both codes are known.
            if (new_row1 is not None) and (new_row2 is not None):
                break

            r = r.strip()

            if new_row1 is None and r.startswith("ISNI|"):                                        # <a href="/viaf/sourceID/ISNI|0000000121340483">ISNI|0000000121340483</a>
                if DEBUG: print(">> ISNI|: ", r)
                # The last character of an ISNI is a check character that may
                # be "X"; keep it instead of cutting the identifier short.
                match1 = re.search(r'[0-9]{5,}(?:X\b)?',r)
                if match1:
                    isni = match1.group().strip()
                    if DEBUG: print("isni is: ", as_unicode(isni))
                    new_row1 = "isni",v,isni
                    continue

            if new_row2 is None and r.startswith("LC|n"):                                         # <ns2:source nsid="n50012900">LC|n  50012900</ns2:source>
                if DEBUG: print(">> LC|n: ", r)
                lccn = None
                # Keep the full letter prefix (e.g. "n", "no", "nr") so the
                # result is a valid permalink id rather than always "n"+digits.
                match2 = re.match(r'LC\|(n[a-z]{0,2})\s*([0-9]{5,})',r)
                if match2:
                    lccn = match2.group(1) + match2.group(2)
                else:
                    # Unexpected layout: fall back to "n" + the first long digit run.
                    match2 = re.search(r'[0-9]{5,}',r)
                    if match2:
                        lccn = "n" + match2.group().strip()      #as of: 20230405 so permalink can be used:  example:  https://lccn.loc.gov/n79018774
                if lccn:
                    if DEBUG: print("lccn is: ", as_unicode(lccn))
                    new_row2 = "lccn",v,lccn
                    continue

        #END FOR
    except Exception as e:
        if DEBUG: print("Exception in parse_html_viaf: ", as_unicode(e))

    if DEBUG: print("\n\nparse_html_viaf:  returning with new_row1,new_row2: ", str(new_row1),str(new_row2))

    return new_row1,new_row2
#-----------------------------------------------
#-----------------------------------------------
#-----------------------------------------------
#END OF library_codes_webscraping_api
