# -*- coding: utf-8 -*-
__license__   = 'GPL v3'
__copyright__ = '2015,2016,2017,2018,2019,2020,2021,2022,2023 DaltonST'
__my_version__ = "1.0.65"  # Sid's Stuff

import re
from calibre import browser
from calibre.constants import DEBUG
from calibre.ebooks.BeautifulSoup import BeautifulSoup

#~ The paramtype value that oclc_classify_webscraping_stdnbr() accepts; any other
#~ paramtype makes that function return its "NONE" defaults without fetching anything.
STDNBR = "stdnbr"

#-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
#~ Search URL template; the placeholder is replaced with the standard number.
_STDNBR_SEARCH_URL = "http://classify.oclc.org/classify2/ClassifyDemo?search-standnum-txt=[STDNBRGOESHERE]&startRec=0"

#~ Anchor patterns (raw strings; semantically the same expressions as before).
#~ <a href="http://dewey.org/webdewey/executeBrowse.html?type=browse&amp;searchIn=deweyNumbers&amp;query=940.86473092" target="_blank">
_STDNBR_DDC_HREF = re.compile(r"http://dewey.org/webdewey.+query=[0-9]{3,}")
_STDNBR_DDC_NUMBER = re.compile(r"[0-9]{3,}[.]*[0-9]*")
#~ <a href="https://classweb.org/min/minaret?app=Class&amp;...&amp;iterm=D810.S7&amp;subtype=&amp;ilabel=Class%20number" target="_blank">ClassWeb</a>
_STDNBR_LCC_HREF = re.compile(r"classweb.org.+minaret.+term[=].+subtype")
_STDNBR_LCC_TERM = re.compile(r"term=.+subtype")
#~ <a href="/classify2/ClassifyDemo?wi=5623603410">
_STDNBR_WI_HREF = re.compile(r"/classify2/ClassifyDemo\?wi=.+$")
#~ <a href="http://classify.oclc.org/classify2/ClassifyDemo?owi=2996550899">
_STDNBR_OWI_HREF = re.compile(r"http://classify.oclc.org/classify2/ClassifyDemo\?owi=.+$")
_STDNBR_FAST_HREF = re.compile(r"/classify2/ClassifyDemo\?search-subhead-txt=.+$")
_STDNBR_VIAF_HREF = re.compile(r"http://viaf.org/viaf/.+$")
#~ <a href="http://www.worldcat.org/oclc/10180629" target="_blank">
_STDNBR_WORLDCAT_HREF = re.compile(r"http://www.worldcat.org/oclc/[0-9]{6,}$")
_STDNBR_LONG_NUMBER = re.compile(r"[0-9]{6,}")


def oclc_classify_webscraping_stdnbr(paramtype,paramvalue):
    """Scrape the OCLC Classify search page for one standard number.

    Parameters:
        paramtype:  must equal STDNBR; anything else returns the defaults
                    without any network access.
        paramvalue: the standard number to search for (e.g. an ISBN such as
                    9780684843285); it is converted with str() and stripped.

    Returns a 7-tuple, in this order:
        (ddc, lcc, fast_list, oclc_owi, viaf_author_id, oclc_worldcat, oclc_wi)
    Every scalar is the string "NONE" when nothing was found; fast_list is a
    list of FAST headings (commas replaced by semicolons, duplicates removed,
    first-seen order kept).

    When the page has no "classSummaryData" table with a DDC or LCC link (the
    table only occurs when there is a single return), only oclc_wi is looked
    up and the function returns early so the caller can re-do the search
    using that OCLC-WI.

    Fetch errors are reported through DEBUG prints and result in the defaults
    being returned; they are never raised to the caller.
    """
    if DEBUG:
        print("paramtype: ", str(paramtype))      #~ paramtype:  stdnbr
        print("paramvalue: ", str(paramvalue))   #~ paramvalue:  9780684843285

    ddc_return = "NONE"
    fast_return_list = []
    lcc_return = "NONE"
    oclc_owi_return = "NONE"
    oclc_wi_return = "NONE"
    oclc_worldcat_return = "NONE"
    viaf_author_id_return = "NONE"

    def results():
        #~ Single place that fixes the order of the returned tuple.
        return ddc_return, lcc_return, fast_return_list, oclc_owi_return, viaf_author_id_return, oclc_worldcat_return, oclc_wi_return

    def print_results():
        print("ddc_return, lcc_return, fast_return_list, oclc_owi_return, viaf_author_id_return, oclc_worldcat_return, oclc_wi_return\n", ddc_return, lcc_return, str(fast_return_list), oclc_owi_return, viaf_author_id_return, oclc_worldcat_return, oclc_wi_return)

    if paramtype != STDNBR:
        #~ No url exists yet on this path, so report the rejected paramtype instead.
        if DEBUG: print("Not Supported: ", str(paramtype))
        return results()

    url = _STDNBR_SEARCH_URL.replace("[STDNBRGOESHERE]", str(paramvalue).strip())

    br = browser()
    #~ NOTE(review): mechanize timeouts are normally given in seconds, so 10000 may have
    #~ been intended as milliseconds; the value is left unchanged -- confirm.
    timeout=10000
    try:
        raw = br.open_novisit(url, timeout=timeout).read().strip()
    except Exception as e:
        if DEBUG: print("Error in br.open_novisit: ", str(e))
        return results()

    if raw is None:
        if DEBUG: print("raw is None; returning from url: ", url)
        return results()

    #~ read() yields bytes; str(bytes) would produce the "b'...'" repr with escaped quotes,
    #~ newlines and non-ASCII characters, so decode instead.
    #~ NOTE(review): UTF-8 is assumed for the page encoding -- confirm.
    if isinstance(raw, bytes):
        url_html = raw.decode("utf-8", errors="replace")
    else:
        url_html = str(raw)
    soup = BeautifulSoup(url_html)

    if not soup:
        if DEBUG: print("Not Soup; returning from url: ", url)
        return results()

    #~ ----------------------------------------------------------
    #~ DDC
    #~ ----------------------------------------------------------
    singlereturnfound = False
    table = None
    try:
        table = soup.find("table", "dataTable", id="classSummaryData")  # This only occurs when there is a single return.
        if table is not None:
            for anchor in table.find_all(href=_STDNBR_DDC_HREF):
                s = str(anchor)
                if DEBUG: print("singlereturnfound:  DDC href: ", s)
                match = _STDNBR_DDC_NUMBER.search(s)
                if match:
                    ddc_return = match.group().strip()
                    singlereturnfound = True
                    break
            #END FOR
    except Exception as e:
        singlereturnfound = False
        if DEBUG: print("\nid=classSummaryData:  Exception in DDC: ", str(e))

    if DEBUG:
        print("DDC: ", ddc_return)

    #~ ----------------------------------------------------------
    #~ LCC
    #~ ----------------------------------------------------------
    try:
        if DEBUG: print("lcc_href_regex: ", _STDNBR_LCC_HREF.pattern)
        if table is not None:
            for anchor in table.find_all(href=_STDNBR_LCC_HREF):
                s = str(anchor)
                if DEBUG: print("LCC href: ", s)
                match = _STDNBR_LCC_TERM.search(s)
                if match:
                    #~ "term=D810.S7&amp;subtype"  -->  "D810.S7"
                    lcc_return = match.group()
                    lcc_return = lcc_return.replace("term=","")
                    lcc_return = lcc_return.replace("&amp;subtype","")
                    lcc_return = lcc_return.strip()
                    singlereturnfound = True
                    break
            #END FOR
    except Exception as e:
        if DEBUG: print("Exception in LCC: ", str(e))

    if DEBUG:
        print("lcc: ", lcc_return)

    #~ ----------------------------------------------------------
    #~ OCLC-WI (only when neither DDC nor LCC was found)
    #~ ----------------------------------------------------------
    if not singlereturnfound:
        if DEBUG: print("Searching for wi since ddc AND lcc not found using isbn...")
        try:
            for anchor in soup.find_all(href=_STDNBR_WI_HREF):
                s = str(anchor)  # <a href="/classify2/ClassifyDemo?wi=5623603410">An elementary introduction ...</a>
                match = _STDNBR_LONG_NUMBER.search(s)
                if match:
                    oclc_wi_return = match.group().strip()
                    if DEBUG: print("not singlereturnfound:  oclc_wi: ", oclc_wi_return)
                    break
            #END FOR
        except Exception as e:
            #~ On failure fall through and still try the remaining lookups below.
            if DEBUG: print("\nnot singlereturnfound:  Exception in OCLC-WI: ", str(e))
        else:
            if DEBUG: print_results()
            if DEBUG: print("Returning With only the OCLC-WI.  Need a re-do using the OCLC-WI.")
            return results()

    if DEBUG:
        if ddc_return == "NONE" and lcc_return == "NONE" and oclc_wi_return == "NONE":
            print(soup.prettify())

    #~ -------------------------------------
    #~ OCLC-OWI
    #~ -------------------------------------
    try:
        for anchor in soup.find_all(href=_STDNBR_OWI_HREF):
            text = anchor.string
            if text is None:
                #~ No single text node: skip instead of leaking None into the result.
                continue
            owi = str(text).replace("http://classify.oclc.org/classify2/ClassifyDemo?owi=","")
            oclc_owi_return = owi.strip()
            if DEBUG: print("oclc_owi: ", oclc_owi_return)
            break
        #END FOR
    except Exception as e:
        if DEBUG: print("Exception in OCLC-OWI: ", str(e))

    #~ -------------------------------------
    #~ FAST Subject Headings
    #~ -------------------------------------
    try:
        for anchor in soup.find_all(href=_STDNBR_FAST_HREF):
            text = anchor.string
            if text is None:
                #~ str(None) would add a bogus "None" heading.
                continue
            fast = str(text).strip()
            if not fast:
                continue
            if "," in fast:
                if DEBUG: print("FAST Tag: comma will be replaced with a semicolon: ", fast)
                fast = fast.replace(",",";").strip()
            fast_return_list.append(fast)
            if DEBUG: print("FAST Tag: ", fast)
        #END FOR
    except Exception as e:
        if DEBUG: print("Exception in FAST: ", str(e))
    #~ De-duplicate while keeping first-seen order (a set would make the order arbitrary).
    fast_return_list = list(dict.fromkeys(fast_return_list))

    #~ ----------------------------------------------------------
    #~ VIAF Authority Links
    #~ ----------------------------------------------------------
    try:
        for anchor in soup.find_all(href=_STDNBR_VIAF_HREF):
            text = anchor.string
            if text is None:
                continue
            viaf_author_id_return = str(text).strip()
            if DEBUG: print("VIAF Authority Links: ", viaf_author_id_return)
            break
        #END FOR
    except Exception as e:
        if DEBUG: print("Exception in VIAF: ", str(e))

    #~ ----------------------------------------------------------
    #~ Worldcat OCLC number (first matching link only)
    #~ ----------------------------------------------------------
    try:
        for anchor in soup.find_all(href=_STDNBR_WORLDCAT_HREF):
            match = _STDNBR_LONG_NUMBER.search(str(anchor))
            if match:
                oclc_worldcat_return = match.group().strip()
            if DEBUG: print("Worldcat.org OCLC: ", oclc_worldcat_return)
            break
        #END FOR
    except Exception as e:
        if DEBUG: print("Exception in OCLC-WORLDCAT: ", str(e))

    if DEBUG: print_results()
    if DEBUG: print("Returning With Results, If Any...")

    return results()
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
def oclc_classify_webscraping_author_title(param_dict):
    """Search OCLC Classify by title and author and return the first OCLC-WI found.

    Parameters:
        param_dict: mapping that must contain the keys "author" and "title"
                    (a missing key raises KeyError).

    Returns:
        The digits of the first "/classify2/ClassifyDemo?wi=<digits>" link on
        the result page, as a string, or the string "NONE" when the page could
        not be fetched or holds no such link.

    Example request:
        http://classify.oclc.org/classify2/ClassifyDemo?search-title-txt=Foundation%20And%20Empire&search-author-txt=isaac%20asimov&startRec=0
    """
    from urllib.parse import quote  # local import: keeps this function self-contained

    if DEBUG:
        for k,v in param_dict.items():
            print("param_dict: ", k,v)
        #END FOR

    wi_return = "NONE"

    #~ Percent-encode the values so spaces, "&", "#", etc. cannot corrupt the query string.
    #~ "%" and "+" are left alone so a value the caller already encoded is not encoded twice.
    title = quote(param_dict["title"], safe="%+")
    author = quote(param_dict["author"], safe="%+")

    search_url = (
        "http://classify.oclc.org/classify2/ClassifyDemo"
        "?search-title-txt=" + title +
        "&search-author-txt=" + author +
        "&startRec=0"
    )

    if DEBUG: print("oclc_classify_webscraping_author_title:", search_url)

    br = browser()
    #~ NOTE(review): mechanize timeouts are normally given in seconds, so 10000 may have
    #~ been intended as milliseconds; the value is left unchanged -- confirm.
    timeout=10000
    try:
        raw = br.open_novisit(search_url, timeout=timeout).read().strip()
    except Exception as e:
        if DEBUG: print("Error in br.open_novisit: ", str(e))
        return wi_return

    if raw is None:
        if DEBUG: print("raw is None; returning from url: ", search_url)
        return wi_return

    #~ read() yields bytes; str(bytes) would produce the "b'...'" repr, so decode instead.
    #~ NOTE(review): UTF-8 is assumed for the page encoding -- confirm.
    if isinstance(raw, bytes):
        url_html = raw.decode("utf-8", errors="replace")
    else:
        url_html = str(raw)

    soup = BeautifulSoup(url_html)
    if not soup:
        if DEBUG: print("Not Soup; returning from url: ", search_url)
        return wi_return

    #~ ----------------------------------------------------------
    wi_href_regex = r"/classify2/ClassifyDemo\?wi=[0-9]{4,}"   # <a href="/classify2/ClassifyDemo?wi=1807153559">
    if DEBUG: print("wi_href_regex: ", wi_href_regex)
    wi_number = re.compile(r"[0-9]{4,}")
    for anchor in soup.find_all(href=re.compile(wi_href_regex)):
        s = str(anchor)
        if DEBUG: print("found:  href: ", s)
        match = wi_number.search(s)
        if match:
            wi_return = match.group()
            break
    #END FOR

    if DEBUG: print("\nwi_return", wi_return)
    if DEBUG: print("\nReturning Normally With Result:  wi_return:", wi_return)

    return wi_return
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
#~ END OF CLASSIFY_WEB_SERVICE_WEBSCRAPING