# -*- coding: utf-8 -*-
__license__   = 'GPL v3'
__copyright__ = '2015,2016,2017,2018,2019,2020,2021,2022,2023 DaltonST 2024 DJG'
__my_version__ = "2.0.1"

import re
from calibre import browser
from calibre.constants import DEBUG
# from calibre.ebooks.BeautifulSoup import BeautifulSoup
# import requests
import xml.etree.ElementTree as ET

# Value of `paramtype` for which oclc_classify_webscraping_stdnbr builds its
# dc.identifier query URL from `paramvalue`.
STDNBR = "stdnbr"

#-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
def oclc_classify_webscraping_stdnbr(paramtype, paramvalue):
    """Query the lx2.loc.gov SRU endpoint by identifier and extract DDC/LCC.

    The identifier is substituted into a ``dc.identifier=`` search, the MODS
    response is parsed, and the text of the first ``mods:classification``
    element with ``authority='ddc'`` / ``authority='lcc'`` is returned.

    Args:
        paramtype: Kind of lookup requested. Only ``STDNBR`` is supported;
            any other value yields the default result without a request.
        paramvalue: Identifier to search for. Converted with ``str()``; a
            missing/empty value yields the default result without a request.

    Returns:
        A 7-tuple ``(ddc, lcc, fast_list, oclc_owi, oclc_wi, oclc_worldcat,
        viaf_author_id)``. ``ddc`` and ``lcc`` are the stripped element text
        or ``"NONE"`` when not found. This function never fills the other
        slots: ``fast_list`` is always ``[]`` and the rest are ``"NONE"``.
        Network and XML parsing failures also produce the default result.
    """
    if DEBUG:
        print("paramtype: ", str(paramtype))
        print("paramvalue: ", str(paramvalue))

    ddc_return = "NONE"
    lcc_return = "NONE"
    fast_return_list = []
    oclc_owi_return = "NONE"
    oclc_wi_return = "NONE"
    oclc_worldcat_return = "NONE"
    viaf_author_id_return = "NONE"

    # Result used by every early exit below, before anything has been found.
    not_found = (ddc_return, lcc_return, fast_return_list, oclc_owi_return,
                 oclc_wi_return, oclc_worldcat_return, viaf_author_id_return)

    # Previously an unsupported paramtype left base_url unbound and relied on
    # the broad except around the request to swallow the resulting error.
    if paramtype != STDNBR:
        if DEBUG: print("Unsupported paramtype; no lookup performed: ", str(paramtype))
        return not_found

    # str.replace() needs a str; guard against None/empty and coerce the rest
    # (e.g. an identifier passed as an int) instead of raising TypeError.
    if paramvalue is None or str(paramvalue).strip() == "":
        if DEBUG: print("Empty paramvalue; no lookup performed")
        return not_found

    base_stdnr = "http://lx2.loc.gov:210/LCDB?version=1.1&operation=searchRetrieve&recordSchema=mods&maximumRecords=20&query=dc.identifier=[STDNBRGOESHERE]"
    base_url = base_stdnr.replace("[STDNBRGOESHERE]", str(paramvalue))

    br = browser()
    # NOTE(review): if this timeout is interpreted in seconds (as socket
    # timeouts usually are), 10000 is close to three hours -- confirm intent.
    timeout = 10000
    try:
        raw = br.open_novisit(base_url, timeout=timeout).read().strip()
    except Exception as e:
        # Best effort: any failure to fetch means "no classification".
        if DEBUG: print("Error in br.open_novisit: ", str(e))
        return not_found

    if not raw:
        if DEBUG: print("raw is None; returning from url: ", base_url)
        return not_found

    try:
        root = ET.fromstring(raw)
    except ET.ParseError as e:
        if DEBUG: print("Error parsing XML: ", str(e))
        return not_found

    namespaces = {
        'mods': 'http://www.loc.gov/mods/v3'
    }

    # Find DDC. Element.text is None for an empty element, so check it
    # explicitly rather than catching the AttributeError from .strip().
    ddc_element = root.find(".//mods:classification[@authority='ddc']", namespaces)
    if ddc_element is not None and ddc_element.text is not None:
        ddc_return = ddc_element.text.strip()
        if DEBUG: print("DDC: ", ddc_return)

    # Find LCC
    lcc_element = root.find(".//mods:classification[@authority='lcc']", namespaces)
    if lcc_element is not None and lcc_element.text is not None:
        lcc_return = lcc_element.text.strip()
        if DEBUG: print("LCC: ", lcc_return)

    if DEBUG: print("Returning Results: DDC: ", ddc_return, ", LCC: ", lcc_return)

    return ddc_return, lcc_return, fast_return_list, oclc_owi_return, oclc_wi_return, oclc_worldcat_return, viaf_author_id_return

#-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
def oclc_classify_webscraping_author_title(author, title):
    """Query the lx2.loc.gov SRU endpoint by author and title.

    Builds a ``dc.creator=... AND dc.title=...`` search, parses the MODS
    response, and extracts the first DDC and LCC classification plus the
    text of every ``mods:subject/mods:topic`` element whose ``authority``
    attribute is ``lcsh``.

    Args:
        author: Author value interpolated into the query URL as-is.
        title: Title value interpolated into the query URL as-is.

    Returns:
        A 7-tuple ``(ddc, lcc, fast_list, oclc_owi, oclc_wi, oclc_worldcat,
        viaf_author_id)``. ``ddc`` and ``lcc`` are the stripped element text
        or ``"NONE"``; ``fast_list`` is a list of stripped topic strings
        (possibly empty). This function never fills the last four slots,
        which are always ``"NONE"``. Network and XML parsing failures
        produce the all-default result.
    """
    if DEBUG:
        print("author: ", str(author))
        print("title: ", str(title))

    ddc_return = "NONE"
    lcc_return = "NONE"
    # These five were never defined in this function, so every return
    # statement (including the error paths) raised NameError.
    fast_return_list = []
    oclc_owi_return = "NONE"
    oclc_wi_return = "NONE"
    oclc_worldcat_return = "NONE"
    viaf_author_id_return = "NONE"

    # Result used by every early exit below, before anything has been found.
    not_found = (ddc_return, lcc_return, fast_return_list, oclc_owi_return,
                 oclc_wi_return, oclc_worldcat_return, viaf_author_id_return)

    # NOTE(review): author/title are inserted without URL-encoding; this
    # presumably relies on the caller passing URL-safe values -- confirm.
    base_url = f"http://lx2.loc.gov:210/LCDB?query=dc.creator={author}+AND+dc.title={title}&version=1.1&operation=searchRetrieve&recordSchema=mods&&maximumRecords=10"
    br = browser()
    # NOTE(review): if this timeout is interpreted in seconds (as socket
    # timeouts usually are), 10000 is close to three hours -- confirm intent.
    timeout = 10000

    try:
        raw = br.open_novisit(base_url, timeout=timeout).read().strip()
    except Exception as e:
        # Best effort: any failure to fetch means "no classification".
        if DEBUG: print("Error in br.open_novisit: ", str(e))
        return not_found

    if not raw:
        if DEBUG: print("raw is None; returning from url: ", base_url)
        return not_found

    try:
        root = ET.fromstring(raw)
    except ET.ParseError as e:
        if DEBUG: print("Error parsing XML: ", str(e))
        return not_found

    namespaces = {
        'mods': 'http://www.loc.gov/mods/v3'
    }

    # Find DDC. Element.text is None for an empty element, so check it
    # explicitly rather than catching the AttributeError from .strip().
    ddc_element = root.find(".//mods:classification[@authority='ddc']", namespaces)
    if ddc_element is not None and ddc_element.text is not None:
        ddc_return = ddc_element.text.strip()
        if DEBUG: print("DDC: ", ddc_return)

    # Find LCC
    lcc_element = root.find(".//mods:classification[@authority='lcc']", namespaces)
    if lcc_element is not None and lcc_element.text is not None:
        lcc_return = lcc_element.text.strip()
        if DEBUG: print("LCC: ", lcc_return)

    if DEBUG: print("Returning Results: DDC: ", ddc_return, ", LCC: ", lcc_return)

    # Collect subject topics for the "FAST" slot of the result. The query
    # actually selects topics marked authority='lcsh'.
    # NOTE(review): MODS records often carry the authority attribute on
    # mods:subject rather than mods:topic; if so this matches nothing --
    # verify against a real response before changing the XPath.
    topic_elements = root.findall(".//mods:subject/mods:topic[@authority='lcsh']", namespaces)
    # Skip text-less elements instead of aborting the whole collection on
    # the first one, which is what the swallowed AttributeError used to do.
    fast_return_list.extend(
        element.text.strip() for element in topic_elements if element.text is not None
    )
    if DEBUG: print("FAST subjects: ", fast_return_list)
    if DEBUG: print("Returning Results: DDC: ", ddc_return, ", LCC: ", lcc_return, ", FAST subjects: ", fast_return_list)

    return ddc_return, lcc_return, fast_return_list, oclc_owi_return, oclc_wi_return, oclc_worldcat_return, viaf_author_id_return
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
