#!/usr/bin/env python
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.icu import lower
from calibre.ebooks.metadata.sources.base import Source
from calibre.ebooks.metadata import check_isbn
from lxml.html import fromstring
import calibre_plugins.Adlibris.config as cfg
import time
import re

__license__ = "GPL v3"
__copyright__ = "2025, computer, based on the work of J-H, Pr.BarnArt and Grant Drake"
__docformat__ = "restructuredtext en"


from urllib.parse import quote
from queue import Empty, Queue


class Adlibris(Source):
    """Calibre metadata source plugin that queries the Adlibris web store.

    The attributes below are read by calibre's ``Source`` base class and by
    the methods of this plugin; the methods themselves follow further down.
    """

    # Plugin metadata attributes.
    name = "Adlibris"
    description = "Downloads metadata & covers from Adlibris"
    author = "computer"
    version = (0, 3, 0)
    minimum_calibre_version = (0, 6, 0)

    # Identifier type reported by get_book_url().
    ID_NAME = "isbn"
    # Declared capabilities and the metadata fields this plugin may fill in.
    # NOTE(review): exact semantics are defined by the calibre Source base
    # class, which is not visible in this file.
    capabilities = frozenset(["identify", "cover"])
    touched_fields = frozenset(
        [
            "title",
            "authors",
            "identifier:isbn",
            "comments",
            "publisher",
            "pubdate",
            "languages",
            "series",
        ]
    )
    has_html_comments = True
    supports_gzip_transfer_encoding = True

    # Site root; result hrefs scraped from search pages are appended to it.
    ADLIBRIS_URL = "https://www.adlibris.com"
    # Default search URL prefix. identify() and get_book_url() overwrite it
    # on the instance with ADLIBRIS_URL + the country value from the prefs.
    base_url = "https://www.adlibris.com/fi/haku?q="

    def config_widget(self):
        """
        Return this plugin's custom configuration widget, replacing the
        default configuration screen.
        """
        # The import is performed at call time, not at module import time.
        import calibre_plugins.Adlibris.config as config_module

        widget_class = config_module.ConfigWidget
        return widget_class(self)

    def get_book_url(self, identifiers):
        isbn = identifiers.get("isbn", None)
        if isbn:
            storeCountry = cfg.plugin_prefs[cfg.STORE_NAME][cfg.COUNTRY]
            self.base_url = self.ADLIBRIS_URL + storeCountry
            return (self.ID_NAME, isbn, f"{self.base_url}{isbn}")

    def create_query(self, log, title=None, authors=None, identifiers={}):
        q = ""
        au = ""

        if title:
            title_tokens = list(
                self.get_title_tokens(
                    title, strip_joiners=False, strip_subtitle=True)
            )
            if title_tokens:
                tokens = [quote(t) for t in title_tokens]
                q = "+".join(tokens)
        else:  # return None if there is no title
            return None

        if authors:
            author_tokens = self.get_author_tokens(
                authors, only_first_author=True)
            if author_tokens:
                tokens = [quote(t) for t in author_tokens]
                au = "+" + "+".join(tokens)
        return f"{self.base_url}{q}{au}"

    def get_cached_cover_url(self, identifiers):
        url = None
        isbn = identifiers.get("isbn", None)
        if isbn is not None:
            url = self.cached_identifier_to_cover_url(isbn)
        return url

    def identify(
        self,
        log,
        result_queue,
        abort,
        title=None,
        authors=None,
        identifiers={},
        timeout=30,
    ):
        """
        Find candidate book pages and start one worker per candidate.

        An ISBN search is attempted first when *identifiers* carries a valid
        ISBN; if there is no ISBN or that search fails, a title/author search
        is used instead. Every resulting URL is handed to a ``Worker`` along
        with *result_queue*. The method then waits until all workers have
        finished or *abort* is set. Always returns None.
        """
        # Refresh the search URL prefix from the configured store country.
        country_path = cfg.plugin_prefs[cfg.STORE_NAME][cfg.COUNTRY]
        self.base_url = self.ADLIBRIS_URL + country_path

        found_urls = []
        browser = self.browser

        # First choice: look the book up by ISBN.
        isbn = check_isbn(identifiers.get("isbn", None))
        isbn_lookup_failed = True
        if isbn is not None:
            found_urls, isbn_lookup_failed = self._search_by_isbn(
                isbn, browser, log, timeout)

        # Second choice: title/author search.
        if isbn_lookup_failed or not isbn:
            found_urls = self._search_by_title_author(
                title, authors, identifiers, browser, log, timeout, abort)

        if abort.is_set():
            return

        if not found_urls:
            log.error("No matches found")
            return

        # One worker thread per matched page fetches the actual metadata.
        from calibre_plugins.Adlibris.worker import Worker

        workers = []
        for relevance, page_url in enumerate(found_urls):
            workers.append(
                Worker(page_url, result_queue, browser, log, relevance, self))

        for worker in workers:
            worker.start()
            # Stagger the requests slightly instead of firing them all at once
            time.sleep(0.1)

        # Poll the workers until they are all done or an abort is requested.
        while not abort.is_set():
            still_running = False
            for worker in workers:
                worker.join(0.2)
                if abort.is_set():
                    break
                still_running = still_running or worker.is_alive()
            if not still_running:
                break

        return None
    
    def _search_by_isbn(self, isbn, br, log, timeout):
        """Search for book by ISBN and return all editions"""
        matches = []
        isbn_match_failed = True
        
        try:
            query = f"{self.base_url}{isbn}"
            response = br.open_novisit(query, timeout=timeout)
            raw = response.read().strip()
            raw = raw.decode("utf8", errors="replace")
            
            if not raw:
                log.error(f"Failed to get raw result for query: {query}")
                return matches, isbn_match_failed
            
            root = fromstring(clean_ascii_chars(raw))
            
            # For ISBN searches, get multiple editions of the book
            url_nodes = root.xpath(
                '//div [@class="search-result__list-view__product__information"]/h4/a[@class="search-result__product__name"]/@href'
            )
            
            if url_nodes:
                max_editions = cfg.plugin_prefs[cfg.STORE_NAME].get(
                    cfg.KEY_MAX_EDITIONS, 
                    cfg.DEFAULT_STORE_VALUES[cfg.KEY_MAX_EDITIONS]
                )
                log.info(f"Found {len(url_nodes)} edition(s) for ISBN {isbn}, will fetch up to {max_editions}")
                
                for idx, url_node in enumerate(url_nodes):
                    if idx >= max_editions:
                        log.info(f"Reached max editions limit ({max_editions}), stopping")
                        break
                    full_url = f"{self.ADLIBRIS_URL}{url_node}"
                    matches.append(full_url)
                    log.info(f"Added edition {idx + 1}: {full_url}")
                isbn_match_failed = False
            else:
                log.info(f"No editions found for ISBN {isbn}")
        except:
            log.exception("Failed to parse adlibris.com page for ISBN query")
        
        return matches, isbn_match_failed
    
    def _search_by_title_author(self, title, authors, identifiers, br, log, timeout, abort):
        """Search for book by title and author"""
        matches = []
        
        query = self.create_query(log, title=title, authors=authors, identifiers=identifiers)
        log.info(f"Query is: {query}")
        
        if query is None:
            log.error("Insufficient metadata to construct query")
            return matches
        
        try:
            response = br.open_novisit(query, timeout=timeout)
            raw = response.read().strip()
            raw = raw.decode("utf8", errors="replace")
            
            if not raw:
                log.error(f"Failed to get raw result for query: {query}")
                return matches
            
            root = fromstring(clean_ascii_chars(raw))
        except:
            log.exception(f"Failed to parse adlibris.com page for query: {query}")
            return matches
        
        # Parse search results
        self._parse_search_results(log, title, authors, root, matches, timeout)
        
        # If no matches found, try with simplified title
        if not matches:
            simplified_title = self._simplify_title(title)
            if simplified_title and len(simplified_title) < len(title):
                log.info(f"No matches found, retrying with simplified title: {simplified_title}")
                # Recursive call with simplified title
                from queue import Queue
                temp_queue = Queue()
                return self._search_by_title_author(
                    simplified_title, authors, identifiers, br, log, timeout, abort
                )[0] if not abort.is_set() else matches
        
        return matches
    
    def _simplify_title(self, title):
        """Remove subtitle and other unnecessary parts from title"""
        if not title:
            return title
        
        # Match everything before common separators
        find = re.compile(r"^([^.|\-|—|,|:|;]*)")
        match = re.search(find, title)
        if match:
            return match.group(0).strip()
        return title

    def _parse_search_results(self, log, orig_title, orig_authors, root, matches, timeout):
        """Parse search results and add matching books to matches list"""
        results = root.xpath(
            '//div [@class="search-result__list-view__product__wrapper"]/div/div [@class="search-result__list-view__product__image-and-information-container "]'
        )
        if not results:
            return

        max_results = cfg.plugin_prefs[cfg.STORE_NAME][cfg.KEY_MAX_DOWNLOADS]
        
        for result in results:
            result_url = result.xpath(
                'div/h4/a[@class="search-result__product__name"]/@href'
            )
            result_id = result.xpath(
                'div/h4/a[@class="search-result__product__name"]')

            if not result_url or not result_id:
                continue

            title = result_id[0].text_content().strip()
            # Normalize whitespace
            while "  " in title:
                title = title.replace("  ", " ")
            
            # Strip off any series information from the title
            if "(" in title:
                title = title.rpartition("(")[0].strip()
            
            # Check if title matches
            if self._is_title_match(title, orig_title):
                matches.append(f"{self.ADLIBRIS_URL}{result_url[0]}")
                if len(matches) >= max_results:
                    break
            else:
                log.debug(f"Rejecting as not close enough match: {title}")
    
    def _is_title_match(self, title, orig_title):
        """
        Return True when *title* equals or begins with *orig_title*.

        Both titles are lower-cased with ``lower`` before being compared.
        """
        candidate = lower(title)
        wanted = lower(orig_title)
        # Accept an exact match as well as a candidate that merely starts
        # with the wanted title.
        return candidate == wanted or candidate.startswith(wanted)

    def download_cover(
        self,
        log,
        result_queue,
        abort,
        title=None,
        authors=None,
        identifiers={},
        timeout=30,
    ):
        cached_url = self.get_cached_cover_url(identifiers)
        if cached_url is None:
            log.info("No cached cover found, running identify")
            rq = Queue()
            self.identify(
                log, rq, abort, title=title, authors=authors, identifiers=identifiers
            )
            if abort.is_set():
                return
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            results.sort(
                key=self.identify_results_keygen(
                    title=title, authors=authors, identifiers=identifiers
                )
            )
            for mi in results:
                cached_url = self.get_cached_cover_url(mi.identifiers)
                if cached_url is not None:
                    break
        if cached_url is None:
            log.info("No cover found")
            return

        if abort.is_set():
            return
        br = self.browser
        try:
            cdata = br.open_novisit(cached_url, timeout=timeout).read()
            result_queue.put((self, cdata))
        except:
            log.exception("Failed to download cover from:", cached_url)


if __name__ == "__main__":  # tests
    # Run these tests with:
    # calibre-debug -e __init__.py
    from calibre.ebooks.metadata.sources.test import (
        authors_test,
        series_test,
        test_identify_plugin,
        title_test,
    )

    # Each case pairs the identify() keyword arguments with the checks that
    # the returned metadata must satisfy.
    identify_cases = [
        (  # Title and author only, no ISBN
            {
                "title": "Harry Potter ja kuoleman varjelukset",
                "authors": ["J. K. Rowling"],
            },
            [
                title_test("Harry Potter ja kuoleman varjelukset", exact=True),
                authors_test(["J. K. Rowling"]),
            ],
        ),
        (  # ISBN lookup, first ISBN
            {
                "identifiers": {"isbn": "9789512414550"},
                "title": "Kuolematon kunnia",
                "authors": ["Anne Holt"],
            },
            [
                title_test("Kuolematon kunnia", exact=True),
                authors_test(["Anne Holt"]),
            ],
        ),
        (  # ISBN lookup, second ISBN
            {
                "identifiers": {"isbn": "9789174295542"},
                "title": "Polis",
                "authors": ["Jo Nesb\xf8"],
            },
            [
                title_test("Polis", exact=True),
                authors_test(["Jo Nesb\xf8"]),
            ],
        ),
    ]

    test_identify_plugin(Adlibris.name, identify_cases, fail_missing_meta=True)