#!/usr/bin/env python
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.icu import lower
from calibre.ebooks.metadata.sources.base import Source
from calibre.ebooks.metadata import check_isbn
from lxml.html import fromstring
import calibre_plugins.Adlibris.config as cfg
import time
import re

__license__ = "GPL v3"
__copyright__ = "2022, J-H based on the work by Pr.BarnArt and Grant Drake"
__docformat__ = "restructuredtext en"


from urllib.parse import quote
from queue import Empty, Queue

# from calibre import ipython


class Adlibris(Source):
    """Calibre metadata source that searches adlibris.com for books.

    The plugin first tries an ISBN search (when a valid ISBN identifier is
    supplied) and otherwise falls back to a title/author search.  Every
    matching product page URL is handed to a ``Worker`` thread, which is
    responsible for putting results on the result queue.
    """

    name = "Adlibris"
    description = "Downloads metadata & covers from Adlibris"
    author = "J-H"
    version = (0, 2, 0)
    minimum_calibre_version = (0, 6, 0)

    ID_NAME = "isbn"
    capabilities = frozenset(["identify", "cover"])
    touched_fields = frozenset(
        [
            "title",
            "authors",
            "identifier:isbn",
            "comments",
            "publisher",
            "pubdate",
            "languages",
        ]
    )
    has_html_comments = True
    supports_gzip_transfer_encoding = True
    # cached_cover_url_is_reliable = True

    ADLIBRIS_URL = "https://www.adlibris.com"
    # Default search prefix; replaced by the configured store's prefix
    # whenever _store_search_url() runs.
    base_url = "https://www.adlibris.com/fi/haku?q="
    # sort_by = "&sort_by=Relevance&order_by=Desc&filter=format_fi:e-kirja"

    # Captures the leading part of a title up to the first separator
    # character; used to retry a search with a shortened title.
    _TITLE_HEAD_RE = re.compile(r"^([^.|\-|–|,|:|;]*)")

    def config_widget(self):
        """
        Overriding the default configuration screen for our own custom configuration
        """
        from calibre_plugins.Adlibris.config import ConfigWidget

        return ConfigWidget(self)

    def _store_search_url(self):
        """Build the search URL prefix for the configured store.

        The prefix is ``ADLIBRIS_URL`` followed by the ``cfg.COUNTRY``
        preference value.  It is also stored in ``self.base_url`` because
        ``create_query`` reads it from there.
        """
        store_country = cfg.plugin_prefs[cfg.STORE_NAME][cfg.COUNTRY]
        self.base_url = self.ADLIBRIS_URL + store_country
        return self.base_url

    def _fetch_search_page(self, br, log, query, timeout):
        """Download ``query`` with browser ``br`` and parse it as HTML.

        Returns the parsed lxml root element, or ``None`` (after logging an
        error) when the response body is empty.  Exceptions raised while
        downloading or parsing are propagated to the caller.
        """
        response = br.open_novisit(query, timeout=timeout)
        raw = response.read().strip().decode("utf8", errors="replace")
        if not raw:
            log.error(f"Failed to get raw result for query: {query}")
            return None
        return fromstring(clean_ascii_chars(raw))

    def get_book_url(self, identifiers):
        """Return ``(id_name, isbn, url)`` for the book, or ``None`` without an ISBN.

        The URL is the configured store's search prefix followed by the ISBN.
        """
        isbn = identifiers.get("isbn", None)
        if not isbn:
            return None
        search_url = self._store_search_url()
        return (self.ID_NAME, isbn, f"{search_url}{isbn}")

    def create_query(self, log, title=None, authors=None, identifiers={}):
        """Build a title/author search URL from ``self.base_url``.

        Returns ``None`` when no title is given.  ``log`` and ``identifiers``
        are accepted for interface compatibility but are not used here
        (``identifiers`` is never mutated, so the shared default is safe).
        """
        if not title:
            # A title is required for a non-ISBN search.
            return None

        q = ""
        title_tokens = list(
            self.get_title_tokens(title, strip_joiners=False, strip_subtitle=True)
        )
        if title_tokens:
            q = "+".join(quote(t) for t in title_tokens)

        au = ""
        if authors:
            author_tokens = self.get_author_tokens(authors, only_first_author=True)
            if author_tokens:
                au = "+" + "+".join(quote(t) for t in author_tokens)
        return f"{self.base_url}{q}{au}"

    def get_cached_cover_url(self, identifiers):
        """Return the cached cover URL for the ISBN in ``identifiers``, if any."""
        isbn = identifiers.get("isbn", None)
        if isbn is None:
            return None
        return self.cached_identifier_to_cover_url(isbn)

    def identify(
        self,
        log,
        result_queue,
        abort,
        title=None,
        authors=None,
        identifiers={},
        timeout=30,
    ):
        """Find candidate product pages and start a ``Worker`` for each one.

        Search order:

        1. ISBN search, when ``identifiers`` holds a valid ISBN.
        2. Title/author search, when the ISBN search produced nothing.
        3. If there are still no matches, the title is cut at its first
           separator character and the search is retried once per
           shortening, without identifiers.

        Always returns ``None``; failures are reported through ``log``.
        ``identifiers`` is only read, never mutated.
        """
        self._store_search_url()

        matches = []
        br = self.browser

        isbn = check_isbn(identifiers.get("isbn", None))
        if isbn is not None:
            query = f"{self.base_url}{isbn}"
            try:
                root = self._fetch_search_page(br, log, query, timeout)
                if root is not None:
                    url_node = root.xpath(
                        '//div [@class="search-result__list-view__product__information"]/h4/a[@class="search-result__product__name"]/@href'
                    )
                    if url_node:
                        matches.append(f"{self.ADLIBRIS_URL}{url_node[0]}")
            except Exception:
                # Best effort: a failed ISBN lookup falls through to the
                # title/author search below.
                log.exception(
                    f"Failed to parse adlibris.com page for query: {query}")

        if abort.is_set():
            return

        # A successful ISBN search needs nothing more; everything in this
        # branch is for title/author based searches.
        if not matches:
            query = self.create_query(
                log, title=title, authors=authors, identifiers=identifiers
            )
            log.info(f"Query is: {query}")
            if query is None:
                log.error("Insufficient metadata to construct query")
                return
            try:
                root = self._fetch_search_page(br, log, query, timeout)
            except Exception:
                log.exception(
                    f"Failed to parse adlibris.com page for query: {query}")
                return
            if root is None:
                return
            # Keep only results whose title appears to be for the same book.
            self._parse_search_results(
                log, title, authors, root, matches, timeout)

        if abort.is_set():
            return

        if not matches:
            # Retry with the part of the title before the first separator,
            # but only when that actually shortens the title.
            if title:
                shortened = self._TITLE_HEAD_RE.search(title).group(0).strip()
                if len(shortened) < len(title):
                    log.info(f"Title splitted ({shortened}) and new search")
                    return self.identify(
                        log,
                        result_queue,
                        abort,
                        title=shortened,
                        authors=authors,
                        timeout=timeout,
                    )
            log.error("No matches found")
            return

        from calibre_plugins.Adlibris.worker import Worker

        workers = [
            Worker(match_url, result_queue, br, log, relevance, self)
            for relevance, match_url in enumerate(matches)
        ]

        for w in workers:
            w.start()
            # Don't send all requests at the same time
            time.sleep(0.1)

        # Wait until every worker has finished or the caller aborts.
        while not abort.is_set():
            a_worker_is_alive = False
            for w in workers:
                w.join(0.2)
                if abort.is_set():
                    break
                if w.is_alive():
                    a_worker_is_alive = True
            if not a_worker_is_alive:
                break

        return None

    def _parse_search_results(
        self, log, orig_title, orig_authors, root, matches, timeout
    ):
        """Append product URLs of title-matching search results to ``matches``.

        A result matches when its title (with any trailing parenthesised
        part removed) starts with ``orig_title``, compared case-insensitively.
        Collection stops once ``matches`` holds the configured maximum number
        of entries.  ``orig_authors`` and ``timeout`` are currently unused.
        """
        results = root.xpath(
            '//div [@class="search-result__list-view__product__wrapper"]/div/div [@class="search-result__list-view__product__image-and-information-container "]'
        )
        if not results:
            return

        wanted_title = lower(orig_title)
        max_results = cfg.plugin_prefs[cfg.STORE_NAME][cfg.KEY_MAX_DOWNLOADS]

        for result in results:
            name_nodes = result.xpath(
                'div/h4/a[@class="search-result__product__name"]')
            hrefs = result.xpath(
                'div/h4/a[@class="search-result__product__name"]/@href'
            )
            if not name_nodes or not hrefs:
                # Result row without the expected product link; nothing to
                # compare or download, so skip it instead of crashing.
                continue

            title = name_nodes[0].text_content().strip()
            # Collapse runs of spaces into a single space.
            title = re.sub(r" {2,}", " ", title)
            # Strip off any series information from the title
            if "(" in title:
                title = title.rpartition("(")[0].strip()

            # startswith() also covers the exact-match case.
            if not lower(title).startswith(wanted_title):
                log.error(f"Rejecting as not close enough match: {title}")
                continue

            matches.append(f"{self.ADLIBRIS_URL}{hrefs[0]}")
            if len(matches) >= max_results:
                break

    def parse_true(self):
        """Return ``True`` unconditionally."""
        return True

    def download_cover(
        self,
        log,
        result_queue,
        abort,
        title=None,
        authors=None,
        identifiers={},
        timeout=30,
    ):
        """Download the cover image and put ``(self, data)`` on ``result_queue``.

        Uses the cached cover URL for the ISBN when available; otherwise runs
        ``identify`` and takes the first result, in relevance order, that has
        a cached cover URL.  ``identifiers`` is only read, never mutated.
        """
        cached_url = self.get_cached_cover_url(identifiers)
        if cached_url is None:
            log.info("No cached cover found, running identify")
            rq = Queue()
            self.identify(
                log,
                rq,
                abort,
                title=title,
                authors=authors,
                identifiers=identifiers,
                timeout=timeout,
            )
            if abort.is_set():
                return
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            results.sort(
                key=self.identify_results_keygen(
                    title=title, authors=authors, identifiers=identifiers
                )
            )
            for mi in results:
                cached_url = self.get_cached_cover_url(mi.identifiers)
                if cached_url is not None:
                    break
        if cached_url is None:
            log.info("No cover found")
            return

        if abort.is_set():
            return
        br = self.browser
        try:
            cdata = br.open_novisit(cached_url, timeout=timeout).read()
            result_queue.put((self, cdata))
        except Exception:
            log.exception("Failed to download cover from:", cached_url)


if __name__ == "__main__":  # tests
    # Run the checks below with:
    #   calibre-debug -e __init__.py
    from calibre.ebooks.metadata.sources.test import (
        test_identify_plugin,
        title_test,
        authors_test,
        series_test,
    )

    def _identify_case(search_fields, expected_title, expected_authors):
        """Pair the search input with exact-title and author checks."""
        checks = [
            title_test(expected_title, exact=True),
            authors_test(expected_authors),
        ]
        return (search_fields, checks)

    identify_cases = [
        # A book without an ISBN
        _identify_case(
            {
                "title": "Harry Potter ja kuoleman varjelukset",
                "authors": ["J. K. Rowling"],
            },
            "Harry Potter ja kuoleman varjelukset",
            ["J. K. Rowling"],
        ),
        # A book with an ISBN
        _identify_case(
            {
                "identifiers": {"isbn": "9789512414550"},
                "title": "Kuolematon kunnia",
                "authors": ["Anne Holt"],
            },
            "Kuolematon kunnia",
            ["Anne Holt"],
        ),
        # A book with an ISBN
        _identify_case(
            {
                "identifiers": {"isbn": "9789174295542"},
                "title": "Polis",
                "authors": ["Jo Nesb\xf8"],
            },
            "Polis",
            ["Jo Nesb\xf8"],
        ),
    ]

    test_identify_plugin(Adlibris.name, identify_cases, fail_missing_meta=True)
