# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2024, David Forrester, Sosie & seeder'
__docformat__ = 'restructuredtext en'

try:
    from urllib.parse import quote
except ImportError:
    from urllib2 import quote

try:
    from queue import Empty, Queue
except ImportError:
    from Queue import Empty, Queue

import re
from calibre import as_unicode
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.icu import lower
from lxml.html import fromstring
import calibre_plugins.xTrance.globals as g


def load_url(log, query, br):
    '''
    Fetch ``query`` with the browser ``br`` and return the parsed document root.

    :param log: logger exposing ``info``, ``error`` and ``exception``
    :param query: URL to open (converted with ``str`` before the request)
    :param br: browser object exposing ``open_novisit``
    :return: root element produced by ``lxml.html.fromstring``
    :raises Exception: when the request fails, the response body is empty,
        or the body cannot be decoded/parsed; the message names the query
    '''
    try:
        log.info('-- querying: %s' % query)
        response = br.open_novisit(str(query))
    except Exception as e:
        msg = '*** Failed to make identify query: %r - %s ' % (query, e)
        log.exception(msg)
        raise Exception(msg)

    raw = None
    root = None
    try:
        raw = response.read().strip().decode('utf-8', errors='replace')
        # Drop a leading XML declaration as a whole prefix. str.lstrip() takes
        # a *set of characters*, so using it here could also swallow the start
        # of the real document (e.g. a root tag spelled with those letters).
        raw = re.sub(r'^<\?xml\b[^>]*\?>\s*', '', raw)
        if raw:
            root = fromstring(clean_ascii_chars(raw))
    except Exception:
        msg = '*** Failed to parse page for query: %r' % query
        log.exception(msg)
        raise Exception(msg)

    # Reported outside the try block so that an empty response is not
    # re-labelled as a parse failure.
    if not raw:
        msg = '*** Failed to get raw result for query: %r' % query
        log.error(msg)
        raise Exception(msg)
    return root

# load_translations is neither defined nor imported in this file; presumably
# calibre provides it when the plugin is loaded - TODO confirm. When the name
# does not exist the NameError is swallowed and the module keeps loading.
try:
    load_translations()
except NameError:
    pass # load_translations() added in calibre 1.9

class XTrance(Source):
    '''
    Metadata source plugin that searches xTrance.info for book metadata and covers.
    '''
    # Plugin identity. The attributes in this header are class-level settings;
    # presumably they are read by calibre's Source plugin framework - their
    # exact semantics are defined by calibre, not by this file.
    name = 'xTrance'
    description = _('Downloads metadata and covers from xTrance.info')
    author = 'David Forrester, Sosie & seeder'
    version = (2, 0, 7)
    minimum_calibre_version = (0, 8, 0)

    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'identifier:xtrance', 'identifier:isbn', 'rating',
                                'comments', 'publisher', 'pubdate', 'series', 'series_index', 'tags', 'languages'])
    has_html_comments = True
    supports_gzip_transfer_encoding = False
    prefer_results_with_isbn = False
    ignore_ssl_errors = True
    can_get_multiple_covers = True
    

    # Endpoint prefixes: a record id (BOOK_INFO_URL), a cover id (COVER_URL)
    # or the quoted title tokens (SEARCH_TITLE_API_URL) are appended to them.
    BOOK_INFO_URL = 'http://xtrance.info/new/?mainpage=pub&subpage=detail&id='
    COVER_URL = 'http://xtrance.info/new/?mainpage=file&subpage=publication_image&id='
    SEARCH_TITLE_API_URL = 'http://xtrance.info/new/?xml_pub=100&book_name='

    # The version tuple is rendered as a dotted string, e.g. (2, 0, 7) -> '2.0.7'.
    config_message = _('Plugin version: <b>%s</b> - Report errors and suggestions through'
                       ' <a href="https://ebookforum.sk/viewtopic.php?f=15&t=8777">eBookforum</a> or <a href="https://www.mobileread.com/forums/showpost.php?p=4400879">MobileRead</a>.'
                       )%str(version).strip('()').replace(', ', '.')


    def config_widget(self):
        '''
        Build the plugin's own configuration widget, replacing the default
        configuration screen.
        '''
        # Imported at call time, exactly when the widget is requested.
        from calibre_plugins.xTrance.config import ConfigWidget as widget_factory
        widget = widget_factory(self)
        return widget

    def get_book_url(self, identifiers):
        book_id = identifiers.get('xtrance_id', None)
        book_id = identifiers.get('xtrance', book_id)
        return ('xTrance', book_id, '%s%s' % (XTrance.BOOK_INFO_URL, book_id)) if book_id else None

    def get_cached_cover_url(self, identifiers):
        book_id = identifiers.get('xtrance_id', None)
        book_id = identifiers.get('xtrance', book_id)
        if book_id is None:
            isbn = check_isbn(identifiers.get('isbn', None))
            if isbn is None:
                book_id = self.cached_isbn_to_identifier(isbn)
        if book_id is not None:
            url = self.cached_identifier_to_cover_url(book_id)
            return url if url else None
        
    @staticmethod
    def search_title_for_metadata(title, identifiers):
        meta_dict = dict()
        if not title:
            return title, identifiers, meta_dict
        search_regex = re.compile(r"(?:(?:"
                                  r"isbn|ean|"
                                  r"xtrance_id|xtrance|xtr|"
                                  r"legie|"
                                  r"dbk|dbknih|databazeknih|"
                                  r"dbkp|dbk_povidka|databazeknih_povidka|dbknih_povidka|"
                                  r"publisher|pubdate|pubyear|language|lang"
                                  r"):(?:\S*)(?: |$))")
        meta_title = re.findall(search_regex, title)
        # Remove matched metadata from title
        title = re.sub(pattern=search_regex, string=title, repl='')
        title = ' '.join(title.split())

        meta_dict = dict([i.rstrip(' ').split(':', 1) for i in meta_title])

        identifiers_mapping = {
            'xtrance': ['xtrance_id', 'xtr'],
            'isbn': ['isbn', 'ean']
        }
        for identifier, keys in identifiers_mapping.items():
            for key in keys:
                value = meta_dict.get(key, None)
                if value is not None:
                    identifiers[identifier] = value

        meta_dict_mapping = {
            'pubdate': ['pubdate', 'pubyear'],
            'publisher': ['publisher'],
            'language': ['language', 'lang'],
        }
        remapped_meta_dict = dict()
        for identifier, keys in meta_dict_mapping.items():
            for key in keys:
                value = meta_dict.get(key, None)
                if value is not None:
                    remapped_meta_dict[identifier] = value
        meta_dict = remapped_meta_dict

        if identifiers.get('pubdate', None) and meta_dict.get('pubdate', None) is None:
            meta_dict['pubdate'] = identifiers['pubdate']
            identifiers.pop('pubdate')
        if identifiers.get('pubyear', None) and meta_dict.get('pubyear', None) is None:
            meta_dict['pubdate'] = identifiers['pubyear']
            identifiers.pop('pubyear')
        if identifiers.get('publisher', None) and meta_dict.get('publisher', None) is None:
            meta_dict['publisher'] = identifiers['publisher']
            identifiers.pop('publisher')

        if identifiers.get('xtr', None):
            identifiers['xtrance'] = identifiers['xtr']
            identifiers.pop('xtr')
        if identifiers.get('xtrance_id', None):
            identifiers['xtrance'] = identifiers['xtrance_id']
            identifiers.pop('xtrance_id')
        return title, identifiers, meta_dict
        
    def identify_results_keygen(self, title=None, authors=None,
            identifiers={}):
        '''
        Return a one-argument callable that wraps a metadata object in a
        MetadataCompareKeyGen built from this plugin and the given search
        terms; it is used as the ``key`` when sorting results.
        '''
        from calibre_plugins.xTrance.compare import MetadataCompareKeyGen as key_cls
        return lambda mi: key_cls(mi, self, title, authors, identifiers)

    def download_cover(self, log, result_queue, abort, title=None, authors=None, identifiers={}, timeout=30,
                       get_best_cover=False):
        '''
        Download covers for a book and put ``(self, image data)`` on ``result_queue``.

        Cached cover ids are looked up through ``get_cached_cover_url``; when
        nothing is cached, ``identify`` is run first and its results are
        checked in ranking order. At most MAX_COVERS (plugin preference) covers
        are fetched; a preference value of 0 disables the cover search.

        :param log: logger exposing ``info`` and ``exception``
        :param result_queue: queue receiving ``(plugin, raw cover data)`` tuples
        :param abort: event-like object; downloading stops once ``is_set()`` is true
        :param title: book title used when identify has to be run
        :param authors: list of authors used when identify has to be run
        :param identifiers: dict of identifiers of the book
        :param timeout: timeout passed to each cover request
        :param get_best_cover: accepted for interface compatibility, not used here
        '''
        max_covers = g.plugin_prefs[g.STORE_NAME].get(g.MAX_COVERS, g.DEFAULT_STORE_VALUES[g.MAX_COVERS])
        if max_covers == 0:
            log.info('Searching for covers on xTrance is disabled. You can enable it in plugin preferences.')
            return
        urls = self.get_cached_cover_url(identifiers)

        if urls is None:
            log.info('No cached cover found, running identify')
            rq = Queue()
            self.identify(log, rq, abort, title=title, authors=authors, identifiers=identifiers)
            if abort.is_set():
                return
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            results.sort(key=self.identify_results_keygen(title=title, authors=authors, identifiers=identifiers))
            # Take the covers of the best ranked result that has any cached.
            for mi in results:
                urls = self.get_cached_cover_url(mi.identifiers)
                if urls is not None:
                    break

        if not urls:
            log.info('No cover found')
            return

        for cover_id in urls[:max_covers]:
            # Checked before every request; a single check placed after the
            # loop could no longer stop anything.
            if abort.is_set():
                return
            cover_url = ''.join([self.COVER_URL, cover_id])
            try:
                cdata = self.browser.open_novisit(cover_url, timeout=timeout).read()
                result_queue.put((self, cdata))
            except Exception as e:
                # One failed cover must not prevent the remaining downloads.
                log.exception('*** Failed to download cover - %s' % e)

    def identify(self, log, result_queue, abort, title=None, authors=None, identifiers={}, timeout=30):
        '''
        Search xTrance for a book and put the parsed results on ``result_queue``.

        Search stages, each one feeding the same three result buckets:

        1. title matched from its beginning,
        2. title matched anywhere (only when the SEARCH_WHOLE_TITLE preference is on),
        3. backup: every single word of the title, longest first - only when
           no title or author/title match exists yet,
        4. backup: title and authors swapped - only when there is still no
           such match and authors were given.

        The collected records are handed to XtParser, sorted with
        ``identify_results_keygen`` and truncated to the MAX_DOWNLOADS preference.

        :param log: logger
        :param result_queue: queue receiving the metadata objects
        :param abort: event-like object; work stops once ``is_set()`` is true
        :param title: title text; may contain ``key:value`` hints
            (see ``search_title_for_metadata``)
        :param authors: list of author names or None
        :param identifiers: dict of identifiers; it is not modified
        :param timeout: passed through to the result parsing step
        :return: None, or the error text when a request failed
        '''
        log.info('\n--------', self.name, ': searching for books started with this information:')
        matches = []
        no_author_matches = []
        no_matches = []
        buckets = (matches, no_author_matches, no_matches)
        # The title parser rewrites identifier keys; give it a private copy so
        # the caller's dict and the shared default argument stay untouched.
        identifiers = dict(identifiers) if identifiers else {}
        title, identifiers, meta_dict = self.search_title_for_metadata(title, identifiers)
        self.meta_dict = meta_dict
        log.info('Title:\t', title, '\nAuthors:\t', authors, '\nIds:\t', identifiers, '\nExtra ids:\t', meta_dict)
        log.info('--------')

        # match title from start
        keep_going, error = self._run_search_stage(
            log, abort, '\nRunning query (match from beginning) below:',
            title, True, title, authors, buckets, timeout)
        if not keep_going:
            return error

        # match title from whole string
        if g.plugin_prefs[g.STORE_NAME].get(g.SEARCH_WHOLE_TITLE, g.DEFAULT_STORE_VALUES[g.SEARCH_WHOLE_TITLE]):
            keep_going, error = self._run_search_stage(
                log, abort, '\nRunning query (whole string match title) below:',
                title, False, title, authors, buckets, timeout)
            if not keep_going:
                return error

        # BACKUP OPTIONS if plugins does not find any results
        if not any((matches, no_author_matches)):
            log.info('\nStill no matches. Trying backup options - Find only using one word from title (from longest).')
            # match only one word from title (longest first)
            title_word_list = reversed(sorted(title.split(), key=len)) if title else []
            for word in title_word_list:
                keep_going, error = self._run_search_stage(
                    log, abort,
                    '\nRunning backup search (separate words from title - now word: %s) query:' % word,
                    word, False, title, authors, buckets, timeout)
                if not keep_going:
                    return error
        # Without authors the swapped search would send an empty wildcard
        # query and collect unrelated records, so it is skipped in that case.
        if not any((matches, no_author_matches)) and authors:
            log.info('\nStill no matches. Trying backup option - Swap title and authors fields.')
            # swapped title/authors; kept in separate names so the real
            # title/authors remain available for the final ranking
            swapped_title = ' '.join(authors)
            swapped_authors = (sorted(title.split(), key=len) if title else []) or None
            keep_going, error = self._run_search_stage(
                log, abort, '\nRunning backup search (swapped title and authors field) query:',
                swapped_title, False, swapped_title, swapped_authors, buckets, timeout)
            if not keep_going:
                return error

        matches_count = sum((len(matches), len(no_author_matches), len(no_matches)))
        if matches_count:
            log.info('\n-----\nFound:\n%s author/title matches\n%s only title matches' \
                    '\n%s no matches\n-- merging into %s total results'%(
                        len(matches), len(no_author_matches), len(no_matches),
                        matches_count))
        matches.extend(no_author_matches)
        matches.extend(no_matches)

        from calibre_plugins.xTrance.xtparser import XtParser
        mdata = []
        parser = XtParser(log, self, mdata, matches, meta_dict)
        parser.parse_data()

        if abort.is_set():
            return

        max_results = g.plugin_prefs[g.STORE_NAME].get(g.MAX_DOWNLOADS, g.DEFAULT_STORE_VALUES[g.MAX_DOWNLOADS])

        if len(mdata) > max_results:
            log.info('\nStripping results - from %s to %s.'%(len(mdata), max_results))

        # Rank first, then cut, so that the best results survive the limit.
        mdata.sort(key=self.identify_results_keygen(title=title, authors=authors, identifiers=identifiers))
        mdata = mdata[:max_results] if len(mdata) > max_results else mdata
        for item in mdata:
            result_queue.put(item)

    def _run_search_stage(self, log, abort, intro, query_title, match_title_from_start,
                          orig_title, orig_authors, buckets, timeout):
        '''
        Run one search query and sort its results into the match buckets.

        :param intro: log line printed in front of the query URL
        :param query_title: text the query is built from
        :param match_title_from_start: forwarded to ``create_query``
        :param orig_title: title the returned records are compared with
        :param orig_authors: authors the returned records are compared with
        :param buckets: tuple ``(matches, no_author_matches, no_matches)``
        :return: ``(keep_going, error)``; ``keep_going`` is False when identify
            has to stop, ``error`` then holds the text to return (None when
            aborted or when no query could be built)
        '''
        if abort.is_set():
            return False, None
        query = self.create_query(title=query_title, match_title_from_start=match_title_from_start)
        if query is None:
            log.error('*** query is empty')
            return False, None
        log.info('%s\n%s' % (intro, query))
        try:
            root = load_url(log, query, self.browser)
        except Exception as e:
            return False, as_unicode(e)
        matches, no_author_matches, no_matches = buckets
        self._parse_search_results(log, orig_title, orig_authors, root, matches, no_author_matches, no_matches, timeout)
        return True, None

    def create_query(self, title=None, match_title_from_start=True):
        title_tokens = list(self.get_title_tokens(title, strip_joiners=False, strip_subtitle=True))
        # Remove special chars from title
        title_tokens = [re.sub(r'[:.,#\-=+]', '', t) for t in title_tokens]
        tokens = [quote(t.encode('utf-8') if isinstance(t, str) else t) for t in title_tokens]
        if match_title_from_start:
            return self.SEARCH_TITLE_API_URL + '+'.join(tokens)
        else:
            return self.SEARCH_TITLE_API_URL + '*' + '+'.join(tokens)

    @staticmethod
    def match(log, item1, item2):
        '''
        Loose comparison of two strings after lower-casing both: they match
        when they are equal or when either one contains the other.
        ``log`` is not used.
        '''
        left, right = lower(item1), lower(item2)
        if left == right:
            return True
        return left in right or right in left
    
    @staticmethod
    def get_xpath(root, xpath, convert=lambda x: x[0].strip() if x else None):
        return convert(root.xpath(xpath))

    def _parse_search_results(self, log, orig_title, orig_authors, root, matches, no_author_matches, no_matches, timeout):
        '''
        Sort the ``publication`` records found in ``root`` into the three buckets.

        * ``matches`` - title and author match: ``(record, -author hits, -title hits)``
        * ``no_author_matches`` - title match only: ``(record, 0, -title hits)``
        * ``no_matches`` - everything else: ``(record, 1, 1)``

        A title hit is counted for every word of ``orig_title`` that matches
        the record title, an author hit for every author token that matches
        the record author (see ``match``). A record whose id is already stored
        in the same or a better bucket is skipped as a duplicate.

        :param log: logger
        :param orig_title: title searched for, may be None
        :param orig_authors: list of author names searched for, may be None
        :param root: parsed search response
        :param matches: list extended in place
        :param no_author_matches: list extended in place
        :param no_matches: list extended in place
        :param timeout: not used
        '''
        buckets = (matches, no_author_matches, no_matches)
        # id -> index of the best bucket already holding that record. All three
        # buckets are indexed and the index is kept current while looping;
        # otherwise repeated queries would add the same record again.
        best_bucket = {}
        for rank, bucket in enumerate(buckets):
            for entry in bucket:
                known_id = self.get_xpath(entry[0], 'xtrance_id/text()')
                if known_id:
                    best_bucket.setdefault(known_id, rank)

        # Both depend only on the search terms, so they are built once.
        author_tokens = self._author_name_tokens(orig_authors)
        title_words = orig_title.split() if orig_title else []

        results = self.get_xpath(root, '//publication', convert=lambda x: x if x else [])
        log('\nNow try to match %s results...'%(len(results)))
        for result in results:
            title = self.get_xpath(result, 'title/text()')
            url_id = self.get_xpath(result, 'xtrance_id/text()')
            author = self.get_xpath(result, 'author/text()')

            title_hits = sum(1 for word in title_words if self.match(log, title, word)) if title else 0
            author_hits = sum(1 for token in author_tokens if self.match(log, author, token)) if author else 0

            if title_hits and author_hits:
                rank = 0
                entry = (result, -author_hits, -title_hits)
                message = '+++ author and title match a(%d) t(%d) : %s; author : %s; url id : %s' % (
                    author_hits, title_hits, title, author, url_id)
            elif title_hits:
                rank = 1
                entry = (result, 0, -title_hits)
                message = '++  only title match t(%d): %s; author : %s; url id : %s' % (
                    title_hits, title, author, url_id)
            else:
                rank = 2
                entry = (result, 1, 1)
                message = '+   adding no match: %s; author : %s; url id : %s' % (title, author, url_id)

            if url_id in best_bucket and best_bucket[url_id] <= rank:
                log.info('-   duplicate, skipping: %s; author : %s; url id : %s' % (title, author, url_id))
                continue
            if url_id:
                best_bucket[url_id] = rank
            buckets[rank].append(entry)
            log.info(message)

    @staticmethod
    def _author_name_tokens(orig_authors):
        '''
        Split author names into the word tokens used for author matching.

        Role markers such as ``(Autor)`` are dropped, trailing/leading commas
        are removed and a trailing ``ová`` is cut off; afterwards an ``ová``
        variant of every token is added. Empty tokens are discarded because an
        empty string would match every author.

        :param orig_authors: list of author names or None
        :return: set of tokens (empty when there are no authors)
        '''
        role_markers = ('(Autor)', '(Překlad)', '(Ilustrace)', '(Obálka)', '(Ortonym)')
        suffix = 'ová'
        tokens = set()
        for author in orig_authors or ():
            for marker in role_markers:
                author = author.replace(marker, ' ')
            for word in author.split():
                word = word.strip(',')
                # Real suffix removal; str.strip(suffix) would instead remove
                # any of the letters o/v/á from both ends of the word.
                if word.endswith(suffix) and len(word) > len(suffix):
                    word = word[:-len(suffix)]
                if word:
                    tokens.add(word)
        # Create author tokens including 'ová' variants
        tokens.update(['%sová' % token for token in tokens])
        return tokens
