#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

# from six.moves import map

__license__   = 'GPL v3'
__copyright__ = '2011, Rodrigo Coin Curvo. 2019-2022, Thiago Oliveira'
__docformat__ = 'restructuredtext en'

# Standard libraries
import time
import re
from six.moves.urllib.parse import quote
from six.moves.queue import Queue, Empty
from lxml.html import fromstring

# Calibre libraries
from calibre.ebooks.metadata.sources.base import Source
from calibre.utils.icu import lower
from calibre.utils.cleantext import clean_ascii_chars

# Load translation files (.mo) on the folder 'translations'
# NOTE(review): neither ``load_translations`` nor ``_`` is defined or imported
# in this file; presumably both are injected by calibre -- confirm.
try:
    load_translations()
except NameError:
    # ``load_translations`` is missing from this namespace: report it and go
    # on. The message itself is passed through ``_``, so if ``_`` is missing
    # too this line raises NameError again.
    print(_('Translation files could not be loaded.'))


# Get a random user agent from calibre
def random_ua():
    '''
    Return the string form of a one-item headers dict with a User-Agent.

    calibre's ``random_user_agent`` is used when it can be imported. It is
    first called with ``allow_ie=False``; if that call raises TypeError it is
    called again without arguments. When the import fails (ImportError) a
    fixed User-Agent is used instead.

    Returns:
        ``str()`` of the dict, e.g. ``"{'User-Agent': 'Mozilla/5.0 ...'}"``.
    '''
    try:
        from calibre import random_user_agent
        try:
            agent = random_user_agent(allow_ie=False)
        except TypeError:
            agent = random_user_agent()
    except ImportError:
        # Fixed fallback with a well-formed platform token ("Windows NT 10.0")
        agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'

    return str({'User-Agent': agent})


class Skoob(Source):
    '''
    calibre metadata source that looks books up on Skoob (skoob.com.br).

    It declares the 'identify' and 'cover' capabilities and may fill in the
    metadata fields listed in ``touched_fields``.
    '''

    # Plugin identity and version information
    # NOTE(review): ``minimum_calibre_version`` is presumably the oldest
    # calibre release the plugin claims to support -- confirm.
    name                    = 'Skoob Books'
    description             = _('Downloads metadata and covers from Skoob')
    author                  = 'Rodrigo Coin Curvo / Thiago Oliveira'
    version                 = (1, 5, 7)
    minimum_calibre_version = (2, 0, 0)

    # What the plugin can do and which metadata fields it may set
    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'identifier:skoob', 'identifier:isbn', 'tags',
                                'rating', 'comments', 'publisher', 'pubdate', 'languages', 'series'])
    # NOTE(review): the two flags below are not read anywhere in this file;
    # presumably calibre uses them to learn that comments are HTML and that
    # gzip transfer encoding is acceptable -- confirm against the base class.
    has_html_comments = True
    supports_gzip_transfer_encoding = True

    # Site constants used to build and to recognise Skoob URLs
    BASE_DOMAIN = 'skoob.com.br'
    BASE_URL = 'https://www.skoob.com.br'

    def config_widget(self):
        '''
        Return this plugin's own configuration widget instead of the default
        configuration screen.
        '''
        # Plugin-local module, imported at call time like elsewhere in this class
        import calibre_plugins.skoob_metadata.config as cfg
        return cfg.ConfigWidget(self)

    def _get_skoob_book_url(self, skoob_id):
        return '%s/livro/%s' % (Skoob.BASE_URL, skoob_id)

    def get_book_url(self, identifiers):
        skoob_id = identifiers.get('skoob', None)
        if skoob_id:
            return 'skoob', skoob_id, self._get_skoob_book_url(skoob_id)

    def create_query(self, log, title=None, authors=None, identifiers={}, page=None):
        '''
        Build the Skoob search URL for a title or, failing that, for authors.

        A non-empty ``title`` always wins: its tokens (subtitle stripped,
        then passed through ``clean_words``) form a 'tipo:titulo' query.
        Only when ``title`` is empty are the author tokens used, as a
        'tipo:autor' query. Tokens are encoded as iso-8859-1, URL-quoted and
        joined with '+'. ``page``, when given, is appended as '/mpage:<page>'.
        ``log`` and ``identifiers`` are accepted but not used here.

        Returns the URL, or None when no token is left to search for.
        '''
        if title:
            tokens = list(self.get_title_tokens(title, strip_subtitle=True))

            from calibre_plugins.skoob_metadata.similarity import clean_words
            tokens = clean_words(tokens)
            search_type = '/tipo:titulo'
        elif authors:
            tokens = list(self.get_author_tokens(authors))
            search_type = '/tipo:autor'
        else:
            return None

        encoded = [quote(token.encode('iso-8859-1', errors='replace')) for token in tokens]
        if not encoded:
            return None

        # Use plus sign ("+") to join
        path = '/' + '+'.join(encoded) + search_type
        if page is not None:
            path = path + '/mpage:%s' % page

        return Skoob.BASE_URL + '/livro/lista' + path

    # def _is_book_url(self, url):
    #     return re.search('skoob.com.br/livro/([0-9]+)', url)

    def _exec_search(self, log, abort, isbn_id, title, authors, identifiers, s_matches,
                     timeout, first='title', flags={}):
        '''
        Run one Skoob search and collect candidate matches from its result pages.

        With ``first='title'`` the search term is ``isbn_id`` when it is set,
        otherwise ``title``; with ``first='authors'`` it is ``authors``. The
        same term is used for every following page, up to the configured
        maximum number of pages. Each page is handed to
        ``_parse_search_results``, which appends to ``s_matches``.
        ``flags`` is accepted but not used here.

        Raises:
            Exception: when no query can be built, or when the first page
                cannot be fetched or parsed. A failure on a later page is
                logged and ends the search quietly.
        '''
        # Fix the search terms once so that pagination repeats the very same
        # query instead of switching to a different one on page 2.
        query_title = None
        query_authors = None
        if first == 'title':
            query_title = isbn_id if isbn_id else title
        elif first == 'authors':
            query_authors = authors

        query = self.create_query(log, title=query_title, authors=query_authors,
                                  identifiers=identifiers)

        if query is None:
            msg = _('Insufficient metadata to construct query')
            log.error(msg)
            raise Exception(msg)

        # This import has to be here, otherwise it won't work
        import calibre_plugins.skoob_metadata.config as cfg
        max_pages = cfg.get_option(cfg.KEY_MAX_PAGES)

        br = self.browser
        page = 1

        while query is not None and page <= max_pages:

            log.info('   ')
            log.info(_('Querying: %s') % query)
            log.info('   ')

            try:
                try:
                    response = br.open_novisit(query, timeout=timeout)
                except Exception as e:
                    # Keep the underlying cause visible in the final message
                    raise Exception('%s (%s)' % (_('Failed to query'), e))

                raw = response.read().strip()
                raw = raw.decode('iso-8859-1', errors='replace')

                if not raw:
                    raise Exception(_('Failed to get raw result'))

                try:
                    root = fromstring(clean_ascii_chars(raw))
                except Exception as e:
                    raise Exception('%s (%s)' % (_('Failed to parse page'), e))

            except Exception as e:
                msg = _('Error on query %s: %s') % (query, e)
                log.error(msg)

                # Worry only if this is the first page
                if page == 1:
                    raise Exception(msg)
                return

            if abort.is_set():
                return

            # Now grab the matches from the search results, provided the
            # title and authors appear to be for the same book
            self._parse_search_results(log, title, authors, root, s_matches, timeout)

            # Check if there is another page
            next_links = root.xpath('//div[@class="paginacao_lista_busca_down"]/div[@class="proximo"]/a')
            if not next_links:
                break

            page = page + 1
            query = self.create_query(log, title=query_title, authors=query_authors,
                                      identifiers=identifiers, page=page)

    def _search(self, log, abort, title, authors, identifiers, s_matches, timeout, flags={}):
        '''
        Search Skoob with progressively weaker criteria until something matches.

        The attempts are, in order: by ISBN (only when ``identifiers`` has a
        non-empty 'isbn'), by title, then by authors. Each attempt runs only
        while ``s_matches`` is still empty and ``abort`` is not set. Errors
        raised by an attempt are logged and do not stop the following ones.
        Matches are appended to ``s_matches``; nothing is returned.
        '''
        isbn_id = identifiers.get('isbn', None)

        def attempt(isbn, first):
            # One search attempt; failures are logged, never propagated
            try:
                self._exec_search(log, abort, isbn, title, authors, identifiers,
                                  s_matches, timeout, first=first, flags=flags)
            except Exception as e:
                log.error(e)

        # Only query by ISBN when there is one: without it this attempt would
        # be the very same title query that follows.
        if isbn_id:
            attempt(isbn_id, 'title')
            if s_matches or abort.is_set():
                return
            log.info(_("No matches with ISBN, trying with title..."))

        attempt(None, 'title')
        if s_matches or abort.is_set():
            return

        log.info(_("No matches with title, trying with author..."))
        attempt(None, 'authors')

    def identify(self, log, result_queue, abort, title=None, authors=None, identifiers={}, timeout=30):
        '''
        Find the book on Skoob and put the downloaded results on ``result_queue``.

        When ``identifiers`` has a 'skoob' id, that book page is used
        directly. Otherwise Skoob is searched (see ``_search``); if nothing
        is found and the option is enabled, the search is repeated with title
        and authors exchanged. Only the matches sharing the highest
        similarity are kept, ordered by number of Skoob readers and limited
        to the configured maximum number of downloads.

        One ``Worker`` is started per remaining match; whatever the workers
        leave on their queue is forwarded to ``result_queue``.

        Always returns None.
        '''

        s_matches = []
        flags = {}

        skoob_id = identifiers.get('skoob', None)

        if skoob_id:
            s_matches.append((100, self._get_skoob_book_url(skoob_id), authors, 0))

        else:
            self._search(log, abort, title, authors, identifiers, s_matches, timeout, flags)

            # This import has to be here, otherwise it won't work
            import calibre_plugins.skoob_metadata.config as cfg

            try_exchanging = cfg.get_option(cfg.KEY_TRY_EXCHANGING)
            # If nothing found, try exchanging title and authors. Both are
            # needed for that: ' '.join(authors) raises TypeError on None.
            if not s_matches and try_exchanging and title and authors:
                log.info(_("No matches! Trying exchanging title and authors..."))
                try:
                    self._search(log, abort, ' '.join(authors), [title], identifiers, s_matches, timeout, flags)
                except Exception as e:
                    log.error(e)

            max_downloads = cfg.get_option(cfg.KEY_MAX_DOWNLOADS)

            from operator import itemgetter
            # Sort the list by similarity
            s_matches.sort(reverse=True)
            # Then, filter it to use only the higher similarity value available
            best_similarity = s_matches[0][0] if s_matches else None
            best_matches = [match for match in s_matches if match[0] == best_similarity]
            # Now, sort by number of Skoob readers
            best_matches.sort(key=itemgetter(3), reverse=True)
            s_matches = best_matches[:max_downloads]

        # Do not start any download once the caller has asked to stop
        if abort.is_set():
            return None

        # This import has to be here, otherwise it wont work
        from calibre_plugins.skoob_metadata.worker import Worker

        br = self.browser
        r_queue = Queue()

        # The sixth Worker argument is 100 minus the match similarity
        workers = [Worker(url, auts, r_queue, br, log, 100-sim, self) for sim, url, auts, readers in s_matches]

        for w in workers:
            w.start()
            # Don't send all requests at the same time
            time.sleep(0.1)

        # Wait until every worker has finished or the caller aborts
        while not abort.is_set():
            a_worker_is_alive = False
            for w in workers:
                w.join(0.2)
                if abort.is_set():
                    break
                if w.is_alive():
                    a_worker_is_alive = True
            if not a_worker_is_alive:
                break

        # Forward everything the workers produced so far
        while True:
            try:
                result_queue.put(r_queue.get_nowait())
            except Empty:
                break

        return None

    def _calc_similarity(self, log, orig_title, orig_authors, title, authors):
        '''
        Compare the wanted title and authors with those of a candidate.

        Both sides are split into tokens with ``get_title_tokens`` and
        ``get_author_tokens``, lower-cased, and handed to
        ``words_similarity``. ``log`` is accepted but not used here.

        Returns a ``(title_similarity, author_similarity)`` tuple.
        '''
        def lowered(tokens):
            return [lower(token) for token in tokens]

        wanted_title = list(self.get_title_tokens(orig_title))
        wanted_authors = list(self.get_author_tokens(orig_authors))
        wanted_authors = lowered(wanted_authors)
        wanted_title = lowered(wanted_title)

        found_title = list(self.get_title_tokens(title))
        found_authors = list(self.get_author_tokens(authors))
        found_title = lowered(found_title)
        found_authors = lowered(found_authors)

        from calibre_plugins.skoob_metadata.similarity import words_similarity

        title_sim = words_similarity(wanted_title, found_title)
        author_sim = words_similarity(wanted_authors, found_authors)
        return title_sim, author_sim

    def _check_similarity(self, log, orig_title, orig_authors, title, authors, readers):
        '''
        Score a candidate against the wanted book and apply the configured threshold.

        The score is the mean of the title and author similarities given by
        ``_calc_similarity``. The decision is logged together with both
        partial scores and the candidate's reader count.

        Returns the composed score, or None when it is below the threshold.
        '''
        # This import has to be here, otherwise it wont work
        import calibre_plugins.skoob_metadata.config as cfg
        similarity_threshold = cfg.get_option(cfg.KEY_THRESHOLD)

        title_sim, authors_sim = self._calc_similarity(log, orig_title, orig_authors, title, authors)
        composed_sim = (title_sim + authors_sim) / 2

        # Both log messages take the same seven values
        details = (similarity_threshold, composed_sim, title_sim, title, authors_sim,
                   authors, readers)

        if composed_sim < similarity_threshold:
            log.error(_('Rejecting as not close enough match (less than %s): \n'
                        '  %s Composed\n'
                        '  %s %s\n'
                        '  %s %s\n'
                        '  Skoob readers: %s\n') % details)
            return None

        log.info(_('Close enough (more than %s): \n'
                   '  %s Composed\n'
                   '  %s %s\n'
                   '  %s %s\n'
                   '  Skoob readers: %s\n') % details)
        return composed_sim

    def _parse_search_results(self, log, orig_title, orig_authors, root, s_matches, timeout):
        '''
        Extract candidate books from one parsed search page.

        Every result box that looks well formed is reduced to a title, a
        list of authors and a reader count, scored with
        ``_check_similarity`` and, when accepted, appended to ``s_matches``
        as ``(similarity, url, authors, readers)``.

        A failure while handling one result is logged as a warning and only
        skips that result. ``timeout`` is accepted but not used here.
        '''
        results = root.xpath('id("resultadoBusca")/div[@class="box_lista_busca_vertical"]')
        if not results:
            return

        for result in results:

            try:
                info_a = result.xpath('div[@class="box_lista_busca_vertical_detalhe"]/div[@class="detalhes"]/a')
                info_text = result.xpath('div[@class="box_lista_busca_vertical_detalhe"]'
                                         '/div[@class="detalhes"]/*[not(name()="div")]//text()')
                info_readers = result.xpath('div[@class="box_lista_busca_vertical_detalhe"]/'
                                            'div[@class="detalhes-2"]/div/div[1]/span/text()')

                # Reader count, written with dots as thousands separators.
                # At least one digit is required, so a bare " leitores"
                # leaves the count at 0 instead of failing on int('').
                readers = 0
                for span in info_readers:
                    found = re.search(r'(\d[\d.]*) leitores', span)
                    if found:
                        readers = int(found.group(1).replace('.', ''))

                if not info_a or \
                   not info_text or \
                   len(info_text) < 2 or \
                   info_a[0].text_content().strip() != info_text[0].strip():
                    log.info(_("Ignoring malformed result.\n"))
                    continue

                # Check if there is series information present on the title text
                pre_title = info_text[0].strip()
                title = pre_title
                if re.search(r'\(.+#\d+\)', pre_title):
                    # Keep what comes before the last " ("; when there is no
                    # such separator the full text is used as the title.
                    head = re.search(r'(.+(?= \())', pre_title)
                    if head:
                        title = head.group(1)

                # Check for multiple authors and subtitle
                subtitle_nodes = result.xpath(
                    'div[@class="box_lista_busca_vertical_detalhe"]/div[@class="detalhes"]/span')
                # A subtitle takes one extra text node before the authors
                first_author_text = 2 if subtitle_nodes else 1
                # NOTE(review): an empty second link is taken to mean that
                # all authors come in one comma-separated text node; this is
                # inferred from the branches -- confirm against the live markup.
                if not info_a[1].text_content().strip():
                    authors = [name.strip() for name in info_text[first_author_text].strip().split(',')]
                else:
                    authors = [name.strip() for name in info_text[first_author_text:]]

                result_url = info_a[0].get('href')

                # Make site-relative links absolute; the domain is matched
                # literally rather than as a regular expression.
                if Skoob.BASE_DOMAIN not in result_url.lower():
                    result_url = Skoob.BASE_URL + result_url

                composed_sim = self._check_similarity(log, orig_title, orig_authors, title, authors, readers)
                if composed_sim is not None:
                    s_matches.append((composed_sim, result_url, authors, readers))
            except Exception as e:
                log.warn(_("Problem while analysing result: %s") % e)

    def get_cached_cover_url(self, identifiers):
        url = None
        skoob_id = identifiers.get('skoob', None)
        if skoob_id is None:
            isbn = identifiers.get('isbn', None)
            if isbn is not None:
                skoob_id = self.cached_isbn_to_identifier(isbn)
        if skoob_id is not None:
            url = self.cached_identifier_to_cover_url(skoob_id)
        return url

    def download_cover(self, log, result_queue, abort, title=None, authors=None, identifiers={}, timeout=30):
        '''
        Download the book cover and put ``(self, cover_data)`` on ``result_queue``.

        The cover URL is taken from the cache when available. Otherwise
        ``identify`` is run first (with the same ``timeout``) and the cache
        is looked up again for each result, best result first. Nothing is
        queued when no cover URL is found, when ``abort`` is set, or when the
        download fails; a failed download is logged.
        '''
        cached_url = self.get_cached_cover_url(identifiers)
        if cached_url is None:
            log.info(_('No cached cover found, running identify'))
            rq = Queue()
            self.identify(log, rq, abort, title=title, authors=authors,
                          identifiers=identifiers, timeout=timeout)
            if abort.is_set():
                return
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            results.sort(key=self.identify_results_keygen(title=title, authors=authors, identifiers=identifiers))
            for mi in results:
                cached_url = self.get_cached_cover_url(mi.identifiers)
                if cached_url is not None:
                    break
        if cached_url is None:
            log.info(_('No cover found'))
            return

        if abort.is_set():
            return
        br = self.browser
        log(_('Downloading cover from:'), cached_url)
        try:
            cdata = br.open_novisit(cached_url, timeout=timeout).read()
        except Exception:
            # Best effort: report the failure, but let KeyboardInterrupt and
            # SystemExit through.
            log.exception(_('Failed to download cover from:'), cached_url)
        else:
            result_queue.put((self, cdata))


# Tests
if __name__ == '__main__':
    # To run these tests use:
    # calibre-debug -e __init__.py
    from calibre.ebooks.metadata.sources.test import test_identify_plugin, title_test, authors_test, series_test
    # Each entry pairs the metadata given to the plugin with the checks its
    # result has to pass.
    # NOTE(review): series indices are written as floats because calibre's
    # series_test compares the expected index with mi.series_index using ==,
    # and that attribute is presumably a float -- confirm.
    test_identify_plugin(Skoob.name,
        [
            (
                {'title':u'Eragon', 'authors':['Paolini']},
                [title_test(u'Eragon',
                    exact=True), authors_test(['Christopher Paolini'])]
            ),

            (
                {'title':u'A Viajante do Tempo', 'authors':['Gabaldon']},
                [title_test(u'A Viajante do Tempo',
                    exact=True), authors_test(['Diana Gabaldon']), series_test('Outlander', 1.0)]
            ),

            (
                {'title':u'Sociedade Anel', 'authors':['J Tolkien']},
                [title_test(u'A Sociedade do Anel',
                    exact=True), authors_test(['J. R. R. Tolkien']), series_test('O Senhor dos Anéis', 1.0)]
            ),

            (
                {'title':u'A Ditadura Envergonhada', 'authors':['Elio']},
                [title_test(u'A Ditadura Envergonhada',
                    exact=True), authors_test(['Elio Gaspari']),]
            ),
        ])

