#!/usr/bin/env python
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3' #Based on B&N plugin by Grant Drake
__copyright__ = '2011, Rodrigo Coin Curvo <rodrigoccurvo@gmail.com>'
__docformat__ = 'en'

import time, re
from urllib import quote
from Queue import Queue, Empty

from lxml.html import fromstring, tostring

from calibre import as_unicode
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source
from calibre.utils.icu import lower
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.localization import get_udc

class Skoob(Source):
    '''
    Calibre metadata source plugin for Skoob (skoob.com.br).

    Declares the ``identify`` and ``cover`` capabilities. Books are located
    either directly through a ``skoob`` identifier or through a title/author
    search on the site.
    '''

    # Plugin identification metadata.
    # NOTE(review): `_` is not defined in this file; presumably it is the
    # translation builtin installed by calibre -- confirm.
    name                    = 'Skoob'
    description             = _('Downloads metadata and covers from Skoob')
    author                  = 'Rodrigo Coin Curvo'
    version                 = (1, 2, 2)
    minimum_calibre_version = (0, 8, 0)

    # Declared capabilities and the metadata fields this plugin may fill in
    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'identifier:skoob',
        'identifier:isbn', 'tags', 'rating', 'comments', 'publisher', 'pubdate'])
    has_html_comments = True
    supports_gzip_transfer_encoding = True

    # Site constants used to build and to recognise Skoob URLs
    BASE_DOMAIN = 'skoob.com.br'
    BASE_URL = 'http://www.skoob.com.br'

    def config_widget(self):
        '''
        Build the plugin's own configuration screen, which replaces the
        default one.

        :return: a ``ConfigWidget`` constructed with this plugin instance.
        '''
        # Plugin-local modules are imported lazily, as everywhere in this file
        from calibre_plugins.skoob_metadata.config import ConfigWidget
        widget = ConfigWidget(self)
        return widget

    def _get_skoob_book_url(self, skoob_id):
        return '%s/livro/%s'%(Skoob.BASE_URL, skoob_id)

    def get_book_url(self, identifiers):
        skoob_id = identifiers.get('skoob', None)
        if skoob_id:
            return 'skoob', skoob_id, self._get_skoob_book_url(skoob_id)

    def create_query(self, log, title=None, authors=None, identifiers={}, page=None):

        q = ''

        if title:
            title_tokens = list(self.get_title_tokens(title, strip_subtitle=True))

            from calibre_plugins.skoob_metadata.similarity import clean_words
            title_tokens = clean_words(title_tokens)

            encoded_tokens = []
            for i in title_tokens:
                encoded_tokens.append(quote(i.encode('iso-8859-1', errors='replace')))

            # Use encoded space ("%20") to join
            if encoded_tokens:
                q = '/tag:' + '%20'.join(encoded_tokens) + '/tipo:livro'
        elif authors:
            authors_tokens = list(self.get_author_tokens(authors))

            encoded_tokens = []
            for i in authors_tokens:
                encoded_tokens.append(quote(i.encode('iso-8859-1', errors='replace')))

            # Use encoded space ("%20") to join
            if encoded_tokens: 
                q = '/tag:' + '%20'.join(encoded_tokens) + '/tipo:autor'

        if not q:
            return None

        if page is not None:
            q = q + '/page:%s'%page

        return Skoob.BASE_URL + '/livro/lista' + q

    def _is_book_url(self, url):
        return re.search('skoob.com.br/livro/([0-9]+)', url)

    def _exec_search(self, log, abort, title, authors, identifiers, s_matches, 
            timeout, first='title', flags = {}):

        if first == 'title':
            query = self.create_query(log, title=title,
                identifiers=identifiers)
        elif first == 'authors':
            query = self.create_query(log, authors=authors,
                identifiers=identifiers)
        else:
            query = None

        if query is None:
            msg = 'Insufficient metadata to construct query'
            log.error(msg)
            raise Exception(msg)

        # This import has to be here, otherwise it wont work
        import calibre_plugins.skoob_metadata.config as cfg
        max_pages = cfg.getOption(cfg.KEY_MAX_PAGES)

        br = self.browser
        page = 1

        while query is not None and page <= max_pages:

            log.info('Querying: %s'%query)

            try:
                try:
                    response = br.open_novisit(query, timeout=timeout)
                except Exception as e:
                    msg = 'Failed to query'
                    raise Exception(msg)

                # Did we get redirect directly to the book?
                response_url = response.geturl()
                if self._is_book_url(response_url):
                    log.info("Single match!") # Jackpot!
                    flags['single_match'] = True
                    s_matches.append((0, response_url))
                    break

                raw = response.read().strip()
                raw = raw.decode('iso-8859-1', errors='replace')

                if not raw:
                    msg = 'Failed to get raw result'
                    raise Exception(msg)

                try:
                    root = fromstring(clean_ascii_chars(raw))
                except Exception as e:
                    msg = 'Failed to parse page'
                    raise Exception(msg)

            except Exception as e:
                msg = 'Error on query %s: %s'%(query, e)
                log.error(msg)

                # Worry only if this is the first page
                if page == 1:
                    raise Exception(msg)
                else:
                    return

            if abort.is_set():
                return

            # Now grab the matches from the search results, provided the
            # title and authors appear to be for the same book
            self._parse_search_results(log, title, authors, root, s_matches, timeout)

            # Check if there is another page
            next = root.xpath('//div[@class="paginacao_lista_busca_down"]/div[@class="proximo"]/span/a')

            if next:
                page = page + 1
                query = self.create_query(log, title=title, authors=authors,
                identifiers=identifiers, page=page)
            else:
                query = None

    def _search(self, log, abort, title, authors, identifiers, s_matches, 
            timeout, flags = {}):
        try:
            self._exec_search(log, abort, 
                        title, authors, identifiers, 
                        s_matches, timeout,
                        first='title', flags=flags)
        except Exception as e:
            log.error(e)

        # TODO Option!
        if not s_matches:
            log.info("No matches with title, trying with author...");
            try:
                self._exec_search(log, abort, 
                            title, authors, identifiers, 
                            s_matches, timeout,
                            first='authors', flags=flags)
            except Exception as e:
                log.error(e)

    def identify(self, log, result_queue, abort, title=None, authors=None,
            identifiers={}, timeout=30):
        '''
        Look a book up on Skoob and put every result on ``result_queue``.

        With a ``skoob`` identifier the book page is fetched directly.
        Otherwise the site is searched by title and then by author; when that
        finds nothing and the ``KEY_TRY_EXCHANGING`` option is on, the search
        is repeated with title and authors swapped. Skoob has no ISBN search,
        so other identifiers are not used to locate the book.

        :param result_queue: queue that receives the results produced by the
                             ``Worker`` threads.
        :param abort: event-like object; waiting for the workers stops once
                      it is set.
        :param timeout: timeout used for the search requests.
        :return: always ``None``.
        '''

        # This import has to be here, otherwise it won't work
        import calibre_plugins.skoob_metadata.config as cfg

        s_matches = []
        flags = {}

        # If we have a Skoob id then we do not need to fire a "search"
        # at skoob.com.br. Instead we will go straight to the URL for that book.

        # Skoob has no ISBN search, so it's either id or title/authors
        skoob_id = identifiers.get('skoob', None)
        if skoob_id:
            s_matches.append((100, self._get_skoob_book_url(skoob_id)))
        else:
            self._search(log, abort,
                        title, authors, identifiers,
                        s_matches, timeout,
                        flags)

            try_exchanging = cfg.getOption(cfg.KEY_TRY_EXCHANGING)
            # If nothing found, try exchanging title and authors
            if not s_matches and try_exchanging:
                # A missing field is swapped as None instead of failing in
                # join() or turning into an author list that contains None
                swapped_title = ' '.join(authors) if authors else None
                swapped_authors = [title] if title else None
                if swapped_title or swapped_authors:
                    log.info("No matches! Trying exchanging title and authors...")
                    try:
                        self._search(log, abort,
                                    swapped_title, swapped_authors, identifiers,
                                    s_matches, timeout,
                                    flags)
                    except Exception as e:
                        log.error(e)

        max_downloads = cfg.getOption(cfg.KEY_MAX_DOWNLOADS)

        # Best similarity first, then keep only as many as we may download
        s_matches.sort(reverse=True)
        s_matches = s_matches[:max_downloads]

        # This import has to be here, otherwise it won't work
        from calibre_plugins.skoob_metadata.worker import Worker

        br = self.browser
        r_queue = Queue()
        # The value handed to each worker is 100 minus the match similarity
        workers = [Worker(url, r_queue, br, log, 100-sim, self) for sim, url in
                s_matches]

        for w in workers:
            w.start()
            # Don't send all requests at the same time
            time.sleep(0.1)

        # Wait until every worker has finished or the caller aborts
        while not abort.is_set():
            a_worker_is_alive = False
            for w in workers:
                w.join(0.2)
                if abort.is_set():
                    break
                if w.is_alive():
                    a_worker_is_alive = True
            if not a_worker_is_alive:
                break

        # A direct redirect was stored with similarity 0 without being
        # compared to the request, so such results are checked now.
        recheck = bool(flags.get('single_match'))

        while True:
            try:
                r = r_queue.get_nowait()
            except Empty:
                break
            if recheck:
                log.info("Rechecking similarity...")
                composed_sim = self._check_similarity(log, title, authors, r.title, r.authors)
                if composed_sim is None:
                    continue
                r.source_relevance = 100-composed_sim
            result_queue.put(r)

        return None

    def _calc_similarity(self, log, orig_title, orig_authors, title, authors):
        '''
        Compare the requested title/authors with those of a candidate book.

        Both sides are tokenised with ``get_title_tokens`` /
        ``get_author_tokens`` and lower-cased before being handed to
        ``words_similarity``. ``log`` is not used.

        :return: tuple ``(title_similarity, author_similarity)``.
        '''
        wanted_title = map(lower, list(self.get_title_tokens(orig_title)))
        wanted_authors = map(lower, list(self.get_author_tokens(orig_authors)))

        found_title = map(lower, list(self.get_title_tokens(title)))
        found_authors = map(lower, list(self.get_author_tokens(authors)))

        # Plugin-local modules are imported lazily, as everywhere in this file
        from calibre_plugins.skoob_metadata.similarity import words_similarity

        title_score = words_similarity(wanted_title, found_title)
        author_score = words_similarity(wanted_authors, found_authors)
        return (title_score, author_score)

    def _check_similarity(self, log, orig_title, orig_authors, title, authors):
        '''
        Decide whether a candidate is close enough to the requested book.

        The composed similarity is the mean of the title and the author
        similarity; it is compared with the configured ``KEY_THRESHOLD``.

        :return: the composed similarity, or ``None`` when it is below the
                 threshold. The decision is logged either way.
        '''
        # This import has to be here, otherwise it won't work
        import calibre_plugins.skoob_metadata.config as cfg
        similarity_threshold = cfg.getOption(cfg.KEY_THRESHOLD)

        title_sim, authors_sim = self._calc_similarity(log, orig_title, orig_authors, title, authors)
        composed_sim = (title_sim + authors_sim)/2

        # Both log messages report the same six values
        details = (similarity_threshold, composed_sim, title_sim, title, authors_sim, authors)

        if composed_sim < similarity_threshold:
            log.error('Rejecting as not close enough match (less than %s): \n'
                      '  %s Composed\n'
                      '  %s %s\n'
                      '  %s %s\n'%details)
            return None

        log.info('Close enough (more than %s): \n'
                 '  %s Composed\n'
                 '  %s %s\n'
                 '  %s %s\n'%details)
        return composed_sim

    def _parse_search_results(self, log, orig_title, orig_authors, root, s_matches, timeout):
        results = root.xpath('id("resultadoBusca")/div[@class="box_lista_busca"]')
        if not results:
            return

        for result in results:

            try:
                info_a = result.xpath('div[@class="dados_lista_busca"]/a')
                info_text = result.xpath('div[@class="dados_lista_busca"]//text()')

                if not info_a or \
                   not info_text or \
                   len(info_text) < 2 or \
                   info_a[0].text_content().strip() != info_text[0].strip():
                    log.info("Ignoring malformed result.\n")
                    continue

                title = info_text[0].strip()

                authors = info_text[1].strip().split(',')

                # Check if there's a subtitle so we can get author correctly
                subtitle_nodes = result.xpath('div[@class="dados_lista_busca"]/span')
                if subtitle_nodes:
                    subtitle_node = subtitle_nodes[0]
                    if isinstance(subtitle_node, basestring):
                        subtitle = subtitle_node.strip()
                    else:
                        subtitle = subtitle_node.text_content().strip()
                    if subtitle == info_text[1].strip():
                        if len(info_text) >= 3:
                            authors = info_text[2].strip().split(',')
                        else:
                            authors = []

                result_url = info_a[0].get('href')

                if not re.search(Skoob.BASE_DOMAIN, result_url, flags=re.IGNORECASE):
                    result_url = Skoob.BASE_URL + result_url

                composed_sim = self._check_similarity(log, orig_title, orig_authors, title, authors)
                if composed_sim is not None:
                    s_matches.append((composed_sim, result_url))
            except Exception as e:
                log.warn("Problem while analysing result: %s"%e)

    def get_cached_cover_url(self, identifiers):
        url = None
        skoob_id = identifiers.get('skoob', None)
        if skoob_id is None:
            isbn = identifiers.get('isbn', None)
            if isbn is not None:
                skoob_id = self.cached_isbn_to_identifier(isbn)
        if skoob_id is not None:
            url = self.cached_identifier_to_cover_url(skoob_id)
        return url

    def download_cover(self, log, result_queue, abort,
            title=None, authors=None, identifiers={}, timeout=30):
        cached_url = self.get_cached_cover_url(identifiers)
        if cached_url is None:
            log.info('No cached cover found, running identify')
            rq = Queue()
            self.identify(log, rq, abort, title=title, authors=authors,
                    identifiers=identifiers)
            if abort.is_set():
                return
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            results.sort(key=self.identify_results_keygen(
                title=title, authors=authors, identifiers=identifiers))
            for mi in results:
                cached_url = self.get_cached_cover_url(mi.identifiers)
                if cached_url is not None:
                    break
        if cached_url is None:
            log.info('No cover found')
            return

        if abort.is_set():
            return
        br = self.browser
        log('Downloading cover from:', cached_url)
        try:
            cdata = br.open_novisit(cached_url, timeout=timeout).read()
            result_queue.put((self, cdata))
        except:
            log.exception('Failed to download cover from:', cached_url)

if __name__ == '__main__': # tests
    # To run these tests use:
    # calibre-debug -e __init__.py
    from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
            title_test, authors_test, series_test)

    # One row per lookup:
    # (query title, query authors, expected title, expected authors)
    cases = [
        (u'Eragon', ['Paolini'],
         u'Eragon', ['Christopher Paolini']),
        (u'Espiritualidade Integral', ['Wilber'],
         u'Espiritualidade Integral', ['Ken Wilber']),
        (u'Bruxa de Portobello', ['Paulo Coelho'],
         u'A Bruxa de Portobello', ['Paulo Coelho']),
        (u'Fortaleza Digital', ['Brown'],
         u'Fortaleza Digital', ['Dan Brown']),
    ]

    # Each lookup has to return exactly the expected title and the
    # expected authors
    test_identify_plugin(Skoob.name, [
        (
            {'title': query_title, 'authors': query_authors},
            [title_test(expected_title, exact=True),
             authors_test(expected_authors)]
        )
        for query_title, query_authors, expected_title, expected_authors
        in cases
    ])

