#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2014, Jose Antonio Espinosa - BiblioEteca Technologies'
__docformat__ = 'restructuredtext en'

import time
import sys
from urllib import quote
from Queue import Queue, Empty

from lxml import etree
from lxml.html import fromstring, tostring

from calibre import as_unicode
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source
from calibre.utils.icu import lower
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.localization import get_udc
#from calibre import ipython



class BIBLIOETECA(Source):
    '''
    Metadata source plugin that looks books up on BiblioEteca.com.

    The attributes below are declarative settings read by the ``Source``
    base class / plugin framework; the methods of this class build search
    URLs from ``BASE_URL_ADV`` and parse the XML the service returns.
    '''

    # Plugin identification shown to the user. ``description`` goes through
    # the ``_`` translation function, which is expected to be available as a
    # builtin at import time.
    name = 'BIBLIOETECA'
    description = _('Descarga metadatos de BiblioEteca.com')
    author = 'Jose Antonio Espinosa'
    version = (0, 3, 0)
    minimum_calibre_version = (0, 8, 0)

    # Root of the web service. NOTE(review): not referenced by any method in
    # this file - possibly used by the worker module; confirm before removing.
    BASE_URL0 = 'http://api.biblioeteca.com'
    # Advanced-search URL template. Positional fields:
    #   {0} title, {1} author, {2} ISBN, {3} BiblioEteca book id.
    # Callers leave the fields they do not search by as empty strings.
    BASE_URL_ADV = 'http://api.biblioeteca.com/biblioeteca.web/xml/buscaravanzado?titulo={0}&autor={1}&isbn={2}&formato=*&biblioetecaid={3}'
    # The plugin implements both identify() and download_cover().
    capabilities = frozenset(['identify', 'cover'])
    # Metadata fields this plugin declares it may fill in.
    touched_fields = frozenset(['title', 'authors', 'identifier:isbn', 'identifier:biblioeteca', 'rating', 'comments'])
    # Flags consumed by the Source framework (presumably: comments are HTML,
    # cached cover URLs can be trusted, gzip responses are accepted - confirm
    # against the calibre plugin API documentation).
    has_html_comments = True
    cached_cover_url_is_reliable = True
    supports_gzip_transfer_encoding = True
    

    def config_widget(self):
        '''
        Return this plugin's own configuration widget, replacing the default
        configuration screen.

        The widget class is imported lazily so that the GUI code is only
        loaded when the configuration dialog is actually requested.
        '''
        from calibre_plugins.BIBLIOETECA import config as plugin_config
        widget = plugin_config.ConfigWidget(self)
        return widget

    def get_book_url(self, identifiers):
        biblioeteca_id = identifiers.get('biblioeteca', None)
        if biblioeteca_id:
            return BIBLIOETECA.BASE_URL_ADV.format('', '', '', biblioeteca_id)


    def create_query(self, log, title=None, authors=None, identifiers={}):
        '''
        Build the search URL for the given metadata.

        The most specific criterion available wins:

        1. a ``'biblioeteca'`` identifier -> search by BiblioEteca id;
        2. a valid ``'isbn'`` identifier -> search by ISBN;
        3. otherwise -> search by title and/or first author tokens.

        :param log: logger (unused here, kept for interface compatibility).
        :param title: book title or None.
        :param authors: list of author names or None.
        :param identifiers: mapping of identifier type to value. The default
            mapping is only read, never mutated.
        :return: the query URL, or None when there is not enough metadata to
            search with.
        '''
        biblioeteca_id = identifiers.get('biblioeteca', None)
        if biblioeteca_id:
            return BIBLIOETECA.BASE_URL_ADV.format('', '', '', biblioeteca_id)

        isbn = check_isbn(identifiers.get('isbn', None))
        if isbn is not None:
            return BIBLIOETECA.BASE_URL_ADV.format('', '', isbn, '')

        def encode_tokens(tokens):
            # URL-quote every token (as UTF-8 bytes) and join them with an
            # encoded space. Consuming the iterable here matters: the token
            # helpers may be generators, which are truthy even when they
            # yield nothing, so emptiness must be judged on the result.
            return '%20'.join(
                quote(t if isinstance(t, bytes) else t.encode('utf-8'))
                for t in tokens)

        title_str = ''
        if title:
            # get_udc().decode() transliterates the text before tokenizing.
            title_str = encode_tokens(self.get_title_tokens(
                get_udc().decode(title),
                strip_joiners=False, strip_subtitle=True))

        autor_str = ''
        if authors:
            ascii_authors = [get_udc().decode(a) for a in authors]
            autor_str = encode_tokens(self.get_author_tokens(
                ascii_authors, only_first_author=True))

        # Without at least one usable title or author token the service
        # would be asked for an unconstrained search, so give up instead.
        if not (title_str or autor_str):
            return None
        return BIBLIOETECA.BASE_URL_ADV.format(title_str, autor_str, '', '')

    def get_cached_cover_url(self, identifiers):
        '''
        Look up a previously cached cover URL for a book.

        The BiblioEteca id is taken from ``identifiers`` directly or, failing
        that, resolved from the ISBN through the ISBN-to-identifier cache.

        :param identifiers: mapping of identifier type to value.
        :return: the cached cover URL, or None when no id can be determined
            or nothing is cached for it.
        '''
        book_id = identifiers.get('biblioeteca', None)
        if book_id is None:
            isbn = identifiers.get('isbn', None)
            if isbn is None:
                return None
            book_id = self.cached_isbn_to_identifier(isbn)
            if book_id is None:
                return None
        return self.cached_identifier_to_cover_url(book_id)

    def identify(self, log, result_queue, abort, title=None, authors=None,
            identifiers={}, timeout=30):
        '''
        Search BiblioEteca and start one worker per matching book.

        Note this method will retry without identifiers automatically if no
        match is found with identifiers.

        :param log: logger used for progress and error reporting.
        :param result_queue: queue handed to the workers for their results.
        :param abort: event-like object; when set, processing stops early.
        :param title: book title or None.
        :param authors: list of author names or None.
        :param identifiers: mapping of identifier type to value (read only).
        :param timeout: network timeout in seconds for the search request.
        :return: None on success or when nothing could be done, otherwise a
            text describing the failure.
        '''
        matches = []
        ratings = []
        br = self.browser

        query = self.create_query(log, title=title, authors=authors,
                identifiers=identifiers)
        if query is None:
            log.error('Insufficient metadata to construct query')
            return
        log.info('Querying: %s' % query)

        # The request itself can fail (DNS, timeout, HTTP error); report that
        # as an error string instead of letting the exception escape.
        try:
            raw = br.open_novisit(query, timeout=timeout).read().strip()
        except Exception as e:
            log.exception('Failed to make identify query: %r' % query)
            return as_unicode(e)
        if not raw:
            log.error('Failed to get raw result for query: %r' % query)
            return

        msg = 'Failed to parse BiblioEteca.com page for query: %r' % query
        try:
            parser = etree.XMLParser(ns_clean=True, recover=True)
            root = etree.fromstring(raw, parser)
        except Exception:
            log.exception(msg)
            return msg
        if root is None:
            # A recovering parser can give up without raising and hand back
            # no document at all.
            log.error(msg)
            return msg

        self._parse_search_results(log, title, authors, root, matches, ratings, timeout)

        if abort.is_set():
            return
        if not matches:
            if identifiers and title and authors:
                log.info('No matches found with identifiers, retrying using only'
                        ' title and authors')
                return self.identify(log, result_queue, abort, title=title,
                        authors=authors, timeout=timeout)
            log.error('No se encontró nada con: %r' % query)
            return

        from calibre_plugins.BIBLIOETECA.worker import Worker
        combos = zip(matches, ratings)
        workers = [Worker(combo, result_queue, br, log, i, self) for  i, combo in
                enumerate(combos)]

        for w in workers:
            w.start()
            # Don't send all requests at the same time
            time.sleep(0.1)

        # Poll the workers until all have finished or the caller aborts.
        while not abort.is_set():
            a_worker_is_alive = False
            for w in workers:
                w.join(0.2)
                if abort.is_set():
                    break
                if w.is_alive():
                    a_worker_is_alive = True
            if not a_worker_is_alive:
                break

        return None

    def _parse_search_results(self, log, orig_title, orig_authors, root, matches, ratings, timeout):
        '''
        Collect the ``/books/book`` elements of a search response.

        Every book element that carries a title is appended to ``matches``
        together with a rating of 0 in ``ratings``, until the configured
        maximum number of downloads is reached.

        :param log: logger used for diagnostics.
        :param orig_title: title that was searched for, or None for an
            identifier-only lookup.
        :param orig_authors: unused, kept for interface compatibility.
        :param root: parsed XML root of the search response.
        :param matches: list extended in place with the book elements.
        :param ratings: list extended in place, parallel to ``matches``.
        :param timeout: unused, kept for interface compatibility.
        '''
        results = root.xpath('/books/book')
        if not results:
            return

        def sinacentos(s):
            # Lower-case and drop Spanish acute accents so that titles can
            # be compared loosely.
            res = s.lower()
            for accented, plain in (('á', 'a'), ('é', 'e'), ('í', 'i'),
                                    ('ó', 'o'), ('ú', 'u')):
                res = res.replace(accented, plain)
            return res

        def ismatch(title):
            # Only an exact (accent/case-insensitive) title counts as a match.
            # Without a requested title there is nothing to compare against.
            if orig_title is None:
                return True
            return sinacentos(title) == sinacentos(orig_title)

        import calibre_plugins.BIBLIOETECA.config as cfg
        max_results = cfg.plugin_prefs[cfg.STORE_NAME][cfg.KEY_MAX_DOWNLOADS]

        for result in results:
            title_xp = result.xpath('title')
            title = title_xp[0].text if len(title_xp) > 0 else None
            if not title:
                # Nothing usable in this entry; do not fall through with an
                # undefined (or stale, from the previous entry) title.
                log.error ('No hay titulos')
                continue
            id_xp = result.xpath('id')
            book_id = id_xp[0].text if len(id_xp) > 0 else None
            log.error (title)
            log.error (book_id)

            while '  ' in title:
                title = title.replace('  ', ' ')
            # Strip off any series information from the title
            if '(' in title:
                title = title.rpartition('(')[0].strip()

            # NOTE(review): the title comparison is informational only - as
            # in the original code, a non-matching result is still kept.
            # Confirm whether non-matching titles should really be dropped.
            if not ismatch(title):
                log.error('Rejecting as not close enough match: %s ' % (title))

            matches.append(result)
            ratings.append(0)
            if len(matches) >= max_results:
                break
        

    def download_cover(self, log, result_queue, abort,
            title=None, authors=None, identifiers={}, timeout=30):
        '''
        Download the cover for a book and put ``(self, data)`` on
        ``result_queue``.

        If no cover URL is cached for the given identifiers, identify() is
        run first and the cached URL of the best-ranked result is used.

        :param log: logger used for progress and error reporting.
        :param result_queue: queue receiving the ``(plugin, cover_data)``
            tuple.
        :param abort: event-like object; when set, processing stops early.
        :param title: book title or None.
        :param authors: list of author names or None.
        :param identifiers: mapping of identifier type to value (read only).
        :param timeout: network timeout in seconds, used for both the
            identify fallback and the cover download.
        '''
        cached_url = self.get_cached_cover_url(identifiers)
        if cached_url is None:
            log.info('No cached cover found, running identify')
            rq = Queue()
            self.identify(log, rq, abort, title=title, authors=authors,
                    identifiers=identifiers, timeout=timeout)
            if abort.is_set():
                return
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            results.sort(key=self.identify_results_keygen(
                title=title, authors=authors, identifiers=identifiers))
            for mi in results:
                cached_url = self.get_cached_cover_url(mi.identifiers)
                if cached_url is not None:
                    break
        if cached_url is None:
            log.info('No cover found')
            return

        if abort.is_set():
            return
        br = self.browser
        log('Downloading cover from:', cached_url)
        import calibre_plugins.BIBLIOETECA.config as cfg
        default_coversize = cfg.DEFAULT_STORE_VALUES[cfg.KEY_COVERSIZE]
        self.coversize = cfg.plugin_prefs[cfg.STORE_NAME].get(cfg.KEY_COVERSIZE, default_coversize)

        try:
            cdata = br.open_novisit(cached_url, timeout=timeout).read()
        except Exception:
            log.exception('Failed to download cover from:', cached_url)
            return

        # Skip images whose size matches a known "no cover" placeholder:
        # the hard-coded 9361 and the configured cover size.
        # NOTE(review): sys.getsizeof() measures the in-memory object, not
        # the payload length, so these values are interpreter/platform
        # dependent - presumably the stored sizes were calibrated the same
        # way; confirm before switching to len(cdata).
        size = sys.getsizeof(cdata)
        if size != 9361 and size != self.coversize:
            result_queue.put((self, cdata))


if __name__ == '__main__':  # tests
    # Self-test of the plugin. Run it with:
    #   calibre-debug -e __init__.py
    from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
            title_test, authors_test, series_test)

    def _expect(title, author, series):
        # Checks every case shares: exact title, single author and the
        # first volume of the given series.
        return [title_test(title, exact=True), authors_test([author]),
                series_test(series, 1.0)]

    cases = [
        # A book with an ISBN
        ({'identifiers': {'isbn': '9788401337635'},
          'title': 'La caida de los gigantes', 'authors': ['Ken Follett']},
         _expect('La caida de los gigantes', 'Ken Follett', 'The Century')),

        # A book with no ISBN specified
        ({'title': "Armantia", 'authors': ['Moisés Cabello']},
         _expect("Armantia", 'Moisés Cabello', 'Serie Multiverso')),

        # A book with a biblioeteca id
        ({'identifiers': {'biblioeteca': '161224'},
          'title': 'La caida de los gigantes', 'authors': ['Ken Follett']},
         _expect('La caida de los gigantes', 'Ken Follett', 'The Century')),
    ]

    test_identify_plugin(BIBLIOETECA.name, cases)


