#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2011, Pr.BarnArt based on the Barnes work by Grant Drake'
__docformat__ = 'restructuredtext en'

import time
from urllib import quote
from Queue import Queue, Empty

from lxml.html import fromstring, tostring

from calibre import as_unicode
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source
from calibre.utils.icu import lower
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.localization import get_udc
#from calibre import ipython

class ADLIBRIS_SE(Source):
    '''
    Metadata source plugin that looks books up on adlibris.com/se.

    ``identify`` collects candidate product page URLs (directly from an
    ``adlibris_se`` identifier, from an ISBN product page, or from the
    title/author search result page) and hands each one to a ``Worker``
    thread. ``download_cover`` fetches the cover whose URL was cached for
    the book's identifier.
    '''

    name                    = 'ADLIBRIS_SE'
    description             = _('Downloads metadata en covers van Adlibris')
    author                  = 'Pr. BarnArt'
    version                 = (0, 2, 1)
    minimum_calibre_version = (0, 8, 0)

    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate','series'])
    has_html_comments = True
    supports_gzip_transfer_encoding = True
    cached_cover_url_is_reliable = True

    # Site root; search result hrefs are appended to it.
    BASE_URL0 = 'http://www.adlibris.com/se/'
    # Quick search URL; the query goes between BASE_URL and BASE_URL_LAST.
    BASE_URL = 'http://www.adlibris.com/se/searchresult.aspx?search=quickfirstpage&quickvalue='
    # Product page URL; the ISBN is appended.
    BASE_ISBN = 'http://www.adlibris.com/se/product.aspx?isbn='
    BASE_URL_LAST = '+&fromproduct=False&onlylocallanguage=1'

    # Binding/category labels (lower case) accepted from the search result
    # page. 'pocket' and 'e-bok' were deliberately left disabled.
    VALID_CATEGORIES = frozenset(['inbunden'])

    def config_widget(self):
        '''
        Overriding the default configuration screen for our own custom configuration
        '''
        from calibre_plugins.ADLIBRIS_SE.config import ConfigWidget
        return ConfigWidget(self)

    def _url_for_id(self, adlibris_se_id):
        '''Build the site URL used for an ``adlibris_se`` identifier.'''
        return '%s%s%s' % (self.BASE_URL, adlibris_se_id, self.BASE_URL_LAST)

    @staticmethod
    def _quote_token(token):
        '''URL-quote one query token, encoding text as UTF-8 first.'''
        if not isinstance(token, bytes):
            token = token.encode('utf-8')
        return quote(token)

    def get_book_url(self, identifiers):
        '''
        Return ``('adlibris_se', id, url)`` for the book, or None when
        ``identifiers`` carries no ``adlibris_se`` entry.
        '''
        adlibris_se_id = identifiers.get('adlibris_se', None)
        if not adlibris_se_id:
            return None
        return ('adlibris_se', adlibris_se_id,
                self._url_for_id(adlibris_se_id))

    def create_query(self, log, title=None, authors=None, identifiers={}):
        '''
        Build the URL to fetch for the given metadata.

        A valid ISBN yields the product page URL. Otherwise the title tokens
        and the first author's tokens are joined with ``+`` into a quick
        search URL. Returns None when there is nothing to search for.
        ``identifiers`` is only read, never modified.
        '''
        isbn = check_isbn(identifiers.get('isbn', None))
        if isbn is not None:
            return '%s%s' % (self.BASE_ISBN, isbn)

        tokens = []
        if title:
            title = get_udc().decode(title)
            tokens.extend(self.get_title_tokens(title,
                                strip_joiners=False, strip_subtitle=True))
        if authors:
            authors = [get_udc().decode(a) for a in authors]
            tokens.extend(self.get_author_tokens(authors,
                                only_first_author=True))

        # Joining one token list avoids a stray leading '+' when only
        # authors are available.
        q = '+'.join(self._quote_token(t) for t in tokens)
        if not q:
            return None
        return '%s%s%s%s%s' % (self.BASE_URL, q, '+&title=', q,
                               self.BASE_URL_LAST)

    def get_cached_cover_url(self, identifiers):
        '''
        Return the cached cover URL for the book, or None.

        The ``adlibris_se`` identifier is used when present; otherwise the
        ISBN is mapped to an identifier through the ISBN cache.
        '''
        url = None
        adlibris_se_id = identifiers.get('adlibris_se', None)
        if adlibris_se_id is None:
            isbn = identifiers.get('isbn', None)
            if isbn is not None:
                adlibris_se_id = self.cached_isbn_to_identifier(isbn)
        if adlibris_se_id is not None:
            url = self.cached_identifier_to_cover_url(adlibris_se_id)
        return url

    def cached_identifier_to_cover_url(self, id_):
        '''
        Look up the cover URL cached for ``id_`` while holding the cache
        lock, falling back to the entry stored under ``'small/' + id_``.
        '''
        with self.cache_lock:
            url = self._get_cached_identifier_to_cover_url(id_)
            if not url:
                # Try for a "small" image in the cache
                url = self._get_cached_identifier_to_cover_url('small/'+id_)
            return url

    def _get_cached_identifier_to_cover_url(self, id_):
        '''
        Return the cached cover URL for ``id_``, or for another cached key
        that shares everything before the last ``/`` of ``id_``.

        This must only be called once we have the cache lock.
        '''
        cache = self._identifier_to_cover_url_cache
        url = cache.get(id_, None)
        if url:
            return url
        # No URL for this exact key: another key with the same prefix (all
        # but the last path component) may still have one.
        key_prefix = id_.rpartition('/')[0]
        # An empty prefix would match every key, and a bare 'small' prefix
        # would match the small covers of unrelated books, so skip both.
        if key_prefix and key_prefix != 'small':
            key_prefix += '/'
            for key in cache:
                if key.startswith(key_prefix) and cache[key]:
                    return cache[key]
        return url

    def identify(self, log, result_queue, abort, title=None, authors=None,
            identifiers={}, timeout=30):
        '''
        Find candidate pages for the book and start one Worker per candidate.

        Note this method will retry without identifiers automatically if no
        match is found with identifiers.

        Returns None normally, or an error message string when the query
        could not be fetched or its response could not be parsed.
        '''
        matches = []
        # Ratings are only available on the index page, not on the product
        # page, so they are collected in the same order as ``matches`` and
        # passed to the workers together with the URL.
        ratings = []
        query = None
        adlibris_se_id = identifiers.get('adlibris_se', None)
        isbn = check_isbn(identifiers.get('isbn', None))
        br = self.browser

        if adlibris_se_id:
            # NOTE(review): uses the same URL form as get_book_url(); confirm
            # this is the page the Worker expects for an adlibris_se id.
            matches.append(self._url_for_id(adlibris_se_id))
            ratings.append('0')
        else:
            query = self.create_query(log, title=title, authors=authors,
                    identifiers=identifiers)
            if query is None:
                log.error('Insufficient metadata to construct query')
                return
            log.info('Querying: %s'%query)
            try:
                # The response body can only be read once, so read it here
                # and reuse the parsed tree for both lookups below.
                raw = br.open_novisit(query, timeout=timeout).read().strip()
            except Exception as e:
                log.exception('Failed to make identify query: %r'%query)
                return as_unicode(e)
            if not raw:
                log.error('Failed to get raw result for query: %r'%query)
                return
            try:
                raw = raw.decode('windows-1252', errors='replace')
                root = fromstring(clean_ascii_chars(raw))
            except Exception:
                msg = 'Failed to parse ADLIBRIS.com(SE) page for query: %r'%query
                log.exception(msg)
                return msg

            if isbn and root.xpath('//div [@class="product"]/div[@class="productInfo"]'):
                # The ISBN query landed on a product page: that page is the
                # match.
                matches.append(query)
                ratings.append('0')
            else:
                # Title/author search (or an ISBN lookup that did not yield
                # a product page): grab the matches from the search results,
                # provided the title appears to be for the same book.
                self._parse_search_results(log, title, authors, root,
                        matches, ratings, timeout)

        if abort.is_set():
            return
        if not matches:
            if identifiers and title and authors:
                log.info('No matches found with identifiers, retrying using only'
                        ' title and authors')
                return self.identify(log, result_queue, abort, title=title,
                        authors=authors, timeout=timeout)
            log.error('No matches found with query: %r'%query)
            return

        from calibre_plugins.ADLIBRIS_SE.worker import Worker
        # The worker class takes the url and its rating as one pair.
        combos = zip(matches, ratings)
        workers = [Worker(combo, result_queue, br, log, i, self) for i, combo in
                enumerate(combos)]

        for w in workers:
            w.start()
            # Don't send all requests at the same time
            time.sleep(0.1)

        while not abort.is_set():
            a_worker_is_alive = False
            for w in workers:
                w.join(0.2)
                if abort.is_set():
                    break
                if w.is_alive():
                    a_worker_is_alive = True
            if not a_worker_is_alive:
                break

        return None

    def _parse_search_results(self, log, orig_title, orig_authors, root, matches, ratings, timeout):
        '''
        Append to ``matches``/``ratings`` every search result whose title
        equals ``orig_title`` (case-insensitively) and whose category is in
        ``VALID_CATEGORIES``, stopping once ``matches`` holds the configured
        maximum number of entries.

        ``orig_authors`` and ``timeout`` are accepted but not used.
        '''
        results = root.xpath('//div [@id="searchResult"]/ul [@class="ulSearch"]/li/table/tr/td [@class="tdRightTop"]')
        if not results:
            return

        import calibre_plugins.ADLIBRIS_SE.config as cfg
        max_results = cfg.plugin_prefs[cfg.STORE_NAME][cfg.KEY_MAX_DOWNLOADS]
        # Without an original title nothing can match exactly.
        wanted_title = lower(orig_title) if orig_title else None

        for result in results:
            result_url = result.xpath('h2/a/@href')
            result_id = result.xpath('h2/a/@id')
            text_node = result.xpath('h2/a')
            if not (result_url and result_id and text_node):
                # Not a complete result entry; nothing to match against.
                continue

            # The element ids of one result share the prefix that precedes
            # '_hlkTitle' in the title link's id.
            id_txt = result_id[0].partition('_hlkTitle')[0]

            # The link text is cut at the first ' av '; partition() leaves
            # the text intact when that separator is absent.
            txt = text_node[0].text_content().strip().lower()
            title = ' '.join(txt.partition(' av ')[0].split())
            # Strip off any series information from the title
            if '(' in title:
                title = title.rpartition('(')[0].strip()

            # Only an exact title match is accepted.
            if wanted_title is None or lower(title) != wanted_title:
                log.error('Rejecting as not close enough match: %s '%(title))
                continue

            # Validate that the category is one we are interested in.
            xpath_txt = '%s%s%s'%('div [@class="extra"]/span [@id="', id_txt, '_Label4"]')
            cat_details = result.xpath(xpath_txt)
            cat_txt = ''
            if cat_details:
                cat_txt = cat_details[0].text_content().strip().lower()
            if cat_txt not in self.VALID_CATEGORIES:
                log.error('Rejecting as not good category: %s, %s '%(cat_txt,title))
                continue

            log.info('Accept as  good category: %s, %s '%(cat_txt,title))
            matches.append('%s%s'%(self.BASE_URL0, result_url[0]))
            ratings.append('0')
            if len(matches) >= max_results:
                break

    def download_cover(self, log, result_queue, abort,
            title=None, authors=None, identifiers={}, timeout=30):
        '''
        Download the cover for the book and put ``(self, data)`` on
        ``result_queue``.

        When no cover URL is cached yet, ``identify`` is run first and the
        cache is consulted again for each result, best match first.
        '''
        cached_url = self.get_cached_cover_url(identifiers)
        if cached_url is None:
            log.info('No cached cover found, running identify')
            rq = Queue()
            self.identify(log, rq, abort, title=title, authors=authors,
                    identifiers=identifiers, timeout=timeout)
            if abort.is_set():
                return
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            results.sort(key=self.identify_results_keygen(
                title=title, authors=authors, identifiers=identifiers))
            for mi in results:
                cached_url = self.get_cached_cover_url(mi.identifiers)
                if cached_url is not None:
                    break
        if cached_url is None:
            log.info('No cover found')
            return

        if abort.is_set():
            return
        br = self.browser
        log('Downloading cover from:', cached_url)
        try:
            cdata = br.open_novisit(cached_url, timeout=timeout).read()
            result_queue.put((self, cdata))
        except Exception:
            log.exception('Failed to download cover from:', cached_url)


if __name__ == '__main__': # tests
    # Run with:
    #     calibre-debug -e __init__.py
    from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
            title_test, authors_test, series_test)

    # Each case is (input metadata, list of checks applied to the result).

    # Title and author only, no ISBN.
    no_isbn_case = (
        {'title':"Harry Potter and the Sorcerer's Stone", 'authors':['J.K. Rowling']},
        [
            title_test("Harry Potter and the Sorcerer's Stone", exact=True),
            authors_test(['J. K. Rowling']),
            series_test('Harry Potter', 1.0),
        ],
    )

    # Lookup driven by an ISBN identifier.
    isbn_case = (
        {'identifiers':{'isbn': '9780439064866'},
            'title':'Chamber of Secrets', 'authors':['J.K. Rowling']},
        [
            title_test('Harry Potter and the Chamber of Secrets', exact=True),
            authors_test(['J. K. Rowling']),
            series_test('Harry Potter', 2.0),
        ],
    )

    # Lookup with a store-specific identifier. Note that the key used here
    # is 'BOL_NL', while ADLIBRIS_SE.identify reads 'adlibris_se' and 'isbn'.
    store_id_case = (
        {'identifiers':{'BOL_NL': '61-Hours/Lee-Child/e/9780440243694'},
            'title':'61 Hours', 'authors':['Lee Child']},
        [
            title_test('61 Hours', exact=True),
            authors_test(['Lee Child']),
            series_test('Jack Reacher', 14.0),
        ],
    )

    test_identify_plugin(ADLIBRIS_SE.name,
                         [no_isbn_case, isbn_case, store_id_case])


