#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2011, Pr.BarnArt based on the Barnes work by Grant Drake'
__docformat__ = 'restructuredtext en'

import time, re
from urllib import quote
from Queue import Queue, Empty

from lxml.html import fromstring, tostring

from calibre import as_unicode
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source
from calibre.utils.icu import lower
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.localization import get_udc
#from calibre import ipython

class ADLIBRIS_NO(Source):
    '''
    Calibre metadata source that searches the Norwegian Adlibris web shop
    for books and downloads their covers.

    Search result pages are scraped here; the individual product pages are
    handed to ``Worker`` threads (see the plugin's ``worker`` module).
    '''

    name                    = 'ADLIBRIS_NO'
    description             = _('Downloads metadata and covers from Adlibris.no')
    author                  = 'Pr. BarnArt'
    version                 = (0, 0, 10)
    minimum_calibre_version = (0, 2, 5)

    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'identifier:isbn', 'comments', 'publisher', 'pubdate', 'languages', 'series'])
    has_html_comments = True
    supports_gzip_transfer_encoding = True
    cached_cover_url_is_reliable = True

    # Site root; search results only carry site-relative links.
    BASE_URL0 = 'http://www.adlibris.com'

    BASE_URL = 'http://www.adlibris.com/no/searchresult.aspx?author='
    BASE_URL_TITLE = '&title='
    BASE_ISBN = 'http://www.adlibris.com/no/sok?q='
    BASE_URL_LAST = '&language=Norska&fromproduct=False'

    # XPath expressions used to scrape the search result page.
    _ERROR_XPATH = '//div [@class="ctrlProductErrorTable"]'
    _RESULT_XPATH = ('//div [@class="search-result__list-view__product__wrapper"]/div'
                     '/div [@class="search-result__list-view__product__image-and-information-container "]')
    _NAME_XPATH = 'div/h4/a[@class="search-result__product__name"]'
    _FORMAT_XPATH = 'div/div/span [@class="book-format"]'
    # Selector of the previous page layout, kept as a fallback for ISBN hits.
    _LEGACY_RESULT_XPATH = ('//div [@class="section search results"]/ul/li/div'
                            '/div [@class="product-item"]/div/div [@class="item-info"]')
    # XPath used on a product page to find the subtitle.
    _SUBTITLE_XPATH = '//div [@class="product-info-panel__attributes container"]/ul/li/span [@itemprop="subtitle"]'
    # Substrings of the format label that disqualify a search result.
    # NOTE(review): 'vrig' presumably targets "Ovrig"-style labels without
    # depending on the encoding of the first letter -- confirm.
    _REJECTED_FORMATS = ('CD-bok', 'MP3', 'vrig')

    def config_widget(self):
        '''
        Overriding the default configuration screen for our own custom configuration
        '''
        from calibre_plugins.ADLIBRIS_NO.config import ConfigWidget
        return ConfigWidget(self)

    def get_book_url(self, identifiers):
        '''
        Return ``(identifier_type, identifier_value, url)`` for the book's
        ``adlibris_no`` identifier, or None when that identifier is absent.
        '''
        adlibris_no_id = identifiers.get('adlibris_no', None)
        if adlibris_no_id:
            return ('adlibris_no', adlibris_no_id,
                    '%s%s%s' % (ADLIBRIS_NO.BASE_URL, adlibris_no_id, ADLIBRIS_NO.BASE_URL_LAST))
        return None

    def get_no_author_tokens(self, authors, only_first_author=True):
        '''
        Take a list of authors and return a list of tokens useful for an
        AND search query. This function tries to return tokens in
        first name middle names last name order, by assuming that if a comma is
        in the author name, the name is in lastname, other names form.

        Tokens shorter than two characters (initials) and the words
        'von', 'van' and the localized 'Unknown' are skipped.
        '''
        if not authors:
            return
        # Punctuation becomes a separator; ' is left alone for Irish names.
        replace_pat = re.compile(r'[-+.:;,]')
        skipped = ('von', 'van', _('Unknown').lower())
        if only_first_author:
            authors = authors[:1]
        for au in authors:
            has_comma = ',' in au
            parts = replace_pat.sub(' ', au).split()
            if has_comma:
                # au probably in ln, fn form
                parts = parts[1:] + parts[:1]
            for tok in parts:
                # '.' was replaced above, so a two letter token is a real
                # word and not an initial followed by a dot.
                if len(tok) >= 2 and tok.lower() not in skipped:
                    yield tok

    def create_query(self, log, title=None, authors=None, identifiers={}):
        '''
        Build the search URL for a book.

        A valid ISBN in ``identifiers`` takes precedence; otherwise the
        query is built from the first author's tokens, the title tokens and
        the literal word 'Norsk'. Returns None when there is neither a
        valid ISBN nor any usable title token.

        ``identifiers`` is only read, never modified.
        '''
        isbn = check_isbn(identifiers.get('isbn', None))
        if isbn is not None:
            return '%s%s' % (ADLIBRIS_NO.BASE_ISBN, isbn)

        def quoted(tokens):
            return [quote(t.encode('utf_8') if isinstance(t, unicode) else t)
                    for t in tokens]

        q = ''
        if title:
            q = '+'.join(quoted(self.get_title_tokens(
                title, strip_joiners=False, strip_subtitle=True)))
        if not q:
            return None

        au = ''
        if authors:
            au = '+'.join(quoted(self.get_no_author_tokens(
                authors, only_first_author=True)))

        # Skip empty parts so a missing author does not leave a stray
        # leading '+' in the query.
        terms = [term for term in (au, q, 'Norsk') if term]
        return '%s%s' % (ADLIBRIS_NO.BASE_ISBN, '+'.join(terms))

    def get_cached_cover_url(self, identifiers):
        '''
        Return the cached cover URL for the ``adlibris_no`` identifier
        (looked up through the ISBN when necessary), or None.
        '''
        url = None
        adlibris_no_id = identifiers.get('adlibris_no', None)
        if adlibris_no_id is None:
            isbn = identifiers.get('isbn', None)
            if isbn is not None:
                adlibris_no_id = self.cached_isbn_to_identifier(isbn)
        if adlibris_no_id is not None:
            url = self.cached_identifier_to_cover_url(adlibris_no_id)
        return url

    def cached_identifier_to_cover_url(self, id_):
        '''
        Look ``id_`` up in the cover URL cache, falling back to the
        'small/' variant of the key. Takes the cache lock.
        '''
        with self.cache_lock:
            url = self._get_cached_identifier_to_cover_url(id_)
            if not url:
                # Try for a "small" image in the cache
                url = self._get_cached_identifier_to_cover_url('small/' + id_)
            return url

    def _get_cached_identifier_to_cover_url(self, id_):
        '''
        Return the cached cover URL for ``id_``; when there is none, return
        the URL cached under another key that shares everything before the
        last '/' of ``id_``.

        This must only be called once we have the cache lock.
        '''
        cache = self._identifier_to_cover_url_cache
        url = cache.get(id_, None)
        if url:
            return url
        # No cover for this exact id; another id of the same book (same
        # prefix before the last '/') may have one.
        key_prefix = id_.rpartition('/')[0]
        # An empty prefix, or the bare 'small' namespace, is shared by
        # unrelated books and must not be used for matching.
        if key_prefix and key_prefix != 'small':
            for key in cache.keys():
                if key.startswith(key_prefix + '/'):
                    return cache[key]
        return url

    def _fetch_root(self, log, br, url, timeout):
        '''
        Download ``url`` and parse it as HTML.

        Returns ``(root, error)``. ``root`` is None on failure; ``error`` is
        then a message describing the exception, or None when the server
        simply returned an empty page (which is logged).
        '''
        try:
            raw = br.open_novisit(url, timeout=timeout).read().strip()
        except Exception:
            msg = 'Failed to make query: %r' % url
            log.exception(msg)
            return None, msg
        try:
            raw = raw.decode('utf_8', errors='replace')
            if not raw:
                log.error('Failed to get raw result for query: %r' % url)
                return None, None
            return fromstring(clean_ascii_chars(raw)), None
        except Exception:
            msg = 'Failed to parse ADLIBRIS.com(NO) page for query: %r' % url
            log.exception(msg)
            return None, msg

    def _collect_isbn_matches(self, root, matches, ratings):
        '''
        Append the product URL of every hit on an ISBN result page to
        ``matches`` together with a '0' entry in ``ratings``.
        '''
        hrefs = []
        for result in root.xpath(self._RESULT_XPATH):
            hrefs.extend(result.xpath(self._NAME_XPATH + '/@href')[:1])
        if not hrefs:
            # Nothing found with the current selectors, try the old layout.
            for result in root.xpath(self._LEGACY_RESULT_XPATH):
                hrefs.extend(result.xpath('a/@href')[:1])
        for href in hrefs:
            matches.append('%s%s' % (ADLIBRIS_NO.BASE_URL0, href))
            ratings.append('0')

    def identify(self, log, result_queue, abort, title=None, authors=None,
            identifiers={}, timeout=30):
        '''
        Find candidate product pages and let Worker threads put their
        metadata on ``result_queue``.

        Returns None on success (also when nothing was found) or an error
        message when a page could not be fetched or parsed.

        Note this method will retry without identifiers automatically if no
        match is found with identifiers.
        '''
        # Work on a private copy: the ISBN fallback below drops the 'isbn'
        # entry, which must not leak into the caller's dict or into the
        # shared default argument.
        identifiers = dict(identifiers)
        matches = []
        # The ratings are not available on the product page, so one rating
        # is recorded per match, in the same order as matches.
        ratings = []
        query = None
        isbn_match_failed = False
        isbn = check_isbn(identifiers.get('isbn', None))
        br = self.browser

        book_url = self.get_book_url(identifiers)
        if book_url is not None:
            # NOTE(review): the original built this URL with an undefined
            # BASE_URL_1 constant; the URL from get_book_url() is used so
            # both places agree -- confirm this is the intended page.
            matches.append(book_url[2])
            ratings.append('0')
        else:
            query = self.create_query(log, title=title, authors=authors,
                    identifiers=identifiers)
            if query is None:
                log.error('Insufficient metadata to construct query')
                return
            log.info('Querying: %s' % query)
            root, error = self._fetch_root(log, br, query, timeout)
            if root is None:
                return error

            if isbn:
                error_node = root.xpath(self._ERROR_XPATH)
                isbn_error = error_node[0].text_content().strip() if error_node else ''
                if not isbn_error:
                    self._collect_isbn_matches(root, matches, ratings)
                else:
                    # The ISBN produced an error page (possibly unknown to
                    # the site): retry with author and title only.
                    isbn_match_failed = True
                    identifiers.pop('isbn', None)
                    query = self.create_query(log, title=title, authors=authors,
                            identifiers=identifiers)
                    if query is None:
                        log.error('Insufficient metadata after blanking isbn to construct query')
                        return
                    log.info('Querying: %s' % query)
                    root, error = self._fetch_root(log, br, query, timeout)
                    if root is None:
                        return error

            # For successful ISBN based searches we have already done everything we need to
            # So anything from this point below is for title/author based searches.
            if not isbn or isbn_match_failed:
                # Grab the matches from the search results, provided the
                # title appears to be for the same book
                self._parse_search_results(log, title, authors, root, matches, ratings, timeout)

        if abort.is_set():
            return
        if not matches:
            # Only an ISBN search that returned a regular but empty result
            # page has not tried title/authors yet; in every other case the
            # retry would repeat the query that just failed.
            if isbn and not isbn_match_failed and title and authors:
                log.info('No matches found with identifiers, retrying using only'
                        ' title and authors')
                return self.identify(log, result_queue, abort, title=title,
                        authors=authors, timeout=timeout)
            log.error('No matches found with query: %r' % query)
            return

        from calibre_plugins.ADLIBRIS_NO.worker import Worker
        # The worker class expects (url, rating) pairs
        combos = zip(matches, ratings)
        workers = [Worker(combo, result_queue, br, log, i, self) for i, combo in
                enumerate(combos)]

        for w in workers:
            w.start()
            # Don't send all requests at the same time
            time.sleep(0.1)

        while not abort.is_set():
            a_worker_is_alive = False
            for w in workers:
                w.join(0.2)
                if abort.is_set():
                    break
                if w.is_alive():
                    a_worker_is_alive = True
            if not a_worker_is_alive:
                break

        return None

    def _titles_match(self, found_title, wanted_title):
        '''
        Return True when the titles are equal ignoring case, either as they
        are or after replacing '-' by a space and removing ' and ; from
        ``found_title``.
        '''
        if lower(found_title) == lower(wanted_title):
            return True
        simplified = found_title.replace('-', ' ').replace('\'', '').replace(';', '')
        return lower(simplified) == lower(wanted_title)

    def _subtitle_matches(self, log, url, title, subtitle, timeout):
        '''
        Fetch the product page at ``url`` and return True when its subtitle
        equals ``subtitle`` ignoring case. A page that cannot be fetched or
        has no subtitle counts as a mismatch; mismatches are logged.
        '''
        found = ''
        root, error = self._fetch_root(log, self.browser, url, timeout)
        if root is not None:
            subtitle_node = root.xpath(self._SUBTITLE_XPATH)
            if subtitle_node:
                found = lower(subtitle_node[0].text_content().strip())
                if found == lower(subtitle):
                    return True
        log.error('Rejecting as not close enough match: %s:%s ' % (title, found or subtitle))
        return False

    def _parse_search_results(self, log, orig_title, orig_authors, root, matches, ratings, timeout):
        '''
        Scan a title/author search result page and append the product URL
        of every acceptable hit to ``matches`` (with a '0' entry in
        ``ratings``), stopping at the configured maximum.

        A hit is accepted when its title matches ``orig_title``, its format
        label is not one of the rejected formats and, when the subtitle
        option is enabled and ``orig_title`` contains a ':' subtitle, the
        subtitle on its product page matches as well.
        ``orig_authors`` is not used for filtering.
        '''
        results = root.xpath(self._RESULT_XPATH)
        if not results:
            return

        import calibre_plugins.ADLIBRIS_NO.config as cfg
        prefs = cfg.plugin_prefs[cfg.STORE_NAME]
        max_results = prefs[cfg.KEY_MAX_DOWNLOADS]
        # Optional setting: stores written by older versions may lack it.
        sub_yes = prefs[cfg.KEY_SUB_YES] if cfg.KEY_SUB_YES in prefs else False

        # Split "title : subtitle" once; result titles are compared with
        # the main title and the subtitle is checked on the product page.
        wanted_title = orig_title
        subtitle = ''
        if sub_yes:
            main_title, colon, rest = orig_title.partition(':')
            if colon:
                wanted_title = main_title.strip()
                subtitle = rest.lstrip()

        for result in results:
            name_node = result.xpath(self._NAME_XPATH)
            result_url = result.xpath(self._NAME_XPATH + '/@href')
            if not name_node or not result_url:
                # Not a regular product entry, nothing to link to.
                continue

            title = name_node[0].text_content().strip()
            while '  ' in title:
                title = title.replace('  ', ' ')
            # Strip off any series information from the title
            if '(' in title:
                title = title.rpartition('(')[0].strip()
            title = title.lstrip()
            # Only the first line is the title; drop the line break and any
            # whitespace in front of it without eating a title character.
            title = title.partition('\n')[0].rstrip()

            if not self._titles_match(title, wanted_title):
                log.error('Rejecting as not close enough match: %s ' % (title))
                continue

            # Validate that the category is not one we are not interested
            # in. Done before the subtitle check to avoid a needless fetch.
            rejected_format = None
            for cat in result.xpath(self._FORMAT_XPATH):
                cattxt = cat.text_content().strip()
                if any(marker in cattxt for marker in self._REJECTED_FORMATS):
                    rejected_format = cattxt
                    break
            if rejected_format is not None:
                log.error('Rejecting as not good category: %s, %s ' % (rejected_format, title))
                continue

            url = '%s%s' % (ADLIBRIS_NO.BASE_URL0, result_url[0])
            if subtitle and not self._subtitle_matches(log, url, title, subtitle, timeout):
                continue

            matches.append(url)
            ratings.append('0')
            if len(matches) >= max_results:
                break

    def parse_true(self):
        '''Always return True.'''
        return True

    def download_cover(self, log, result_queue, abort,
            title=None, authors=None, identifiers={}, timeout=30):
        '''
        Put ``(self, cover_data)`` on ``result_queue`` for the book.

        Uses the cached cover URL when there is one; otherwise runs
        identify() first and takes the cached URL of the best ranked result.
        Nothing is queued when no cover URL is found, the download fails or
        ``abort`` is set.
        '''
        cached_url = self.get_cached_cover_url(identifiers)
        if cached_url is None:
            log.info('No cached cover found, running identify')
            rq = Queue()
            self.identify(log, rq, abort, title=title, authors=authors,
                    identifiers=identifiers, timeout=timeout)
            if abort.is_set():
                return
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            results.sort(key=self.identify_results_keygen(
                title=title, authors=authors, identifiers=identifiers))
            for mi in results:
                cached_url = self.get_cached_cover_url(mi.identifiers)
                if cached_url is not None:
                    break
        if cached_url is None:
            log.info('No cover found')
            return

        if abort.is_set():
            return
        br = self.browser
        log('Downloading cover from:', cached_url)
        try:
            cdata = br.open_novisit(cached_url, timeout=timeout).read()
            result_queue.put((self, cdata))
        except Exception:
            log.exception('Failed to download cover from:', cached_url)

if __name__ == '__main__':  # tests
    # Run these tests with:
    # calibre-debug -e __init__.py
    from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
            title_test, authors_test, series_test)

    def expected(title, authors, series=None):
        # Build the list of checks for one case: exact title, authors and,
        # when given, the (series name, series index) pair.
        checks = [title_test(title, exact=True), authors_test(authors)]
        if series is not None:
            checks.append(series_test(series[0], series[1]))
        return checks

    identify_cases = [
        # Title/author search, no ISBN.
        # NOTE(review): the 'Harry Hole' series expectation looks copied
        # from the Nesbo cases -- confirm.
        ({'title': 'Huset ved havet', 'authors': [u'Hans Olav Lahlum']},
         expected('Huset ved havet', ['Hans Olav Lahlum'], ('Harry Hole', 10))),

        # ISBN search.
        ({'identifiers': {'isbn': '9788202435653'},
          'title': 'Politi', 'authors': ['Henning Mankell']},
         expected('Politi', ['Henning Mankell'], ('Wallander', 1))),

        # Title/author search, no ISBN.
        ({'title': 'S\xf8nnen', 'authors': [u'Jo Nesb\xf8']},
         expected('S\xf8nnen', ['Jo Nesb\xf8'], ('Harry Hole', 10))),

        # ISBN search.
        ({'identifiers': {'isbn': '9788203355936'},
          'title': 'Politi', 'authors': ['Henning Mankell']},
         expected('Politi', ['Henning Mankell'], ('Wallander', 1))),

        # Title/author search, no ISBN.
        ({'title': 'Pyramiden', 'authors': ['Henning Mankell']},
         expected('Pyramiden', ['Henning Mankell'], ('Wallander', 1))),

        # Title with a subtitle, no ISBN (8202453836).
        # NOTE(review): the expectations are those of the Nesbo case and
        # presumably were never updated for this book -- confirm.
        ({'title': 'Bridget Jones : Mad about the boy', 'authors': [u'Helen Fielding']},
         expected('S\xf8nnen', ['Jo Nesb\xf8'], ('Harry Hole', 10))),

        # Title/author search, no ISBN (same case as the third one).
        ({'title': 'S\xf8nnen', 'authors': [u'Jo Nesb\xf8']},
         expected('S\xf8nnen', ['Jo Nesb\xf8'], ('Harry Hole', 10))),

        # ISBN search.
        # NOTE(review): this ISBN has only nine digits -- confirm whether an
        # invalid ISBN is intended here.
        ({'identifiers': {'isbn': '917036379'},
          'title': 'Pyramiden', 'authors': ['Henning Mankell']},
         expected('Pyramiden', ['Henning Mankell'], ('Wallander', 1))),

        # Title/author search, no ISBN; the expected title differs from the
        # queried one.
        ({'title': 'Politi', 'authors': [u'Jo Nesb\xf8']},
         expected('Polis', [u'Jo Nesb\xf8'], ('Harry Hole', 10))),

        # ISBN search, no series expectation.
        ({'identifiers': {'isbn': '9789187679261'},
          'title': 'BARMH\xc4RTIGHETSMORDEN', 'authors': ['STEFAN WHILDE']},
         expected('BARMH\xc4RTIGHETSMORDEN', ['STEFAN WHILDE'])),
    ]

    test_identify_plugin(ADLIBRIS_NO.name, identify_cases,
                         fail_missing_meta=True)
		
	