#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2012, Pr.BarnArt based on the Barnes work by Grant Drake'
__docformat__ = 'restructuredtext en'

import time
from urllib import quote
from Queue import Queue, Empty

from lxml.html import fromstring, tostring

from calibre import as_unicode
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source
from calibre.utils.icu import lower
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.localization import get_udc
#from calibre import ipython

class BOL_NL(Source):
    '''
    Calibre metadata source plugin for BOL.com.

    ``identify`` builds a search URL (from a ``bol_nl`` identifier, an ISBN
    or title/author tokens), collects candidate URLs plus a rating string
    for each, and hands every (url, rating) pair to a ``Worker`` which is
    started and then waited for. ``download_cover`` downloads the cover
    whose URL is found in the plugin's identifier -> cover URL cache.
    '''

    name                    = 'BOL_NL'
    description             = _('Downloads metadata en covers van BOL.com or from Literatuurplein.nl')
    author                  = 'Pr. BarnArt'
    version                 = (1, 6, 1)
    minimum_calibre_version = (0, 8, 0)

    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate'])
    has_html_comments = True
    supports_gzip_transfer_encoding = True
    cached_cover_url_is_reliable = True

    # URL fragments from which the BOL.com search URLs are assembled.
    # BASE_URL0 and BASE_ISBN are not referenced by the methods of this
    # class; they are kept because other modules of the plugin may use them.
    BASE_URL0 = 'http://www.bol.com'
    BASE_URL = 'http://www.bol.com/nl/s/boeken/zoekresultaten/Ntt/'
    BASE_ISBN = '/Nty/1/search/true/searchType/adv/section/books/N/8293/Ntk/isbncode/index.html?_requestid=116710'
    BASE_URL_1 = '/Ntk/nl_books_all/Nty/1/N/8299+8293/Ne/8299+8293'
    BASE_URL_LAST = '/search/true/searchType/qck/index.html?_requestid=17572'
    BASE_ISBN0 = 'http://www.bol.com/nl/s/boeken/zoekresultaten/Ntt/'
    BASE_ISBN1 = '/Ntk/isbncode/Nty/1/N/8299+8293/Ne/8299+8293/search/true/searchType/qck/toonAlle/true/index.html?_requestid=136981'

    # Leading Dutch articles that may be missing from the title shown in
    # the search results.
    _LEADING_ARTICLES = ('de ', 'het ', 'een ')
    # A search result whose category text contains one of these markers is
    # skipped.
    _SKIPPED_CATEGORY_MARKERS = ('gesproken woord', ' cd ', 'luisterboek',
                                 'voorgelezen')

    def config_widget(self):
        '''
        Overriding the default configuration screen for our own custom configuration
        '''
        from calibre_plugins.BOL_NL.config import ConfigWidget
        return ConfigWidget(self)

    def get_book_url(self, identifiers):
        '''
        Return ``('bol_nl', id, url)`` for the ``bol_nl`` identifier found in
        ``identifiers``, or None when there is no such identifier.
        '''
        bol_nl_id = identifiers.get('bol_nl', None)
        if not bol_nl_id:
            return None
        return ('bol_nl', bol_nl_id,
                '%s%s%s' % (BOL_NL.BASE_URL, bol_nl_id, BOL_NL.BASE_URL_LAST))

    def create_query(self, log, title=None, authors=None, identifiers={}):
        '''
        Build the BOL.com search URL for the given metadata.

        A valid ISBN in ``identifiers`` takes precedence and yields the ISBN
        search URL. Otherwise the title tokens and the tokens of the first
        author are URL-quoted and joined with ``+``.

        ``identifiers`` is only read, never modified, so the shared default
        dict is harmless.

        :return: the query URL, or None when neither an ISBN nor any
                 title/author token is available.
        '''
        isbn = check_isbn(identifiers.get('isbn', None))
        if isbn is not None:
            return '%s%s%s' % (BOL_NL.BASE_ISBN0, isbn, BOL_NL.BASE_ISBN1)

        tokens = []
        if title:
            # BOL  maybe doesn't cope very well with non ascii names so convert
            title = get_udc().decode(title)
            tokens.extend(self.get_title_tokens(title,
                          strip_joiners=False, strip_subtitle=True))
        if authors:
            # BOL  maybe doesn't cope very well with non ascii names so convert
            authors = [get_udc().decode(a) for a in authors]
            tokens.extend(self.get_author_tokens(authors,
                          only_first_author=True))
        if not tokens:
            return None

        # Joining all tokens in one go avoids the stray leading '+' that an
        # author-only query would otherwise get.
        q = '+'.join(quote(t.encode('utf-8') if isinstance(t, unicode) else t)
                     for t in tokens)
        return '%s%s%s%s' % (BOL_NL.BASE_URL, q, BOL_NL.BASE_URL_1,
                             BOL_NL.BASE_URL_LAST)

    def get_cached_cover_url(self, identifiers):
        '''
        Return the cached cover URL for the book described by
        ``identifiers``, or None.

        The ``bol_nl`` identifier is used when present; otherwise the ISBN is
        mapped to an identifier through ``cached_isbn_to_identifier``.
        '''
        bol_nl_id = identifiers.get('bol_nl', None)
        if bol_nl_id is None:
            isbn = identifiers.get('isbn', None)
            if isbn is not None:
                bol_nl_id = self.cached_isbn_to_identifier(isbn)
        if bol_nl_id is None:
            return None
        return self.cached_identifier_to_cover_url(bol_nl_id)

    def cached_identifier_to_cover_url(self, id_):
        '''
        Look up the cover URL cached for ``id_`` while holding the cache
        lock, falling back to the entry stored under ``'small/' + id_``.
        '''
        with self.cache_lock:
            url = self._get_cached_identifier_to_cover_url(id_)
            if not url:
                # Try for a "small" image in the cache
                url = self._get_cached_identifier_to_cover_url('small/' + id_)
            return url

    def _get_cached_identifier_to_cover_url(self, id_):
        '''
        Return the cover URL cached under ``id_``; when there is none, return
        the URL of any cached key sharing the part of ``id_`` before its last
        ``/``. Returns None (or the falsy cached value) when nothing matches.

        This must only be called once we have the cache lock.
        '''
        cache = self._identifier_to_cover_url_cache
        url = cache.get(id_, None)
        if url:
            return url
        # No cover is cached under this exact id, but one may be cached under
        # a sibling id that shares the same prefix.
        key_prefix = id_.rpartition('/')[0]
        # An empty prefix, or the bare 'small' namespace added by
        # cached_identifier_to_cover_url, identifies no particular book and
        # would match covers of unrelated books.
        if key_prefix and key_prefix != 'small':
            sibling_prefix = key_prefix + '/'
            for key, cached_url in cache.items():
                if key.startswith(sibling_prefix):
                    return cached_url
        return url

    def identify(self, log, result_queue, abort, title=None, authors=None,
            identifiers={}, timeout=30):
        '''
        Find candidate pages on BOL.com and let ``Worker`` instances process
        them, passing ``result_queue`` on to each worker.

        Note this method will retry without identifiers automatically if no
        match is found with identifiers.

        ``identifiers`` is only read, never modified.

        :return: None normally, or an error message string when the search
                 page could not be read or parsed.
        '''
        matches = []
        # The ratings are read from the search result page and paired with
        # the matches by position, so both lists must stay the same length.
        ratings = []
        query = None
        # If we have a BOL id then we do not need to fire a "search"
        # at BOL.com. Instead we will go straight to the URL for that book.
        bol_nl_id = identifiers.get('bol_nl', None)
        isbn = check_isbn(identifiers.get('isbn', None))
        br = self.browser

        if bol_nl_id:
            matches.append('%s%s%s%s' % (BOL_NL.BASE_URL, bol_nl_id,
                           BOL_NL.BASE_URL_1, BOL_NL.BASE_URL_LAST))
            # Without a rating the zip() below would drop this match and no
            # worker would ever be started for it.
            ratings.append('0')
        else:
            query = self.create_query(log, title=title, authors=authors,
                    identifiers=identifiers)
            if query is None:
                log.error('Insufficient metadata to construct query')
                return
            log.info('Querying: %s' % query)
            response = br.open_novisit(query, timeout=timeout)
            try:
                raw = response.read().strip()
                raw = raw.decode('utf-8', errors='replace')
                if not raw:
                    log.error('Failed to get raw result for query: %r' % query)
                    return
                root = fromstring(clean_ascii_chars(raw))
            except Exception:
                msg = 'Failed to parse BOL.com page for query: %r' % query
                log.exception(msg)
                return msg

            if isbn:
                # For ISBN searches the query URL itself is handed to the
                # worker; no rating is read for it.
                matches.append(query)
                ratings.append('0')
            else:
                # Now grab the matches from the search results, provided the
                # title appears to be for the same book
                self._parse_search_results(log, title, authors, root,
                                           matches, ratings, timeout)

        if abort.is_set():
            return
        if not matches:
            if identifiers and title and authors:
                log.info('No matches found with identifiers, retrying using only'
                        ' title and authors')
                return self.identify(log, result_queue, abort, title=title,
                        authors=authors, timeout=timeout)
            log.error('No matches found with query: %r' % query)
            return

        from calibre_plugins.BOL_NL.worker import Worker
        # Each worker receives a (url, rating) pair.
        combos = zip(matches, ratings)
        workers = [Worker(combo, result_queue, br, log, i, self)
                   for i, combo in enumerate(combos)]

        for w in workers:
            w.start()
            # Don't send all requests at the same time
            time.sleep(0.1)

        # Wait until every worker has finished or an abort is requested.
        while not abort.is_set():
            a_worker_is_alive = False
            for w in workers:
                w.join(0.2)
                if abort.is_set():
                    break
                if w.is_alive():
                    a_worker_is_alive = True
            if not a_worker_is_alive:
                break

        return None

    def _title_matches(self, title, orig_title):
        '''
        Return True when ``title`` equals ``orig_title`` ignoring case, also
        accepting a result title that lacks the leading Dutch article
        ('de ', 'het ', 'een ') of ``orig_title``.

        When no ``orig_title`` was supplied there is nothing to compare
        against, so every title is accepted.
        '''
        if not orig_title:
            return True
        found = lower(title)
        if found == lower(orig_title):
            return True
        for article in self._LEADING_ARTICLES:
            n = len(article)
            if lower(orig_title[:n]) == article and found == lower(orig_title[n:]):
                return True
        return False

    def _parse_search_results(self, log, orig_title, orig_authors, root, matches, ratings, timeout):
        '''
        Append the URL and rating of every acceptable search result found in
        ``root`` to ``matches`` and ``ratings`` (modified in place, kept in
        step), stopping once ``matches`` holds the configured maximum.

        A result is accepted when its title matches ``orig_title`` (see
        ``_title_matches``) and its category text contains none of the
        ``_SKIPPED_CATEGORY_MARKERS``. ``orig_authors`` and ``timeout`` are
        not used.
        '''
        results = root.xpath('//a[@class="product_name"]')
        if not results:
            return

        import calibre_plugins.BOL_NL.config as cfg
        max_results = cfg.plugin_prefs[cfg.STORE_NAME][cfg.KEY_MAX_DOWNLOADS]

        # The href is passed as an XPath variable rather than concatenated
        # into the expression, so quotes in it cannot break the expression.
        cat_xpath = ('//div [@class="product_title"]/a [@class="link_title"'
                     '  and @href=$href]/../../div [@class="subtitle"]')
        rating_xpath = ('//div [@class="product_line"]/a [@class="product_name"'
                        '  and @href=$href]/../../p [@class="product_specs small_details"]'
                        '/img /@title')

        for result in results:
            title = result.text_content().replace('\n', ' ').strip()
            while '  ' in title:
                title = title.replace('  ', ' ')
            # Strip off any series information from the title
            if '(' in title:
                title = title.rpartition('(')[0].strip()
            if not self._title_matches(title, orig_title):
                log.error('Rejecting as not close enough match: %s '%(title))
                continue

            href = result.get('href')
            if not href:
                # Nothing to hand to a worker.
                continue

            # Validate that the category is not one we are not interested in
            skip = False
            for c in root.xpath(cat_xpath, href=href):
                cat = c.text_content().strip().lower()
                if any(marker in cat for marker in self._SKIPPED_CATEGORY_MARKERS):
                    skip = True
                    break
            if skip:
                continue

            # The rating is the first word after the last ':' of the image
            # title; '0' is used when it cannot be read.
            rating = '0'
            rating_node = root.xpath(rating_xpath, href=href)
            if rating_node:
                rating_text = rating_node[0].rpartition(':')[2].strip()
                # partition() keeps the whole text when it contains no space
                # (slicing up to find(' ') == -1 would drop the last char).
                rating = rating_text.partition(' ')[0] or '0'

            # The href is stored as found on the page.
            matches.append(href)
            ratings.append(rating)
            if len(matches) >= max_results:
                break

    def download_cover(self, log, result_queue, abort,
            title=None, authors=None, identifiers={}, timeout=30):
        '''
        Download the cover and put ``(self, cover_data)`` on
        ``result_queue``.

        When no cover URL is cached for ``identifiers``, ``identify`` is run
        first and the cache is queried again for each of its results, best
        match first. Nothing is put on the queue when no cover URL is found,
        when ``abort`` is set, or when the download fails (which is logged).
        '''
        cached_url = self.get_cached_cover_url(identifiers)
        if cached_url is None:
            log.info('No cached cover found, running identify')
            rq = Queue()
            self.identify(log, rq, abort, title=title, authors=authors,
                    identifiers=identifiers, timeout=timeout)
            if abort.is_set():
                return
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            results.sort(key=self.identify_results_keygen(
                title=title, authors=authors, identifiers=identifiers))
            for mi in results:
                cached_url = self.get_cached_cover_url(mi.identifiers)
                if cached_url is not None:
                    break
        if cached_url is None:
            log.info('No cover found')
            return

        if abort.is_set():
            return
        br = self.browser
        log('Downloading cover from:', cached_url)
        try:
            cdata = br.open_novisit(cached_url, timeout=timeout).read()
            result_queue.put((self, cdata))
        except Exception:
            log.exception('Failed to download cover from:', cached_url)


if __name__ == '__main__': # tests
    # Self-test of the plugin; run it with:
    # calibre-debug -e __init__.py
    from calibre.ebooks.metadata.sources.test import (
        test_identify_plugin, title_test, authors_test)

    # A book looked up by title and author only (no ISBN specified)
    no_isbn_case = (
        {'title': "Time out", 'authors': ['Judith Visser']},
        [title_test("Time out", exact=True),
         authors_test(['Judith Visser'])],
    )

    # A book looked up with an ISBN
    isbn_case = (
        {'identifiers': {'isbn': 'isbn:9041413480'},
         'title': 'De eetclub', 'authors': ['Saskia Noort']},
        [title_test('De eetclub', exact=True),
         authors_test(['Saskia Noort'])],
    )

    test_identify_plugin(BOL_NL.name, [no_isbn_case, isbn_case])


