#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2012, Pr.BarnArt, based on the Barnes work by Grant Drake'
__docformat__ = 'restructuredtext en'

import socket, re, datetime
from collections import OrderedDict
from threading import Thread
from lxml.html import fromstring, tostring



from calibre.ebooks.metadata.book.base import Metadata
from calibre.library.comments import sanitize_comments_html
from calibre.utils.cleantext import clean_ascii_chars


import calibre_plugins.BOL_NL.config as cfg

class Worker(Thread):  # Get details
    '''
    Get book details from a BOL.com book page in a separate thread
    (part of the BOL_NL metadata plugin).

    One worker handles one ``(url, rating)`` pair: it downloads the page,
    parses the individual metadata fields and, when title, authors and
    the Bol.com id could all be determined, puts a ``Metadata`` object on
    ``result_queue``.
    '''

    name                    = 'Worker'
    description             = _('Get book details from BOL.com book page in a separate thread')
    author                  = 'Pr. BarnArt'
    version                 = (1, 6, 1)
    minimum_calibre_version = (0, 8, 0)

    # Dutch month names (lower case) mapped to their month number.
    _MONTHS = {
        'januari': 1, 'februari': 2, 'maart': 3, 'april': 4,
        'mei': 5, 'juni': 6, 'juli': 7, 'augustus': 8,
        'september': 9, 'oktober': 10, 'november': 11, 'december': 12,
    }

    def __init__(self, combo, result_queue, browser, log, relevance, plugin, timeout=20):
        '''
        :param combo: sequence whose item 0 is the book page URL and whose
            item 1 is the rating (converted with ``float()`` later on)
        :param result_queue: queue that receives the resulting Metadata
        :param browser: browser object; a clone is used by this thread
        :param log: logger providing ``info``/``error``/``exception``
        :param relevance: stored as ``source_relevance`` on the result
        :param plugin: owning plugin, used for its caches and for
            ``clean_downloaded_metadata``
        :param timeout: timeout passed to the page download
        '''
        Thread.__init__(self)
        self.daemon = True
        # Unpack the (url, rating) pair
        self.url = combo[0]
        self.rating = combo[1]
        self.result_queue = result_queue
        self.log, self.timeout = log, timeout
        self.relevance, self.plugin = relevance, plugin
        self.browser = browser.clone_browser()
        self.cover_url = self.bol_nl_id = self.isbn = None
        # Preferences; filled in by get_details() before parsing starts
        self.recensies_bol = None
        self.lp_covers = self.lponly_covers = None

    def run(self):
        '''Thread entry point: run get_details() and log any failure.'''
        try:
            self.get_details()
        except Exception:
            self.log.exception('get_details failed for url: %r'%self.url)

    def _read_pref(self, key):
        '''Return the stored plugin preference for *key*, or its default.'''
        default = cfg.DEFAULT_STORE_VALUES[key]
        return cfg.plugin_prefs[cfg.STORE_NAME].get(key, default)

    def get_details(self):
        '''
        Download the book page, parse it into an lxml tree, load the
        plugin preferences and hand over to parse_details().

        Download and parse problems are logged and end the method early;
        they are not raised.
        '''
        try:
            self.log.info('BOL_NL url: %r'%self.url)
            raw = self.browser.open_novisit(self.url, timeout=self.timeout).read().strip()
        except Exception as e:
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                self.log.error('URL malformed: %r'%self.url)
                return
            attr = getattr(e, 'args', [None])
            attr = attr if attr else [None]
            if isinstance(attr[0], socket.timeout):
                msg = 'Bol.com timed out. Try again later.'
                self.log.error(msg)
            else:
                msg = 'Failed to make details query: %r'%self.url
                self.log.exception(msg)
            return

        raw = raw.decode('utf-8', errors='replace')

        # A "not found" page can also be recognised by its title
        if '<title>404 - ' in raw:
            self.log.error('URL malformed: %r'%self.url)
            return

        try:
            root = fromstring(clean_ascii_chars(raw))
        except Exception:
            msg = 'Failed to parse Bol.com details page: %r'%self.url
            self.log.exception(msg)
            return

        self.recensies_bol = self._read_pref(cfg.KEY_RECENSIES_BOL)
        self.lp_covers = self._read_pref(cfg.KEY_GET_LP_COVERS)
        self.lponly_covers = self._read_pref(cfg.KEY_GET_LPONLY_COVERS)

        self.parse_details(root, raw)

    def parse_details(self, root, raw):
        '''
        Build a Metadata object from the parsed page and queue it.

        :param root: lxml tree of the page
        :param raw: decoded page source (used for the ISBN lookup)

        Title, authors and Bol.com id are mandatory: when one of them is
        missing the problem is logged and nothing is queued. Every other
        field is optional; a failure there is logged and skipped.
        '''
        try:
            bol_nl_id = self.parse_bol_nl_id(self.url)
        except Exception:
            self.log.exception('Error parsing BOL-NL id for url: %r'%self.url)
            bol_nl_id = None
        try:
            title = self.parse_title(root)
        except Exception:
            self.log.exception('Error parsing title for url: %r'%self.url)
            title = None
        try:
            authors = self.parse_authors(root)
        except Exception:
            self.log.exception('Error parsing authors for url: %r'%self.url)
            authors = []
        if not title or not authors or not bol_nl_id:
            self.log.error('Could not find title/authors/bol_nl_id for %r'%self.url)
            self.log.error('bol_nl_id: %r Title: %r Authors: %r'%(bol_nl_id, title,
                authors))
            return

        # No series information is extracted from the page, so series and
        # series_index are left unset on the result.
        mi = Metadata(title, authors)
        self.bol_nl_id = bol_nl_id

        try:
            isbn = self.parse_isbn(raw)
            if isbn:
                self.isbn = isbn
                mi.isbn = isbn
        except Exception:
            self.log.exception('Error parsing ISBN for url: %r'%self.url)

        try:
            # Convert the rating to an integer, rounding halves up
            mi.rating = int(float(self.rating) + 0.5)
        except (TypeError, ValueError):
            # A missing or non-numeric rating must not discard the book
            self.log.exception('Error parsing rating for url: %r'%self.url)

        try:
            mi.comments = self.parse_comments(root)
        except Exception:
            self.log.exception('Error parsing comments for url: %r'%self.url)

        try:
            # Relies on self.isbn and self.bol_nl_id assigned above
            self.cover_url = self.parse_cover(root)
        except Exception:
            self.log.exception('Error parsing cover for url: %r'%self.url)
        mi.has_cover = bool(self.cover_url)

        try:
            mi.publisher = self.parse_publisher(root)
        except Exception:
            self.log.exception('Error parsing publisher for url: %r'%self.url)

        try:
            mi.pubdate = self.parse_published_date(root)
        except Exception:
            self.log.exception('Error parsing published date for url: %r'%self.url)

        mi.source_relevance = self.relevance

        if self.bol_nl_id and self.isbn:
            self.plugin.cache_isbn_to_identifier(self.isbn, self.bol_nl_id)

        self.plugin.clean_downloaded_metadata(mi)
        self.result_queue.put(mi)

    def parse_bol_nl_id(self, url):
        '''
        Return the part of *url* after ``bol.com/`` up to and including
        the digits that follow the second-to-last matched slash, e.g.
        ``nl/p/1001004006016446``. Returns None when *url* does not have
        that shape.
        '''
        match = re.search(r'bol\.com/(.*/.*/\d+)', url)
        if match is None:
            return None
        return match.group(1)

    def parse_title(self, root):
        '''Return the stripped text of the title heading, or None.'''
        title_node = root.xpath('//div [@id="main_block"]/h1 [@itemprop="name"]')
        if title_node:
            return title_node[0].text_content().strip()
        return None

    def parse_authors(self, root):
        '''Return the list of creator link texts (empty when none found).'''
        auteur_node = root.xpath('//div [@class="product_details"]/div [@class="product_creator bottom_s"]/a')
        return [auteur.text_content().strip() for auteur in auteur_node]

    def parse_isbn(self, raw):
        '''
        Return the ISBN-10 from the "Productinformatie" section of the
        page source *raw*, or None when it cannot be found.

        Only the first 2000 characters of that section are inspected.
        '''
        section_start = raw.find('<h2 class="h5 bottom_xxs">Productinformatie</h2>')
        if section_start < 0:
            return None
        prodtxt = raw[section_start:section_start + 2000]
        label_pos = prodtxt.find('ISBN10</dt>')
        if label_pos < 0:
            return None
        # The value is expected at a fixed distance behind the label ...
        candidate = prodtxt[label_pos + 36:label_pos + 46]
        if re.match(r'[0-9]{9}[0-9Xx]$', candidate):
            return candidate
        # ... but when the spacing differs, skip whitespace and tags after
        # the label instead of returning arbitrary characters.
        match = re.search(
            r'ISBN10</dt>\s*(?:<[^>]*>\s*)*([0-9]{9}[0-9Xx])(?![0-9A-Za-z])',
            prodtxt)
        if match:
            return match.group(1)
        return None

    def parse_rating(self, root):
        '''
        Return a rating (float, in steps of 0.5) read from CSS class
        names in the page, or None when no rating markup is present.

        Not called by parse_details(), which takes the rating from the
        combo passed to the constructor. The original comments refer to
        the B&N site layout. May raise ValueError for class names that
        do not have the expected form.
        '''
        rating_node = root.xpath('//div[@class="w-box wgt-product-ratings"]/a/div/@class')
        if rating_node:
            # The rating is spelled out in the class attribute, e.g.
            # <div class="product-rating four half"> for 4.5
            match = re.search(r'product-rating (.+)', rating_node[0])
            if match is None:
                return None
            rating_parts = match.group(1).split(' ')
            rating_values = ['zero', 'one', 'two', 'three', 'four', 'five']
            rating_value = float(rating_values.index(rating_parts[0]))
            if len(rating_parts) > 1:
                rating_value += 0.5
            return rating_value

        # Alternative markup: <span class="avg-4h section_updateRating">
        rating_node = root.xpath('//span[contains(@class,"section_updateRating")]/@class')
        if rating_node:
            rating_text = rating_node[0][4:6]
            rating_value = float(rating_text[:1])
            if rating_text[1:2] == 'h':
                rating_value += 0.5
            return rating_value
        return None

    def parse_publisher(self, root):
        '''
        Return the text after the last ':' of the first label containing
        'Uitgever', or None when there is no such label.
        '''
        for label in root.xpath('//div [@class="first_tab_paragraph"]/label'):
            txt = label.text_content().strip()
            if 'Uitgever' in txt:
                return txt.rpartition(':')[2].strip()
        return None

    def parse_published_date(self, root):
        '''
        Return the publication date as a datetime, or None.

        The year comes from the 'Verschijningsjaar' label. When the
        "small_details" paragraph ends (after its last '|') in a text
        containing that year, that more specific text is used instead.
        '''
        pub_date = None
        for label in root.xpath('//div [@class="first_tab_paragraph"]/label'):
            txt = label.text_content().strip()
            if 'Verschijningsjaar' not in txt:
                continue
            year = txt.rpartition(':')[2].strip()
            if len(year) == 4:
                pub_date = year
                # Maybe a more specific date is available
                detail_nodes = root.xpath('//div [@class="product_details"]/p [@class="small_details"]')
                if detail_nodes:
                    text = detail_nodes[0].text_content()
                    text = text.rpartition('|')[2].strip()
                    if year in text:
                        pub_date = text
            break
        if pub_date is None:
            return None
        return self._convert_date_text(pub_date)

    @staticmethod
    def _split_date_text(date_text):
        '''
        Split a date text into a ``(year, month, day)`` tuple of ints.

        The last four characters must be the year (ValueError otherwise).
        The words before it may hold a Dutch month name and/or a day
        number in either order, e.g. "2003", "december 2003",
        "10 december 2003" or "December 10th 2003". Month and day default
        to 1 when absent or not recognised.
        '''
        text = date_text.strip()
        year = int(text[-4:])
        month = 1
        day = 1
        for token in text[:-4].split():
            lowered = token.lower()
            if lowered in Worker._MONTHS:
                month = Worker._MONTHS[lowered]
                continue
            # Accept "10" as well as "10th"
            number = re.match(r'([0-9]+)', token)
            if number:
                day = int(number.group(1))
        return year, month, day

    def _convert_date_text(self, date_text):
        '''
        Convert a date text (see _split_date_text) into a datetime in
        calibre's ``utc_tz`` time zone.
        '''
        year, month, day = self._split_date_text(date_text)
        from calibre.utils.date import utc_tz
        try:
            return datetime.datetime(year, month, day, tzinfo=utc_tz)
        except ValueError:
            # Day not valid for this month: keep year and month
            return datetime.datetime(year, month, 1, tzinfo=utc_tz)

    def parse_comments(self, root):
        '''
        Return the product description as an HTML fragment, or None when
        the page has no description block.

        Wrapper markup is stripped, headings and bold text are turned
        into line breaks, and the reviews section is appended when the
        ``recensies_bol`` preference is set. A source note is added at
        the end.
        '''
        description_node = root.xpath('//div [@id="js_product_description" and  @class="content_tab product_description"]')
        if not description_node:
            return None

        # Serialise to text rather than bytes so that the string
        # replacements below also work on Python 3.
        comments_org = tostring(description_node[0], encoding='unicode')

        comments = comments_org.replace('<div id="js_product_description" class="content_tab product_description">', '').strip()
        for old, new in (
                ('<h2 class="h5 bottom_xxs">Beschrijving</h2>', ''),
                ('<div class="bottom_m">', ''),
                ('<h2>', ''),
                ('<strong>', ''),
                ('</strong>', '<br>'),
                ('</h2>', '<br>'),
                ('\n', ' ')):
            comments = comments.replace(old, new)
        # Keep only the text up to the first closing div
        end = comments.find('</div>')
        if end > 0:
            comments = comments[:end]

        reviews_heading = '<h2 class="h5 bottom_xxs">Recensie(s)</h2>'
        start = comments_org.find(reviews_heading)
        if start > 0 and self.recensies_bol:
            reviews = comments_org[start:]
            for old, new in (
                    (reviews_heading, ''),
                    ('<strong>', ''),
                    ('</strong>', '<br>'),
                    ('</h2>', '<br>'),
                    ('\n', ' ')):
                reviews = reviews.replace(old, new)
            end = reviews.find('</div>')
            if end > 0:
                reviews = reviews[:end]
            comments = comments + '<br><br>' + reviews

        return comments + '<br>(source: Bol.com)<br>'

    def _literatuurplein_cover_url(self):
        '''
        Look up ``self.isbn`` on literatuurplein.nl and return the URL of
        its cover image, or None when no book id is found in the
        resulting URL or the image URL does not answer with an image
        content type. Network errors propagate to the caller.
        '''
        timeout = 30
        marker = '/boekdetail.jsp?boekId='
        search_url = 'http://www.literatuurplein.nl/zoeken-resultaat.jsp?x=38&y=8&isbn=' + self.isbn
        response = self.browser.open_novisit(search_url, timeout=timeout)
        # The book id is taken from the URL the search ended up at
        info_url = response.geturl()
        pos = info_url.find(marker)
        if pos < 0:
            return None
        # Drop any further query parameters behind the id
        book_id = info_url[pos + len(marker):].partition('&')[0]
        if not book_id:
            return None
        url = '%s%s%s'%('http://img.literatuurplein.nl/blobs/ORIGB/', book_id, '/1/1.jpg')
        # Check that the URL really delivers an image
        response = self.browser.open_novisit(url, timeout=timeout)
        content_type = response.info().get('Content-Type') or ''
        if 'image' in content_type:
            return url
        return None

    def parse_cover(self, root):
        '''
        Return a cover URL or None, caching it in the plugin under the
        Bol.com id.

        When one of the literatuurplein preferences is set and an ISBN is
        known, literatuurplein.nl is tried first. With ``lponly_covers``
        set, the Bol.com page is never used as a fallback.
        '''
        if (self.lponly_covers or self.lp_covers) and self.isbn:
            try:
                url = self._literatuurplein_cover_url()
            except Exception:
                # A failing third-party lookup must not block the
                # Bol.com cover below.
                self.log.exception('Error getting literatuurplein cover for url: %r'%self.url)
                url = None
            if url:
                self.plugin.cache_identifier_to_cover_url(self.bol_nl_id, url)
                return url
        if self.lponly_covers:
            return None

        result_node = root.xpath('//div [@class="grid_left product_image_regular"]/a/img [@class="product_image_regular"]/@src')
        if result_node:
            # Ask for the large variant of the regular image
            img_url = result_node[0].replace('regular', 'large')
            self.plugin.cache_identifier_to_cover_url(self.bol_nl_id, img_url)
            return img_url

        # Page without preview copy ("geen kijkexemplaar")
        result_node = root.xpath('//div [@class="product_image"]/img/@src')
        if result_node:
            img_url = result_node[0]
            # Ignore the placeholder shown when there is no image
            if 'noimage' not in img_url:
                self.plugin.cache_identifier_to_cover_url(self.bol_nl_id, img_url)
                return img_url
        return None
			
		
			 
			
