#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2011, Pr.BarnArt, based on the Barnes work by Grant Drake'
__docformat__ = 'restructuredtext en'

import socket, re, datetime
from collections import OrderedDict
from threading import Thread
from lxml.html import fromstring, tostring



from calibre.ebooks.metadata.book.base import Metadata
from calibre.library.comments import sanitize_comments_html
from calibre.utils.cleantext import clean_ascii_chars


import calibre_plugins.ADLIBRIS_DK.config as cfg

class Worker(Thread):  # Get details

    '''
    Get book details from an Adlibris.com book page in a separate thread.

    Related to the BOL_NL worker. One instance downloads a single product
    page, extracts the metadata fields from it and puts the resulting
    ``Metadata`` object on ``result_queue``.
    '''
    name                    = 'Worker'
    description             = _('Get book details from Adlibris.com book page in a separate thread')
    author                  = 'Pr. BarnArt'
    version                 = (0, 2, 0)
    minimum_calibre_version = (0, 8, 0)

    def __init__(self, combo, result_queue, browser, log, relevance, plugin, timeout=20):
        '''
        :param combo: sequence whose first item is the product page URL and
            whose second item is stored as ``self.rating``
        :param result_queue: queue that receives the finished ``Metadata``
        :param browser: browser object; a clone is used for the download
        :param log: logger providing ``info``/``error``/``exception``
        :param relevance: value stored on the result as ``source_relevance``
        :param plugin: owning plugin, used for its identifier/cover caches
        :param timeout: timeout passed to the page download
        '''
        Thread.__init__(self)
        self.daemon = True
        # Unpack the (url, rating) pair handed over by the caller.
        self.url = combo[0]
        self.rating = combo[1]
        self.result_queue = result_queue
        self.log, self.timeout = log, timeout
        self.relevance, self.plugin = relevance, plugin
        self.browser = browser.clone_browser()
        self.cover_url = self.adlibris_dk_id = self.isbn = None

    def run(self):
        '''Thread entry point: fetch the details and log any failure.'''
        try:
            self.get_details()
        except Exception:
            self.log.exception('get_details failed for url: %r' % self.url)

    def get_details(self):
        '''
        Download ``self.url``, parse it into an lxml tree and hand the tree
        to :meth:`parse_details`. Every failure is logged and ends the
        method early without raising.
        '''
        try:
            self.log.info('ADLIBRIS_DK url: %r' % self.url)
            raw = self.browser.open_novisit(self.url, timeout=self.timeout).read().strip()
        except Exception as e:
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                self.log.error('URL malformed: %r' % self.url)
                return
            attr = getattr(e, 'args', [None])
            attr = attr if attr else [None]
            if isinstance(attr[0], socket.timeout):
                msg = 'Adlibris.com timed out. Try again later.'
                self.log.error(msg)
            else:
                msg = 'Failed to make details query: %r' % self.url
                self.log.exception(msg)
            return

        raw = raw.decode('windows-1252', errors='replace')

        # The site can answer with a regular page whose title announces a 404.
        if '<title>404 - ' in raw:
            self.log.error('URL malformed: %r' % self.url)
            return

        try:
            root = fromstring(clean_ascii_chars(raw))
        except Exception:
            msg = 'Failed to parse adlibris.com details page: %r' % self.url
            self.log.exception(msg)
            return

        self.parse_details(root, raw)

    def parse_details(self, root, raw):
        '''
        Build a ``Metadata`` object from the parsed page and queue it.

        Title, authors and the Adlibris id are mandatory: when any of them
        is missing the page is skipped. Every other field is best effort; a
        failure is logged and the field is left unset.

        :param root: lxml tree of the product page
        :param raw: decoded page text (not used by this method)
        '''
        try:
            adlibris_dk_id = self.parse_adlibris_dk_id(self.url)
        except Exception:
            self.log.exception('Error parsing adlibris.com(DK) id for url: %r' % self.url)
            adlibris_dk_id = None

        title = series = series_index = None
        try:
            (title, series, series_index) = self.parse_title(root)
        except Exception:
            self.log.exception('Error parsing title for url: %r' % self.url)
            title = series = series_index = None

        try:
            authors = self.parse_authors(root)
        except Exception:
            self.log.exception('Error parsing authors for url: %r' % self.url)
            authors = []

        if not title or not authors or not adlibris_dk_id:
            self.log.error('Could not find title/authors/ adlibris_dk_id for %r' % self.url)
            self.log.error('adlibris_dk_id: %r Title: %r Authors: %r' % (adlibris_dk_id, title,
                authors))
            return

        mi = Metadata(title, authors)
        if series:
            mi.series = series
            if series_index is not None:
                mi.series_index = series_index
        self.adlibris_dk_id = adlibris_dk_id

        try:
            isbn = self.parse_isbn(root)
            if isbn:
                self.isbn = isbn
                mi.isbn = isbn
        except Exception:
            self.log.exception('Error parsing ISBN for url: %r' % self.url)

        try:
            mi.rating = self.parse_rating(root)
        except Exception:
            self.log.exception('Error parsing ratings for url: %r' % self.url)

        try:
            mi.comments = self.parse_comments(root)
        except Exception:
            self.log.exception('Error parsing comments for url: %r' % self.url)

        # parse_cover reads self.isbn and self.adlibris_dk_id, so it has to
        # run after both have been assigned above.
        try:
            self.cover_url = self.parse_cover(root)
        except Exception:
            self.log.exception('Error parsing cover for url: %r' % self.url)
        mi.has_cover = bool(self.cover_url)

        try:
            mi.publisher = self.parse_publisher(root)
        except Exception:
            self.log.exception('Error parsing publisher for url: %r' % self.url)

        try:
            mi.pubdate = self.parse_published_date(root)
        except Exception:
            self.log.exception('Error parsing published date for url: %r' % self.url)

        mi.source_relevance = self.relevance

        if self.adlibris_dk_id and self.isbn:
            self.plugin.cache_isbn_to_identifier(self.isbn, self.adlibris_dk_id)

        self.plugin.clean_downloaded_metadata(mi)
        self.result_queue.put(mi)

    def parse_adlibris_dk_id(self, url):
        '''
        Return the digits following ``isbn=`` in ``url``, or ``None`` when
        the URL carries no such parameter.
        '''
        match = re.search(r'isbn=(\d+)', url)
        if match is None:
            return None
        return match.group(1)

    def parse_title(self, root):
        '''
        Return ``(title, series, series_index)`` for the page.

        ``series`` and ``series_index`` are ``None`` when the page names no
        series; ``series_index`` is also ``None`` when the series text holds
        no number. All three are ``None`` when no title node is found.
        '''
        title_node = root.xpath('//div [@class="productTitleFormat"]/h1/span [@itemprop="name"]')
        if not title_node:
            return (None, None, None)
        title = title_node[0].text_content().strip()

        serie_node = root.xpath('//div [@class="productInfo"]/ul [@class="info"]/li [@id="ctl00_main_frame_ctrlproduct_liSeries"]/a')
        if not serie_node:
            return (title, None, None)

        serie_txt = serie_node[0].text_content().strip()
        # The first space-separated number splits the text into the series
        # name (before it) and the index. A series without any number must
        # not raise, otherwise the title itself would be thrown away.
        match = re.search(r' (\d+)', serie_txt)
        if match is not None and match.start() > 0:
            serie_name = serie_txt[:match.start()].rstrip()
            return (title, serie_name, float(match.group(1)))
        return (title, serie_txt, None)

    def parse_authors(self, root):
        '''
        Return the list of author names labelled "Forfatter:" on the page;
        the list is empty when none are found.
        '''
        auteur_node = root.xpath('//li [@class="liAuthor" and strong="Forfatter: "]/h2/a')
        authors = []
        for author in auteur_node:
            # Only accept links whose enclosing <li> is labelled as author.
            label = author.xpath('../../strong')
            if label and label[0].text_content().strip() == 'Forfatter:':
                authors.append(author.text_content().strip())
        return authors

    def parse_isbn(self, root):
        '''Return the ISBN text from the product info table, or ``None``.'''
        detail_node = root.xpath('//div [@class="productInfo"]/ul [@id="ctl00_main_frame_ctrlproduct_ulProductInfo2"]/li [@id="ctl00_main_frame_ctrlproduct_liISBN"]/table/tr/td[2]')
        if detail_node:
            return detail_node[0].text_content().strip()
        return None

    def parse_rating(self, root):
        '''
        Return the review rating as a float, or ``None`` when the page has
        no review image or the image name carries no digit.
        '''
        rating_node = root.xpath('//div [@class="productInfo"]/ul [@id="ctl00_main_frame_ctrlproduct_ulProductInfo"]/li [@class="liReview"]/img/@src')
        if not rating_node:
            return None
        # The value is read from the character right before the last four
        # characters of the image URL (presumably a ".xxx" file extension).
        rating_value = rating_node[0][-5:-4]
        if rating_value.isdigit():
            return float(rating_value)
        return None

    def parse_publisher(self, root):
        '''Return the publisher name, or ``None`` when it is not listed.'''
        publisher = None
        pub_node = root.xpath('//div [@class="productInfo"]/ul [@id="ctl00_main_frame_ctrlproduct_ulProductInfo"]/li [@id="ctl00_main_frame_ctrlproduct_liPublisher"]/h3/a/span [@itemprop="brand"]')
        if pub_node:
            publisher = pub_node[0].text_content().strip()
        return publisher

    def parse_published_date(self, root):
        '''
        Return the publication date as a datetime, or ``None`` when the
        page does not list one.
        '''
        pub_node = root.xpath('//div [@class="productInfo"]/ul [@id="ctl00_main_frame_ctrlproduct_ulProductInfo2"]/li [@id="ctl00_main_frame_ctrlproduct_liPublished"]/span')
        if not pub_node:
            return None
        return self._convert_date_text(pub_node[0].text_content().strip())

    def _convert_date_text(self, date_text):
        '''
        Convert text that starts with a four digit year, optionally followed
        by a one or two digit month (with or without a separator), into a
        UTC datetime on the first day of that month.

        A missing or out-of-range month falls back to January.

        :raises ValueError: when the text does not start with a year
        '''
        match = re.match(r'\s*(\d{4})\D*(\d{1,2})?', date_text)
        if match is None:
            raise ValueError('Unrecognised date text: %r' % date_text)
        year = int(match.group(1))
        month = int(match.group(2)) if match.group(2) else 1
        if not 1 <= month <= 12:
            month = 1
        from calibre.utils.date import utc_tz
        return datetime.datetime(year, month, 1, tzinfo=utc_tz)

    def parse_comments(self, root):
        '''
        Return the product description as HTML followed by a source note,
        or ``None`` when the page has no description.
        '''
        description_node = root.xpath('//div [@class="product"]/div [@class="productDescription"]/p/span [@itemprop="description"]')
        if not description_node:
            return None
        comments = tostring(description_node[0])
        # tostring() may hand back bytes; decode before appending text.
        if isinstance(comments, bytes):
            comments = comments.decode('utf-8', 'replace')
        return comments + '<br>(source: adlibris.com)<br>'

    def parse_cover(self, root):
        '''
        Return the cover URL for the page, or ``None`` when there is none.

        A found URL is also stored in the plugin's identifier-to-cover cache
        under ``self.adlibris_dk_id``.
        '''
        # First look for the "sample" marker; for such products the cover
        # URL is built from the ISBN instead of being read from the page.
        result_node = root.xpath('//div [@class="productInfo"]/ul [@id="ctl00_main_frame_ctrlproduct_ulProductInfo"]/li/span [@class="sample"]')
        if result_node and self.isbn:
            img_url = '%s%s%s' % ('http://www.adlibris.com/bookbrowser/Books/ZoomedOut/isbn', self.isbn, '_fc_1.jpg')
            self.plugin.cache_identifier_to_cover_url(self.adlibris_dk_id, img_url)
            return img_url

        # No sample (or no ISBN to build the URL from): use the cover image
        # on the page, unless it is the "noimage" placeholder.
        result_node = root.xpath('//div [@class="productInfo"]/div [@class="cover"]/span/img [@itemprop="image"]/@src')
        if result_node:
            img_url = result_node[0]
            if 'noimage' not in img_url:
                self.plugin.cache_identifier_to_cover_url(self.adlibris_dk_id, img_url)
                return img_url
        return None
			
		
			 
			
