#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2011, Pr.BarnArt, based on the Barnes work by Grant Drake'
__docformat__ = 'restructuredtext en'

import socket, re, datetime
from collections import OrderedDict
from threading import Thread
from lxml.html import fromstring, tostring, etree



from calibre.ebooks.metadata.book.base import Metadata
from calibre.library.comments import sanitize_comments_html
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.icu import lower
from urllib import quote


import calibre_plugins.ADLIBRIS_NO.config as cfg

class Worker(Thread):  # Get details
    '''
    Download one Adlibris.com (NO) book page in a separate thread, parse it
    and put the resulting calibre Metadata object on the result queue.

    Every ``parse_*`` method takes the parsed lxml HTML root of the page and
    returns the extracted value, or None when the page has no such element.
    '''
    name                    = 'Worker'
    description             = _('Get book details from Adlibris.com (NO) book page in a separate thread')
    author                  = 'Pr. BarnArt'
    version                 = (0, 0, 10)
    minimum_calibre_version = (0, 2, 5)

    # Common XPath prefix of the product attribute list shared by most parsers.
    _ATTR_XPATH = '//div [@class="product-info-panel__attributes container"]/ul/li'

    def __init__(self, combo, result_queue, browser, log, relevance, plugin, timeout=20):
        '''
        :param combo: sequence indexed here as (url, rating)
        :param result_queue: queue that receives the finished Metadata object
        :param browser: browser object; a clone is used for the download
        :param log: logger exposing info/error/exception
        :param relevance: value stored as ``source_relevance`` on the result
        :param plugin: owning plugin, used for caching and metadata cleanup
        :param timeout: timeout passed to the page download
        '''
        Thread.__init__(self)
        self.daemon = True
        # Extract url and rating from combo.
        self.url = combo[0]
        self.rating = combo[1]
        self.result_queue = result_queue
        self.log, self.timeout = log, timeout
        self.relevance, self.plugin = relevance, plugin
        self.browser = browser.clone_browser()
        self.cover_url = self.adlibris_no_id = self.isbn = None

    def run(self):
        '''Thread entry point: fetch and parse the page, logging any failure.'''
        try:
            self.get_details()
        except Exception:
            self.log.exception('get_details failed for url: %r'%self.url)

    def get_details(self):
        '''
        Download ``self.url``, parse it as HTML and hand the tree to
        parse_details(). Download and parse failures are logged, not raised.
        '''
        try:
            self.log.info('ADLIBRIS_NO url: %r'%self.url)
            raw = self.browser.open_novisit(self.url, timeout=self.timeout).read().strip()
        except Exception as e:
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                self.log.error('URL malformed: %r'%self.url)
                return
            attr = getattr(e, 'args', [None])
            attr = attr if attr else [None]
            if isinstance(attr[0], socket.timeout):
                msg = 'Adlibris.com timed out. Try again later.'
                self.log.error(msg)
            else:
                msg = 'Failed to make details query: %r'%self.url
                self.log.exception(msg)
            return

        raw = raw.decode('utf-8', errors='replace')

        # The site can answer a missing page with a regular "404" HTML page.
        if '<title>404 - ' in raw:
            self.log.error('URL malformed: %r'%self.url)
            return

        try:
            root = fromstring(clean_ascii_chars(raw))
        except Exception:
            msg = 'Failed to parse adlibris.com details page: %r'%self.url
            self.log.exception(msg)
            return

        self.parse_details(root, raw)

    def parse_details(self, root, raw):
        '''
        Build a Metadata object from the parsed page and queue it.

        Title and authors are mandatory: without them an error is logged and
        nothing is queued. Every other field is best effort; a failing parser
        is logged and its field is left unset.

        :param root: parsed lxml HTML root of the details page
        :param raw: decoded page source (not used by this method)
        '''
        try:
            adlibris_no_id = self.parse_adlibris_no_id(self.url)
            # NOTE(review): the parsed value is immediately replaced by the
            # constant 1, so every book is cached under the same identifier.
            # Kept as-is because callers outside this file may rely on it;
            # confirm whether this is a leftover debug override.
            adlibris_no_id = 1
        except Exception:
            self.log.exception('Error parsing adlibris.com(NO) id for url: %r'%self.url)
            adlibris_no_id = None

        try:
            (title, series, series_index) = self.parse_title(root)
        except Exception:
            self.log.exception('Error parsing title for url: %r'%self.url)
            # Reset all three names so the series check below cannot hit an
            # unbound variable.
            title = series = series_index = None

        try:
            authors = self.parse_authors(root)
        except Exception:
            self.log.exception('Error parsing authors for url: %r'%self.url)
            authors = []

        if not title or not authors:
            self.log.error('Could not find title/authors/ adlibris_no_id for %r'%self.url)
            self.log.error('adlibris_no_id: %r Title: %r Authors: %r'%(adlibris_no_id, title,
                authors))
            return

        mi = Metadata(title, authors)

        if series:
            mi.series = series
            if series_index is not None:
                # parse_title returns the index as a digit string; store a number.
                try:
                    series_index = float(series_index)
                except (TypeError, ValueError):
                    pass
            mi.series_index = series_index

        # Must be set before parse_cover(), which caches the cover under it.
        self.adlibris_no_id = adlibris_no_id

        try:
            isbn = self.parse_isbn(root)
            if isbn:
                self.isbn = isbn
                mi.isbn = isbn
        except Exception:
            self.log.exception('Error parsing ISBN for url: %r'%self.url)

        try:
            mi.comments = self.parse_comments(root)
        except Exception:
            self.log.exception('Error parsing comments for url: %r'%self.url)

        try:
            self.cover_url = self.parse_cover(root)
        except Exception:
            self.log.exception('Error parsing cover for url: %r'%self.url)
        mi.has_cover = bool(self.cover_url)

        try:
            mi.publisher = self.parse_publisher(root)
        except Exception:
            self.log.exception('Error parsing publisher for url: %r'%self.url)

        try:
            mi.pubdate = self.parse_published_date(root)
        except Exception:
            self.log.exception('Error parsing published date for url: %r'%self.url)

        try:
            mi.language = self.parse_language(root)
        except Exception:
            self.log.exception('Error parsing language for url: %r'%self.url)

        mi.source_relevance = self.relevance

        if self.adlibris_no_id and self.isbn:
            self.plugin.cache_isbn_to_identifier(self.isbn, self.adlibris_no_id)

        self.plugin.clean_downloaded_metadata(mi)
        self.result_queue.put(mi)

    def parse_adlibris_no_id(self, url):
        '''Return the list of all numeric tokens (digits, optional dot) found in url.'''
        return re.findall(r"(?:\d*\.)?\d+", url)

    @staticmethod
    def _split_series(text):
        '''Split "Name 12" into ("Name", "12"); return (text, None) without a trailing number.'''
        match = re.search(r'([0-9]+)$', text)
        if match is None:
            return text, None
        # Cut at the trailing number itself, not at the first occurrence of
        # the same digits somewhere earlier in the name.
        return text[:match.start()].strip(), match.group(1)

    @staticmethod
    def _split_date(text):
        '''
        Turn "yyyy", "yyyy-mm" or "yyyy-mm-dd" (dashes optional) into a
        (year, month, day) tuple; return None for an empty string.
        Raises ValueError when the text is not numeric.
        '''
        digits = text.replace('-', '').strip()
        if not digits:
            return None
        year = int(digits[:4])
        # Defaults for missing parts. NOTE(review): day 2 rather than 1 is
        # presumably meant to keep the date from slipping into the previous
        # month after a timezone conversion -- confirm.
        month, day = 1, 2
        if len(digits) > 4:
            # A "00" part means unknown: fall back to the default.
            month = int(digits[4:6]) or 1
            if len(digits) > 6:
                day = int(digits[6:]) or 2
        return year, month, day

    @staticmethod
    def _strip_parenthetical(text):
        '''Return text up to (not including) the first "(", stripped of whitespace.'''
        pos = text.find('(')
        if pos > -1:
            text = text[:pos]
        return text.strip()

    def parse_title(self, root):
        '''
        Return (title, series, series_index) for the page.

        The subtitle is appended to the title as "title: subtitle" when the
        plugin preference for subtitles is enabled. series_index is the
        trailing number of the series text as a string, or None.
        Returns (None, None, None) when the page has no title element.
        '''
        title_node = root.xpath('//div [@class="section product-header"]/div [@class="bd"]/h1 [@itemprop="name"]')
        if not title_node:
            return (None, None, None)
        title = title_node[0].text_content().strip()

        # Check whether subtitles should be used.
        sub_yes = False
        prefs = cfg.plugin_prefs[cfg.STORE_NAME]
        if 'sub_yes' in prefs:
            sub_yes = prefs[cfg.KEY_SUB_YES]
        if sub_yes:
            subtitle_node = root.xpath(self._ATTR_XPATH + '/span [@itemprop="subtitle"]')
            if subtitle_node:
                subtitle = subtitle_node[0].text_content().strip()
                if subtitle:
                    title = '%s: %s'%(title, subtitle)

        serie_txt = None
        index = None
        serie_node = root.xpath(self._ATTR_XPATH + '/span/a [@class="block-link"]')
        if serie_node:
            serie_txt, index = self._split_series(serie_node[0].text_content().strip())
        return (title, serie_txt, index)

    def parse_authors(self, root):
        '''Return the list of author names on the page (empty when none are found).'''
        auteur_node = root.xpath(self._ATTR_XPATH + '/span/a/span [@itemprop="author"]')
        names = (aut.text_content().strip() for aut in auteur_node)
        return [name for name in names if name]

    def parse_isbn(self, root):
        '''Return the ISBN text of the page, or None when it is absent.'''
        detail_node = root.xpath(self._ATTR_XPATH + '/span [@itemprop="isbn"]')
        if detail_node:
            return detail_node[0].text_content().strip()
        return None

    def parse_rating(self, root):
        '''
        Return the single character that sits just before the 4-character
        suffix of the review image URL, or None when there is no such image.
        '''
        rating_node = root.xpath('//div [@class="productInfo"]/ul [@id="ctl00_main_frame_ctrlproduct_ulProductInfo"]/li [@class="liReview"]/img/@src')
        if rating_node:
            return rating_node[0][-5:-4]
        return None

    def parse_language(self, root):
        '''
        Return the language text of the page without any parenthesised
        suffix; defaults to 'Norsk' when the page gives no language.
        '''
        language = None
        language_node = root.xpath(self._ATTR_XPATH + '/span [@itemprop="inLanguage"]')
        if language_node:
            language = self._strip_parenthetical(language_node[0].text_content())
        return language or 'Norsk'

    def parse_publisher(self, root):
        '''Return the publisher name of the page, or None when it is absent.'''
        pub_node = root.xpath(self._ATTR_XPATH + '/span [@itemprop="publisher"]/a')
        if pub_node:
            return pub_node[0].text_content().strip()
        return None

    def parse_published_date(self, root):
        '''
        Return the publication date as a UTC datetime, or None when the page
        has no (or an empty) date. Only yyyy, yyyymm and yyyymmdd, with or
        without dashes, are expected; anything else raises ValueError.
        '''
        pub_node = root.xpath(self._ATTR_XPATH + '/span [@itemprop="datePublished"]')
        if not pub_node:
            return None
        parts = self._split_date(pub_node[0].text_content())
        if parts is None:
            return None
        from calibre.utils.date import utc_tz
        year, month, day = parts
        return datetime.datetime(year, month, day, tzinfo=utc_tz)

    def parse_comments(self, root):
        '''
        Return the description text followed by a source attribution, or
        None when the page has no description.

        When the 'block_bold' preference is enabled, bold runs are treated as
        advertising and removed, unless they start with a double quote
        (a quotation).
        '''
        description_node = root.xpath('//div [@class="bd"]/div [@itemprop="description"]')
        if not description_node:
            return None
        description = description_node[0]

        prefs = cfg.plugin_prefs[cfg.STORE_NAME]
        delete_bold = prefs[cfg.KEY_BLOCK_BOLD] if 'block_bold' in prefs else False
        if delete_bold:
            for bold in description.xpath('b'):
                if not bold.text_content().strip().startswith('"'):
                    # drop_tree() keeps the text that follows the element;
                    # remove() would discard that tail text as well.
                    bold.drop_tree()

        comments = description.text_content().strip()
        comments = comments.replace('<div id="product-description" class="short" itemprop="description">', '').strip()
        comments = comments.replace('<p class="description">', '').strip()
        return comments + '<br>(source: adlibris.com)<br>'

    def parse_cover(self, root):
        '''
        Return the cover image URL of the page, or None when there is none.
        A found URL is also cached on the plugin under self.adlibris_no_id.
        '''
        result_node = root.xpath('//div [@class="section product-header"]/div [@class="img"]/img/@src')
        if not result_node:
            return None
        img_url = result_node[0]
        self.plugin.cache_identifier_to_cover_url(self.adlibris_no_id, '%s'%(img_url))
        return img_url
