#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2011, Pr.BarnArt, based on the Barnes work by Grant Drake and BOL_NL'
__docformat__ = 'restructuredtext en'

import socket, re, datetime
import urllib, os
from collections import OrderedDict
from threading import Thread
from lxml.html import fromstring, tostring



from calibre.ebooks.metadata.book.base import Metadata
from calibre.library.comments import sanitize_comments_html
from calibre.utils.cleantext import clean_ascii_chars


import calibre_plugins.ANOBII.config as cfg

class Worker(Thread): # Get  details

    '''
    Get book details from an ANOBII book page in a separate thread.

    Depending on the plugin preferences the cover and/or the comments are
    taken from the matching BOL_IT page instead; that page is looked up via
    the ISBN found on the ANOBII page. The resulting Metadata object is put
    on ``result_queue``.
    '''
    # NOTE(review): 'name' shadows the ``name`` property of
    # threading.Thread; kept because it is part of the existing interface.
    name                    = 'Worker'
    description             = _('Get book details from anobii book page in a separate thread')
    author                  = 'Pr. BarnArt'
    version                 = (0, 4, 0)
    minimum_calibre_version = (0, 8, 0)

    # URL fragments; the ISBN without its last character is appended to the
    # two BASE_* values (presumably dropping the ISBN-10 check digit --
    # TODO confirm against the BOL_IT URL scheme).
    BASE_BOL_IT = 'http://www.bol.it/libri/dum/dum/ea978'
    BASE_COVER_BOL = 'http://www.bol.it/image/?cdSoc=BL&ean=978'
    COVER_BOL_LAST = '&cdSito=BL&tpPrd=01&tipoOggetto=ZOM'

    def __init__(self, combo, result_queue, browser, log, relevance, plugin, timeout=20):
        '''
        :param combo: sequence whose first item is the ANOBII details url and
            whose second item is the rating (as text or number)
        :param result_queue: queue receiving the finished Metadata object
        :param browser: browser object; a clone of it is used by this worker
        :param log: logger offering info/error/exception
        :param relevance: value stored as ``source_relevance`` on the result
        :param plugin: owning plugin, used for its identifier/cover caches
        :param timeout: timeout in seconds for every page download
        '''
        Thread.__init__(self)
        self.daemon = True
        self.url = combo[0]
        self.rating = combo[1]
        self.result_queue = result_queue
        self.log, self.timeout = log, timeout
        self.relevance, self.plugin = relevance, plugin
        self.browser = browser.clone_browser()
        self.cover_url = self.anobii_id = self.isbn = None
        self.cover_bol = self.comment_bol = self.bol_url = None
        self.bol_root = None

    def run(self):
        '''Thread entry point: fetch the details and log any failure.'''
        try:
            self.get_details()
        except Exception:
            self.log.exception('get_details failed for url: %r'%self.url)

    def _fetch_page(self, url, site_name, encoding):
        '''
        Download ``url`` and return its content decoded with ``encoding``.

        Returns None (after logging the reason) when the download fails, the
        server answers 404 or the page carries the "404" title marker.
        ``site_name`` is only used in the timeout log message.
        '''
        try:
            raw = self.browser.open_novisit(url, timeout=self.timeout).read().strip()
        except Exception as e:
            if callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
                self.log.error('URL malformed: %r'%url)
                return None
            attr = getattr(e, 'args', [None])
            attr = attr if attr else [None]
            if isinstance(attr[0], socket.timeout):
                self.log.error('%s timed out. Try again later.'%site_name)
            else:
                self.log.exception('Failed to make details query: %r'%url)
            return None

        text = raw.decode(encoding, errors='replace')
        if '<title>404 - ' in text:
            self.log.error('URL malformed: %r'%url)
            return None
        return text

    def _fetch_bol_root(self, isbn):
        '''
        Download and parse the BOL_IT page belonging to ``isbn``.

        Sets ``self.bol_url`` and returns the parsed document root, or None
        when the page could not be fetched or parsed.
        '''
        self.bol_url = '%s%s'%(Worker.BASE_BOL_IT, isbn[:-1])
        self.log.info('BOL_IT url: %r'%self.bol_url)
        raw = self._fetch_page(self.bol_url, 'BOL_IT', 'windows-1252')
        if raw is None:
            return None
        try:
            return fromstring(clean_ascii_chars(raw))
        except Exception:
            self.log.exception('Failed to parse bol-it.com details page: %r'%self.bol_url)
            return None

    def get_details(self):
        '''
        Download the ANOBII page, read the BOL_IT preferences and hand the
        parsed page to parse_details. Returns silently (after logging) when
        the page cannot be fetched or parsed.
        '''
        self.log.info('ANOBII url: %r'%self.url)
        raw = self._fetch_page(self.url, 'Anobii', 'utf8')
        if raw is None:
            return

        try:
            root = fromstring(clean_ascii_chars(raw))
        except Exception:
            self.log.exception('Failed to parse anobii.com details page: %r'%self.url)
            return

        prefs = cfg.plugin_prefs[cfg.STORE_NAME]
        self.cover_bol = prefs.get(cfg.KEY_COVER_BOL_IT,
                                   cfg.DEFAULT_STORE_VALUES[cfg.KEY_COVER_BOL_IT])
        self.comment_bol = prefs.get(cfg.KEY_COMMENTS_BOL_IT,
                                     cfg.DEFAULT_STORE_VALUES[cfg.KEY_COMMENTS_BOL_IT])

        self.parse_details(root, raw)

    def parse_details(self, root, raw):
        '''
        Build a Metadata object from the parsed ANOBII page ``root`` and put
        it on the result queue.

        Title, authors and the anobii id are mandatory: when any of them is
        missing nothing is queued. Every other field is best effort; a
        failure is logged and the field is left unset. ``raw`` is accepted
        for interface compatibility and is not used.
        '''
        try:
            anobii_id = self.parse_anobii_id(self.url)
        except Exception:
            self.log.exception('Error parsing ANOBII id for url: %r'%self.url)
            anobii_id = None
        try:
            title = self.parse_title(root)
        except Exception:
            self.log.exception('Error parsing title for url: %r'%self.url)
            title = None
        try:
            authors = self.parse_authors(root)
        except Exception:
            self.log.exception('Error parsing authors for url: %r'%self.url)
            authors = []
        if not title or not authors or not anobii_id:
            self.log.error('Could not find title/authors/anobii_id for %r'%self.url)
            self.log.error('anobii_id: %r Title: %r Authors: %r'%(anobii_id, title,
                authors))
            return

        mi = Metadata(title, authors)
        self.anobii_id = anobii_id

        try:
            isbn = self.parse_isbn(root)
            if isbn:
                self.isbn = isbn
                mi.isbn = isbn
                # The BOL_IT page is only needed when one of the two
                # BOL_IT preferences is switched on.
                if self.cover_bol or self.comment_bol:
                    self.bol_root = self._fetch_bol_root(isbn)
        except Exception:
            self.log.exception('Error parsing ISBN for url: %r'%self.url)

        # Convert the rating from text to an integer (rounded half up); an
        # unusable rating must not discard the rest of the metadata.
        rating = self._round_rating(self.rating)
        if rating is None:
            self.log.error('Error parsing rating %r for url: %r'%(self.rating, self.url))
        else:
            mi.rating = rating

        try:
            mi.comments = self.parse_comments(root)
        except Exception:
            self.log.exception('Error parsing comments for url: %r'%self.url)

        try:
            self.cover_url = self.parse_cover(root)
        except Exception:
            self.log.exception('Error parsing cover for url: %r'%self.url)
        mi.has_cover = bool(self.cover_url)

        try:
            mi.publisher = self.parse_publisher(root)
        except Exception:
            self.log.exception('Error parsing publisher for url: %r'%self.url)

        try:
            mi.pubdate = self.parse_published_date(root)
        except Exception:
            self.log.exception('Error parsing published date for url: %r'%self.url)

        mi.source_relevance = self.relevance

        if self.anobii_id and self.isbn:
            self.plugin.cache_isbn_to_identifier(self.isbn, self.anobii_id)

        self.plugin.clean_downloaded_metadata(mi)
        self.result_queue.put(mi)

    @staticmethod
    def _round_rating(rating):
        '''Return ``rating`` rounded half up as an int, or None if it is not numeric.'''
        try:
            return int(float(rating) + 0.5)
        except (TypeError, ValueError, OverflowError):
            return None

    def parse_anobii_id(self, url):
        '''
        Return the part of ``url`` after "anobii.com/" up to the trailing
        digits (``<x>/<y>/<digits>``). Raises AttributeError when the url
        does not have that shape.
        '''
        return re.search(r'anobii\.com/(.*/.*/\d+)', url).groups(0)[0]

    def parse_title(self, root):
        '''Return the stripped title text, or None when the node is missing.'''
        title_node = root.xpath('//div [@id="content"]/div/div [@class="info"]/h1 [@class="title"]')
        if title_node:
            return title_node[0].text_content().strip()
        return None

    def parse_authors(self, root):
        '''
        Return a list holding the first contributor found on the page, or an
        empty list when there is none.
        '''
        auteur_node = root.xpath('//div [@id="content"]/div/div [@class="info"]/p [@class="contributor"]/span/a')
        if auteur_node:
            return [auteur_node[0].text_content().strip()]
        return []

    def parse_isbn(self, root):
        '''
        Return the text of the detail entry labelled "ISBN-10:", or None
        when no such entry exists.
        '''
        detail_node = root.xpath('//div [@id="product_details"]/ul [@class="details"]/li')
        for detail in detail_node:
            txt_node = detail.xpath('span')
            if not txt_node:
                continue
            if txt_node[0].text_content().strip() != 'ISBN-10:':
                continue
            isbn_node = detail.xpath('strong')
            if isbn_node:
                return isbn_node[0].text_content().strip()
        return None

    def parse_rating(self, root):
        '''
        Return the rating encoded in the page's css classes as a float, or
        None when no rating markup is found.

        Not called from within this class; parse_details takes the rating
        from ``combo`` instead.
        '''
        rating_node = root.xpath('//div[@class="w-box wgt-product-ratings"]/a/div/@class')
        if rating_node:
            # The value is spelled out in the class attribute, e.g.
            # <div class="product-rating four half"> stands for 4.5
            match = re.search(r'product-rating (.+)', rating_node[0])
            if match:
                rating_parts = match.groups(0)[0].split(' ')
                rating_values = ['zero','one','two','three','four','five']
                rating_value = float(rating_values.index(rating_parts[0]))
                if len(rating_parts) > 1:
                    rating_value += 0.5
                return rating_value
        else:
            # Alternative markup: <span class="avg-4h section_updateRating">
            rating_node = root.xpath('//span[contains(@class,"section_updateRating")]/@class')
            if rating_node:
                rating_text = rating_node[0][4:6]
                rating_value = float(rating_text[0])
                if rating_text[1] == 'h':
                    rating_value += 0.5
                return rating_value
        return None

    def parse_publisher(self, root):
        '''Return the stripped publisher text, or None when it is missing.'''
        pub_node = root.xpath('//div [@id="product_details"]/ul [@class="details"]/li/span/span[@class="translatable" and @lang="Publisher"]/../../strong')
        if pub_node:
            return pub_node[0].text_content().strip()
        return None

    def parse_published_date(self, root):
        '''
        Return the publication date as a datetime, or None when the page
        has no label containing 'Verschijningsjaar' with a four character
        year.
        '''
        pub_date = None
        for label in root.xpath('//div [@class="first_tab_paragraph"]/label'):
            txt = label.text_content().strip()
            if txt.find('Verschijningsjaar') < 0:
                continue
            year = txt.rpartition(':')[2].strip()
            if len(year) == 4:
                pub_date = year
                # A more specific date may be present in the small details
                # line; use it when it mentions the same year.
                pub_date_node2 = root.xpath('//div [@class="product_details"]/p [@class="small_details"]')
                if pub_date_node2:
                    text = pub_date_node2[0].text_content()
                    text = text.rpartition('|')[2].strip()
                    if year in text:
                        pub_date = text
            break
        if pub_date is not None:
            return self._convert_date_text(pub_date)
        return None

    @staticmethod
    def _parse_date_parts(date_text):
        '''
        Split a date text such as "2003", "december 2003" or
        "December 10th 2003" into a ``(year, month, day)`` tuple.

        Month names are looked up case-insensitively in a Dutch table; an
        unknown month falls back to 1, a missing or digit-less day to 1.
        Raises ValueError when the last four characters are not a number.
        '''
        date_text = date_text.strip()
        year = int(date_text[-4:])
        month = 1
        day = 1
        if len(date_text) > 4:
            text_parts = date_text[:len(date_text)-5].partition(' ')
            month_dict = {"januari":1, "februari":2, "maart":3, "april":4, "mei":5, "juni":6,
                "juli":7, "augustus":8, "september":9, "oktober":10, "november":11, "december":12}
            month = month_dict.get(text_parts[0].lower(), 1)
            day_match = re.match(r'([0-9]+)', text_parts[2])
            if day_match:
                day = int(day_match.group(1))
        return year, month, day

    def _convert_date_text(self, date_text):
        '''Convert a date text (see _parse_date_parts) into a UTC datetime.'''
        year, month, day = self._parse_date_parts(date_text)
        from calibre.utils.date import utc_tz
        return datetime.datetime(year, month, day, tzinfo=utc_tz)

    def parse_comments(self, root):
        '''
        Return the book description followed by a source marker, or None.

        The BOL_IT description is preferred when that preference is on and
        the BOL_IT page was loaded; otherwise the ANOBII description is used.
        '''
        comments = ''
        if self.comment_bol and self.bol_root is not None:
            description_node = self.bol_root.xpath('//div [@id="contenuto_scheda"]/p [@class="bol_spacer"]')
            if description_node:
                comments = description_node[0].text_content().strip()
                comments = comments + '<br>(source: bol_it.com)<br>'
        if comments:
            return comments

        description_node = root.xpath('//div [@id="tab_content"]/div [@id="product_description"]/div [@id="description_full"]/p')
        if description_node:
            comments = description_node[0].text_content().strip()
            comments = comments + '<br>(source: Anobii.com)<br>'
        if comments:
            return comments
        return None

    def parse_cover(self, root):
        '''
        Return the cover url and cache it on the plugin, or return None.

        With the BOL_IT cover preference on (and the BOL_IT page loaded) the
        url is built from the ISBN; otherwise the image url on the ANOBII
        page is used unless it points at the "no_image" placeholder.
        '''
        if self.cover_bol and self.bol_root is not None and self.isbn:
            # The url is only constructed here; it is not downloaded.
            img_url = '%s%s%s'%(Worker.BASE_COVER_BOL, self.isbn[:-1], Worker.COVER_BOL_LAST)
            self.plugin.cache_identifier_to_cover_url(self.anobii_id, img_url)
            return img_url

        result_node = root.xpath('//div [@id="content"]/div [@id="product_info"]/div [@class="cover"]/img/@src')
        if result_node:
            img_url = result_node[0]
            if img_url.find("no_image") < 0:
                # NOTE(review): type=5 presumably selects a larger image
                # than type=3 -- confirm against the ANOBII image urls.
                img_url = img_url.replace('type=3','type=5')
                self.plugin.cache_identifier_to_cover_url(self.anobii_id, img_url)
                return img_url
        return None
		
			 
			
