#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2013, Pr.BarnArt'
__docformat__ = 'restructuredtext en'

import socket, re, datetime
import urllib, os
from collections import OrderedDict
from threading import Thread
from lxml.html import fromstring, tostring



from calibre.ebooks.metadata.book.base import Metadata
from calibre.library.comments import sanitize_comments_html
from calibre.utils.cleantext import clean_ascii_chars


import calibre_plugins.INMONDADORI.config as cfg

class Worker(Thread):  # Get details

    '''
    Get book details from an inmondadori.it book page in a separate thread.

    The worker downloads the page at ``self.url``, extracts the individual
    fields with the ``parse_*`` methods and puts one calibre ``Metadata``
    object on ``result_queue`` when title, authors and site id were found.
    '''
    name                    = 'Worker'
    description             = _('Get book details from inmondadori.it book page in a separate thread')
    author                  = 'Pr. BarnArt'
    version                 = (0, 2, 1)
    minimum_calibre_version = (0, 1, 0)

    BASE_INMONDADORI_IT = 'http://www.inmondadori.it/'
    BASE_COVER_INMONDADORI = 'http://www.inmondadori.it/img/'
    COVER_INMONDADORI_LAST = '/BL/BL/01/NZO/'

    def __init__(self, combo, result_queue, browser, log, relevance, plugin, timeout=20):
        '''
        Store the job parameters and clone the browser for this thread.

        :param combo: indexable whose item 0 is the book page URL and whose
            item 1 is the rating (converted to an integer later on).
        :param result_queue: queue that receives the resulting Metadata.
        :param browser: object providing ``clone_browser()``.
        :param log: logger providing ``info``/``error``/``exception``.
        :param relevance: value stored as ``source_relevance`` on the result.
        :param plugin: owning plugin; used for its identifier caches and
            ``clean_downloaded_metadata``.
        :param timeout: timeout passed to the page download.
        '''
        Thread.__init__(self)
        self.daemon = True
        # Extract the URL and the rating from combo
        self.url = combo[0]
        self.rating = combo[1]
        self.result_queue = result_queue
        self.log, self.timeout = log, timeout
        self.relevance, self.plugin = relevance, plugin
        self.browser = browser.clone_browser()
        self.cover_url = self.inmondadori_id = self.isbn = None

    def run(self):
        '''Thread entry point: run get_details() and log any failure.'''
        try:
            self.get_details()
        except Exception:
            self.log.exception('get_details failed for url: %r'%self.url)

    def get_details(self):
        '''
        Download the book page, parse it into an lxml tree and hand the
        tree to parse_details(). Every failure is logged and ends the
        method without producing a result.
        '''
        try:
            self.log.info('INMONDADORI url: %r'%self.url)
            raw = self.browser.open_novisit(self.url, timeout=self.timeout).read().strip()
        except Exception as e:
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                self.log.error('URL malformed: %r'%self.url)
                return
            attr = getattr(e, 'args', [None])
            attr = attr if attr else [None]
            if isinstance(attr[0], socket.timeout):
                msg = 'INMONDADORI timed out. Try again later.'
                self.log.error(msg)
            else:
                msg = 'Failed to make details query: %r'%self.url
                self.log.exception(msg)
            return

        raw = raw.decode('utf8', errors='replace')

        # A 404 page served with a non-error status is recognised by its title
        if '<title>404 - ' in raw:
            self.log.error('URL malformed: %r'%self.url)
            return

        try:
            root = fromstring(clean_ascii_chars(raw))
        except Exception:
            msg = 'Failed to parse inmondadori.it details page: %r'%self.url
            self.log.exception(msg)
            return

        self.parse_details(root, raw)

    def parse_details(self, root, raw):
        '''
        Build a Metadata object from the parsed page and queue it.

        Title, authors and the site id are mandatory: when any of them is
        missing the problem is logged and nothing is queued. All other
        fields are optional; a failure while parsing one of them is logged
        and the remaining fields are still processed.

        :param root: lxml tree of the book page.
        :param raw: decoded page text (not used by this method).
        '''
        try:
            inmondadori_id = self.parse_inmondadori_id(self.url)
        except Exception:
            self.log.exception('Error parsing inmondadori id for url: %r'%self.url)
            inmondadori_id = None
        self.inmondadori_id = inmondadori_id

        try:
            title = self.parse_title(root)
        except Exception:
            self.log.exception('Error parsing title for url: %r'%self.url)
            title = None

        try:
            authors = self.parse_authors(root)
        except Exception:
            self.log.exception('Error parsing authors for url: %r'%self.url)
            authors = []

        if not title or not authors or not inmondadori_id:
            self.log.error('Could not find title/authors/inmondadori_id for %r'%self.url)
            self.log.error('inmondadori_id: %r Title: %r Authors: %r'%(inmondadori_id, title,
                authors))
            return

        mi = Metadata(title, authors)

        # parse_cover() below reads self.isbn, so the ISBN is parsed first
        try:
            isbn = self.parse_isbn(root)
            self.isbn = isbn
            mi.isbn = isbn
        except Exception:
            self.log.exception('Error parsing ISBN for url: %r'%self.url)

        # Convert the rating text to an integer; a bad rating must not
        # throw away the rest of the metadata
        rating = self._rating_to_int(self.rating)
        if rating is not None:
            mi.rating = rating
        elif self.rating is not None:
            self.log.error('Could not convert rating %r for url: %r'%(self.rating, self.url))

        try:
            mi.comments = self.parse_comments(root)
        except Exception:
            self.log.exception('Error parsing comments for url: %r'%self.url)

        try:
            self.cover_url = self.parse_cover(root, title, authors)
        except Exception:
            self.log.exception('Error parsing cover for url: %r'%self.url)
        mi.has_cover = bool(self.cover_url)

        try:
            mi.publisher = self.parse_publisher(root)
        except Exception:
            self.log.exception('Error parsing publisher for url: %r'%self.url)

        try:
            mi.pubdate = self.parse_published_date(root)
        except Exception:
            self.log.exception('Error parsing published date for url: %r'%self.url)

        mi.source_relevance = self.relevance

        if self.inmondadori_id and self.isbn:
            self.plugin.cache_isbn_to_identifier(self.isbn, self.inmondadori_id)
        self.plugin.clean_downloaded_metadata(mi)
        self.result_queue.put(mi)

    @staticmethod
    def _rating_to_int(rating):
        '''
        Round a rating given as text or number to the nearest integer
        (halves round up). Return None when it cannot be converted.
        '''
        try:
            return int(float(rating) + 0.5)
        except (TypeError, ValueError):
            return None

    def parse_inmondadori_id(self, url):
        '''
        Return the part of url after "inmondadori.it/" up to and including
        the "/eai<digits>" segment, or None when url has no such segment.
        '''
        match = re.search(r'inmondadori\.it/(.*/eai\d+)', url)
        if match is None:
            return None
        return match.group(1)

    def parse_title(self, root):
        '''Return the stripped text of the title heading, or None.'''
        # The trailing space inside "product-data " is kept from the
        # original query - presumably it mirrors the page markup.
        title_node = root.xpath('//div [@class="product-data "]/div/h1 [@class="title"]')
        if title_node:
            return title_node[0].text_content().strip()
        return None

    def parse_authors(self, root):
        '''
        Return a list holding the first author link's text, or an empty
        list when the page has no author link.
        '''
        auteur_node = root.xpath('//div [@class="product-data "]/div/p [@class="secondary-data"]/a')
        if auteur_node:
            return [auteur_node[0].text_content().strip()]
        return []

    def parse_isbn(self, root):
        '''Return the stripped text of the EAN code span, or None.'''
        detail_node = root.xpath('//div [@class="product-details"]/p [@class="text text-half eancode"]/span')
        if detail_node:
            return detail_node[0].text_content().strip()
        return None

    def parse_rating(self, root):
        '''
        Return a rating as float read from the page's rating markup, or
        None when no rating markup is found. Not called from within this
        class; the rating used by parse_details() comes from ``combo``.
        '''
        rating_node = root.xpath('//div[@class="w-box wgt-product-ratings"]/a/div/@class')
        if rating_node:
            # After the rewrite B&N no longer put the actual values of the rating in the web page
            # Instead they put words like "four half" for 4.5 and "four" for "4" in the style
            # <div class="product-rating four half">
            match = re.search('product-rating (.+)', rating_node[0])
            if match:
                rating_parts = match.group(1).split(' ')
                rating_values = ['zero', 'one', 'two', 'three', 'four', 'five']
                rating_value = float(rating_values.index(rating_parts[0]))
                if len(rating_parts) > 1:
                    rating_value += 0.5
                return rating_value
            return None

        rating_node = root.xpath('//span[contains(@class,"section_updateRating")]/@class')
        if rating_node:
            # Characters 4-5 of the class value hold a digit and an
            # optional "h" marking a half
            rating_text = rating_node[0][4:6]
            rating_value = float(rating_text[0])
            if rating_text[1:2] == 'h':
                rating_value += 0.5
            return rating_value
        return None

    def _detail_values(self, root, label):
        '''
        Return the first-child text of every product-details row whose
        own text starts with label, in document order.
        '''
        values = []
        for detail in root.xpath('//div [@class="product-details"]/p [@class="text text-half"]'):
            # len() counts child elements; the value lives in the first
            # child, so rows without children are skipped
            if len(detail) == 0:
                continue
            if detail.text_content().strip().startswith(label):
                values.append(detail[0].text_content().strip())
        return values

    def parse_publisher(self, root):
        '''Return the value of the first "Editore" row, or None.'''
        values = self._detail_values(root, 'Editore')
        if values:
            return values[0]
        return None

    def parse_published_date(self, root):
        '''
        Return the value of the last "Pubblicato" row as a datetime, or
        None when there is no such row or its value is empty.
        '''
        values = self._detail_values(root, 'Pubblicato')
        if not values or not values[-1]:
            return None
        return self._convert_date_text(values[-1])

    @staticmethod
    def _parse_date_parts(date_text):
        '''
        Split "dd/mm/yyyy", "mm/yyyy" or "yyyy" into a (year, month, day)
        tuple; missing parts default to 1. Return None for anything else.
        '''
        pieces = (date_text or '').strip().split('/')
        try:
            numbers = [int(piece) for piece in pieces]
        except ValueError:
            return None
        if len(numbers) == 3:
            day, month, year = numbers
        elif len(numbers) == 2:
            day = 1
            month, year = numbers
        elif len(numbers) == 1:
            day = month = 1
            year = numbers[0]
        else:
            return None
        return (year, month, day)

    def _convert_date_text(self, date_text):
        '''
        Convert slash separated date text (see _parse_date_parts) into a
        datetime carrying calibre's UTC tzinfo.

        :raises ValueError: when the text is not in a recognised format or
            does not describe a valid calendar date.
        '''
        parts = self._parse_date_parts(date_text)
        if parts is None:
            raise ValueError('Unrecognised date text: %r'%date_text)
        year, month, day = parts
        from calibre.utils.date import utc_tz
        return datetime.datetime(year, month, day, tzinfo=utc_tz)

    def parse_comments(self, root):
        '''
        Return the text of the first description paragraph followed by a
        source note, or None when the page has no description.
        '''
        description_node = root.xpath('//div [@class="product-description-wide"]/p [@class="text"]')
        if not description_node:
            return None
        comments = description_node[0].text_content().strip()
        return comments + '<br>(source: inmondadori_it.com)<br>'

    def parse_cover(self, root, title, authors):
        '''
        Build the cover URL from title, first author and ISBN, register it
        with the plugin's cover cache and return it.

        Return None when the ISBN or the author list is missing, because
        the URL cannot be built without them.
        '''
        if not self.isbn or not authors:
            return None
        # The last character of the ISBN is left out of the image path
        txt = '%s%s%s%s%s'%(title, ' ', authors[0], '/ea', self.isbn[:-1])
        txt = txt.replace(' ', '-')
        img_url = '%s%s%s'%(Worker.BASE_COVER_INMONDADORI, txt, Worker.COVER_INMONDADORI_LAST)
        self.plugin.cache_identifier_to_cover_url(self.inmondadori_id, img_url)
        return img_url
			
