﻿#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
						print_function)

__license__   = 'GPL v3'
__copyright__ = '2016, Wrangly'
__docformat__ = 'restructuredtext en'

import re
import socket, re, datetime
from threading import Thread
from lxml.html import fromstring, tostring
from calibre.ebooks.metadata.book.base import Metadata
import lxml, sys
import lxml.html as lh
from calibre.utils.date import utcnow
from datetime import datetime
from dateutil import parser
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.cleantext import clean_ascii_chars
from calibre import browser



class Worker(Thread):
    """Fetch one product page in a daemon thread and turn it into metadata.

    ``run`` downloads ``url`` with a clone of the supplied browser, parses the
    HTML and, when a title, at least one author and an identifier were found,
    puts a calibre ``Metadata`` object on ``result_queue``.  Every failure is
    logged through ``log``; nothing is raised to the thread machinery.
    """

    def __init__(self, url, result_queue, browser, log, relevance, plugin, timeout=30):
        """Store the collaborators and clone the browser for this thread.

        :param url: address of the product page to download.
        :param result_queue: queue that receives the finished ``Metadata``.
        :param browser: object exposing ``clone_browser()``; the clone is
            the one used for the download.
        :param log: logger exposing ``info``, ``error`` and ``exception``.
        :param relevance: value copied to ``Metadata.source_relevance``.
        :param plugin: owning plugin; its ``cache_*`` and
            ``clean_downloaded_metadata`` methods are called while parsing.
        :param timeout: timeout passed to ``open_novisit``.
        """
        Thread.__init__(self)
        self.daemon = True
        self.url, self.result_queue = url, result_queue
        self.log, self.timeout = log, timeout
        self.relevance, self.plugin = relevance, plugin
        self.browser = browser.clone_browser()
        self.cover_url = self.politeianet_id = self.isbn = None

    def run(self):
        """Thread entry point: run ``get_details`` and log any failure."""
        try:
            self.get_details()
        except Exception:
            self.log.exception('get_details failed for url: %r'%self.url)

    def get_details(self):
        """Download ``self.url`` and hand the parsed tree to ``parse_details``.

        Download problems are logged and end the call without a result: a 404
        is reported as a malformed URL, a ``socket.timeout`` as a time-out and
        anything else with a full traceback.  An empty response is reported
        as an error as well.
        """
        try:
            raw = self.browser.open_novisit(self.url, timeout=self.timeout).read().strip()
            raw = raw.decode('utf-8', errors='replace')
        except Exception as e:
            if callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
                self.log.error('URL malformed: %r'%self.url)
                return
            attr = getattr(e, 'args', [None])
            attr = attr if attr else [None]
            if isinstance(attr[0], socket.timeout):
                msg = 'Public.gr timed out. Try again later.'
                self.log.error(msg)
            else:
                msg = 'Failed to make details query: %r'%self.url
                self.log.exception(msg)
            return

        if not raw:
            # The original referenced the undefined names ``log`` and
            # ``query`` here, which raised NameError instead of logging.
            self.log.error('Failed to get raw result for query: %r'%self.url)
            return

        root = fromstring(clean_ascii_chars(raw))
        self.parse_details(root)

    def parse_details(self, root):
        """Extract every supported field from ``root`` and queue the result.

        Title, authors and identifier are mandatory: if any of them is
        missing the problem is logged and nothing is queued.  Every other
        field is best effort; a failure is logged and the field is left
        untouched.

        :param root: parsed HTML tree exposing ``xpath``.
        """
        try:
            politeianet_id = self.parse_politeianet_id(self.url)
            self.log.info('Parsed politeianet.gr identifier: %s'%politeianet_id)
        except Exception:
            self.log.exception('Error parsing politeianet.gr id for url: %r'%self.url)
            politeianet_id = None

        try:
            title = self.parse_title(root)
            self.log.info('Parsed title: %s'%title)
        except Exception:
            self.log.exception('Error parsing title for url: %r'%self.url)
            title = None

        try:
            authors = self.parse_authors(root)
            self.log.info('Parsed authors: %s'%authors)
        except Exception:
            self.log.exception('Error parsing authors for url: %r'%self.url)
            authors = []

        if not title or not authors or not politeianet_id:
            self.log.error('Could not find title/authors/politeianet.gr id for %r'%self.url)
            self.log.error('Public.gr id: %r Title: %r Authors: %r'%(politeianet_id, title, authors))
            return

        mi = Metadata(title, authors)
        # NOTE(review): the stored identifier drops the first four characters
        # of the URL tail while the caches below are keyed on the full tail;
        # confirm against the plugin code that reads them back.
        mi.set_identifier('politeianet', politeianet_id.strip("/")[4:])
        self.politeianet_id = politeianet_id

        try:
            series_info = self.parse_series(root)
            if series_info:
                mi.series = series_info[0]
                # The index is optional: the cell may hold only the name.
                if len(series_info) > 1:
                    mi.series_index = float(series_info[1])
                    self.log.info('Parsed series: %s, series index: %f'%(mi.series, mi.series_index))
                else:
                    self.log.info('Parsed series: %s'%mi.series)
        except Exception:
            self.log.exception('Error parsing series for url: %r'%self.url)

        try:
            isbn = self.parse_isbn(root)
            self.log.info('Parsed ISBN: %s'%isbn)
            if isbn:
                self.isbn = mi.isbn = isbn
        except Exception:
            self.log.exception('Error parsing ISBN for url: %r'%self.url)

        try:
            mi.comments = self.parse_comments(root)
            self.log.info('Parsed comments: %s'%mi.comments)
        except Exception:
            self.log.exception('Error parsing comments for url: %r'%self.url)

        try:
            self.cover_url = self.parse_covers(root)
            self.log.info('Parsed URL for cover: %r'%self.cover_url)
            self.plugin.cache_identifier_to_cover_url(self.politeianet_id, self.cover_url)
            mi.has_cover = bool(self.cover_url)
        except Exception:
            self.log.exception('Error parsing cover for url: %r'%self.url)

        try:
            mi.tags = self.parse_tags(root)
            self.log.info('Parsed tags: %s'%mi.tags)
        except Exception:
            self.log.exception('Error parsing tags for url: %r'%self.url)

        try:
            mi.rating = self.parse_rating(root)
            self.log.info('Parsed rating: %s\n\n'%mi.rating)
        except Exception:
            self.log.exception('Error parsing rating for url: %r\n\n'%self.url)

        try:
            mi.publisher = self.parse_publisher(root)
            self.log.info('Parsed publisher: %s'%mi.publisher)
        except Exception:
            self.log.exception('Error parsing publisher for url: %r'%self.url)

        try:
            mi.languages = self.parse_languages(root)
            self.log.info('Parsed languages: %r'%mi.languages)
        except Exception:
            self.log.exception('Error parsing language for url: %r'%self.url)

        try:
            mi.pubdate = self.parse_published_date(root)
            self.log.info('Parsed publication date: %s'%mi.pubdate)
        except Exception:
            self.log.exception('Error parsing published date for url: %r'%self.url)

        mi.source_relevance = self.relevance

        if self.politeianet_id and self.isbn:
            self.plugin.cache_isbn_to_identifier(self.isbn, self.politeianet_id)

        self.plugin.clean_downloaded_metadata(mi)

        self.result_queue.put(mi)

    def parse_politeianet_id(self, url):
        """Return everything after ``/product/`` in ``url``, or None."""
        try:
            m = re.search('/product/(.*)', url)
        except Exception:
            # e.g. ``url`` is not a string
            return None
        return m.group(1) if m else None

    def parse_series(self, root):
        """Return the text nodes of the series cell, or None if there are none."""
        series_node = root.xpath('//td[contains(., "Σειρά βιβλίου")]/following-sibling::td/text()')
        return series_node if series_node else None

    def parse_isbn(self, root):
        """Return the first non-blank line of the ISBN cell, stripped, or None."""
        isbn_node = root.xpath('//td[contains(., "ISBN")]/following-sibling::td/text()')
        for isbn_value in isbn_node:
            m = re.search('(.+)', isbn_value)
            if m:
                isbn = m.group(1).strip()
                # Whitespace-only text is not an ISBN; keep looking.
                if isbn:
                    return isbn
        return None

    def parse_title(self, root):
        """Return the product title without any parenthesised suffix.

        The last non-blank text node wins; everything from the first ``(``
        onwards is dropped and the remainder is stripped.  Returns None when
        no usable text is found.
        """
        title_node = root.xpath('//*[@class="product-title"]/text()')
        self.log.info('Title: %s'%title_node)
        title = None
        for text in title_node:
            candidate = text.split('(', 1)[0].strip()
            if candidate:
                title = candidate
        return title

    def parse_authors(self, root):
        """Return the author names as a list of text strings, or None."""
        author_nodes = root.xpath('//div[@class="bookAuthorName"]/a/text()')
        self.log.info('Authors: %r'%author_nodes)
        if author_nodes:
            # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3,
            # where the ``unicode`` builtin no longer exists.
            text_type = type(u'')
            return [text_type(author) for author in author_nodes]

    def parse_rating(self, root):
        """Return the rating derived from the digits in the second facet bar.

        The digits of each text node are multiplied by 0.05; a product of 100
        or more is returned at once divided by 100, otherwise the value of
        the last node containing digits is returned.  Returns None when no
        node contains a digit.
        """
        rating_node = root.xpath('//*[@class="facet-bar"][2]/text()')
        self.log.info('rating: %s'%rating_node)
        rating_value = None
        for rating_text in rating_node:
            digits = re.sub('[^0-9]', '', rating_text)
            if not digits:
                continue
            rating_value = float(digits) * 0.05
            if rating_value >= 100:
                return rating_value / 100
        return rating_value

    def parse_tags(self, root):
        """Return the text of the last breadcrumb link as a list, or None."""
        tags_node = root.xpath('//*[@id="breadcrumbs"]/li[position()=last()]/a/text()')
        self.log.info('test: %s'%tags_node)

        if tags_node:
            # See parse_authors for why ``type(u'')`` replaces ``unicode``.
            text_type = type(u'')
            return [text_type(tag) for tag in tags_node]

    def parse_comments(self, root):
        """Return the summary text nodes joined by newlines, or None."""
        description_node = root.xpath('//*[@class="bookProductSummaryText"]//text()')
        if description_node:
            return '\n'.join(description_node)

    def parse_publisher(self, root):
        """Return the text of the first publisher link, or None."""
        publisher_node = root.xpath('//*[contains(@href,"publisherName")]/text()')
        if publisher_node:
            return publisher_node[0]

    def parse_published_date(self, root):
        """Return a UTC datetime for the first four-digit year found, or None.

        Only the year comes from the page; month and day are taken from
        today's date, which is passed to the date parser as its default.
        """
        publication_node = root.xpath('//td[contains(., "Ημερ/νία έκδοσης:")]/following-sibling::td/text()')
        self.log.info('year: %s'%publication_node)
        pub_year = None
        for publication_value in publication_node:
            m = re.search(r'(\d{4})', publication_value)
            if m:
                pub_year = m.group(1)
                break

        if not pub_year:
            return None
        from calibre.utils.date import utc_tz
        today = datetime.utcnow()
        default = datetime(today.year, today.month, today.day, tzinfo=utc_tz)
        return parser.parse(pub_year, default=default)

    def parse_covers(self, root):
        """Return the list of cover URLs, preferring the carousel link.

        Falls back to the ``src`` of the product photo when the carousel has
        no matching link.  The result is a (possibly empty) list.
        """
        cover_node = root.xpath('//*[@class="product-photo"]//img/@src')
        # XPath positions start at 1; the original ``[0]`` could never match,
        # which left this branch dead.
        # NOTE(review): this branch never ran before, so its result is
        # unverified against the live page.
        bigcover_node = root.xpath('//*[@class="jcarousel-list jcarousel-list-horizontal"][1]/li[3]/a/@href')
        self.log.info('ola: %s'%cover_node)
        return bigcover_node if bigcover_node else cover_node

    def parse_languages(self, root):
        """Return ``['en']`` if a breadcrumb contains "Ξενόγλωσσα", else ``['ell']``."""
        lang_node = root.xpath('//*[@id="breadcrumbs"]//*[contains(.,"Ξενόγλωσσα")]')
        return ['en'] if lang_node else ['ell']
			
