﻿#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
						print_function)

__license__   = 'GPL v3'
__copyright__ = '2011, Hoffer Csaba <csaba.hoffer@gmail.com>, 2012, Gegesy Zsombor <gzsombor@gmail.com>'
__docformat__ = 'restructuredtext hu'

import socket, re
from threading import Thread
from calibre.ebooks.metadata.book.base import Metadata
import lxml, sys
import lxml.html as lh
from calibre.utils.date import utcnow
from datetime import datetime
from dateutil import parser
from calibre.ebooks.metadata import MetaInformation
from calibre import browser


class Worker(Thread): # Get details
	'''
	Get book details from moly.hu book page in a separate thread.

	The worker downloads ``url``, parses title, authors, series, comments,
	tags, rating and cover URLs out of the page and puts the resulting
	``Metadata`` object on ``result_queue``. Failures of individual fields
	are logged and do not abort the whole lookup; a missing title, author
	list or Moly id does.
	'''

	# No ISBN is extracted from the page by this class; the attribute stays
	# None unless something else assigns it.
	isbn = None

	# ``unicode`` only exists on Python 2; fall back to ``str`` on Python 3
	# so that the text conversions below work on both.
	try:
		_text_type = unicode
	except NameError:
		_text_type = str

	def __init__(self, url, result_queue, browser, log, relevance, plugin, timeout=20):
		'''
		:param url: address of the book page to download
		:param result_queue: queue that receives the parsed ``Metadata``
		:param browser: browser object; a clone of it is used by this thread
		:param log: logger used for progress and error messages
		:param relevance: value stored as ``source_relevance`` on the result
		:param plugin: owning plugin (response parsing, caches, cleanup)
		:param timeout: timeout passed to the page download
		'''
		Thread.__init__(self)
		self.daemon = True
		self.url, self.result_queue = url, result_queue
		self.log, self.timeout = log, timeout
		self.relevance, self.plugin = relevance, plugin
		self.browser = browser.clone_browser()
		self.cover_url = self.moly_id = None
		# Always defined, so code reading it never hits an AttributeError
		# when cover parsing fails.
		self.cover_urls = []

	def run(self):
		'''Thread entry point: fetch and parse, logging any failure.'''
		try:
			self.get_details()
		except Exception:
			self.log.exception('get_details failed for url: %r'%self.url)

	def get_details(self):
		'''
		Download ``self.url`` and hand the parsed document to
		:meth:`parse_details`. Download/parse errors are logged and
		swallowed.
		'''
		try:
			response = self.browser.open_novisit(self.url, timeout=self.timeout)
			root = self.plugin.parse_response_to_xml(response, self.url)
		except Exception as e:
			if callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
				self.log.error('URL malformed: %r'%self.url)
				return
			# The first exception argument carries the underlying cause
			args = getattr(e, 'args', None) or [None]
			if isinstance(args[0], socket.timeout):
				self.log.error('Moly timed out. Try again later.')
			else:
				self.log.exception('Failed to make details query: %r'%self.url)
			return
		self.parse_details(root)

	def parse_details(self, root):
		'''
		Build a ``Metadata`` object from the parsed page ``root`` and put
		it on the result queue.

		Title, authors and Moly id are mandatory: if any is missing the
		problem is logged and nothing is queued. Every other field is
		optional and a failure to parse it is only logged.
		'''
		try:
			moly_id = self.parse_moly_id(self.url)
			self.log.info('Parsed moly identifier:%s'%moly_id)
		except Exception:
			self.log.exception('Error parsing Moly id for url: %r'%self.url)
			moly_id = None

		try:
			title = self.parse_title(root)
			self.log.info('Parsed title:%s'%title)
		except Exception:
			self.log.exception('Error parsing title for url: %r'%self.url)
			title = None

		try:
			authors = self.parse_authors(root)
			self.log.info('Parsed authors:%s'%authors)
		except Exception:
			self.log.exception('Error parsing authors for url: %r'%self.url)
			authors = []

		if not title or not authors or not moly_id:
			self.log.error('Could not find title/authors/Moly id for %r'%self.url)
			self.log.error('Moly id: %r Title: %r Authors: %r'%(moly_id, title, authors))
			return

		mi = Metadata(title, authors)
		mi.set_identifier('moly', moly_id)
		self.moly_id = moly_id

		try:
			series = self.parse_series(root)
			self.log.info('Parsed series:%s'%series)
		except Exception:
			self.log.exception('Error parsing series for url: %r'%self.url)
			series = None

		try:
			mi.comments = self.parse_comments(root)
			self.log.info('Parsed comments:%s'%mi.comments)
		except Exception:
			self.log.exception('Error parsing comments for url: %r'%self.url)

		try:
			mi.tags = self.parse_tags(root)
			self.log.info('Parsed tags:%s'%mi.tags)
		except Exception:
			self.log.exception('Error parsing tags for url: %r'%self.url)

		try:
			mi.rating = self.parse_rating(root)
			self.log.info('Parsed rating:%s'%mi.rating)
		except Exception:
			self.log.exception('Error parsing rating for url: %r'%self.url)

		mi.source_relevance = self.relevance

		if series:
			mi.series = series

		# Reset first so a failure inside parse_cover leaves an empty list
		# instead of an undefined (or stale) attribute.
		self.cover_urls = []
		try:
			self.cover_urls = self.parse_cover(root)
			self.log.info('Parsed URL for cover:%r'%self.cover_urls)
			self.plugin.set_cover_urls(self.moly_id, self.cover_urls)
			if self.cover_urls:
				self.plugin.cache_identifier_to_cover_url(self.moly_id, self.cover_urls[0])
		except Exception:
			self.log.exception('Error parsing cover for url: %r'%self.url)
		mi.has_cover = bool(self.cover_urls)

		self.log.info('cache_isbn_to_identifier(isbn = %s, moly_id = %s)'%(self.isbn, self.moly_id))
		# Only cache when there actually is an ISBN to use as the key
		if self.isbn and self.moly_id:
			self.plugin.cache_isbn_to_identifier(self.isbn, self.moly_id)
		self.plugin.clean_downloaded_metadata(mi)
		self.result_queue.put(mi)

	def parse_moly_id(self, url):
		'''
		Return everything that follows ``/konyvek/`` in ``url``, or None
		when the URL does not contain that segment or nothing follows it.
		'''
		match = re.search(r'/konyvek/(.*)', url)
		if match is None:
			return None
		return match.group(1) or None

	def parse_title(self, root):
		'''Return the first title text found in the page header, or None.'''
		found_title = root.xpath('//div[@id="main"]//div[@id="content"]//h1[@class="hreview-aggregate"]//span[@class="fn"]/text()')
		self.log.info('Title/Author%s'%found_title)
		if found_title:
			return self._text_type(found_title[0])
		return None

	def parse_series(self, root):
		'''Return the first series link text, or None if there is none.'''
		series_node = root.xpath('//div[@id="main"]//div[@id="content"]/div/div[4]/h3/a/text()')
		return series_node[0] if series_node else None

	def parse_authors(self, root):
		'''Return the author link texts as a list, or None if none found.'''
		found_authors = root.xpath('//div[@id="content"]//div[@class="authors"]/a/text()')
		self.log.info('found authors %s'%found_authors)
		if found_authors:
			return [self._text_type(auth) for auth in found_authors]
		return None

	def parse_tags(self, root):
		'''Return the non-blank tag texts as a list, or None if none found.'''
		tags = [text for text in root.xpath('//*[@id="tags"]//*[@class="hover_link"]/text()') if text.strip()]
		return tags or None

	def parse_comments(self, root):
		'''Return the page's meta description content, or None if absent.'''
		description_node = root.xpath('//head/meta[@name="description"]/@content')
		if description_node:
			return ''.join(description_node)
		return None

	def parse_rating(self, root):
		'''
		Return the rounded rating, or None when the page shows none.

		The first ``like_count`` text is read as a percentage and
		multiplied by 0.05, i.e. 100% maps to 5.
		'''
		rating_node = root.xpath('//div[@id="content"]/div/h1//span[@class="like_count"]//text()')
		self.log.info('Rating_node: %s'%rating_node)
		if rating_node:
			return round(float(rating_node[0].strip().strip('%'))*0.05)
		return None

	def parse_cover(self, root):
		'''Return the cover image URLs with any query string removed.'''
		book_covers = root.xpath('//div[@class="coverbox"]//img/@src')
		# strip after the ? part from the paths
		return [str(a).partition('?')[0] for a in book_covers]

	def create_separate_metadata(self, mi, cover_urls):
		'''
		Queue one copy of ``mi`` per cover URL.

		Each copy gets the identifier ``<moly_id>|cover=<cover id>`` and
		its cover URL is cached under that identifier. Only the first
		queued copy keeps ``self.isbn``; later copies get None. URLs
		without a ``/covers_<digits>`` part are logged and skipped.
		'''
		first_isbn = self.isbn
		for url in cover_urls:
			match = re.search(r'/covers_([0-9]*)', url)
			if match is None:
				self.log.error('No cover id found in cover url: %r'%url)
				continue
			new_id = self.moly_id + '|cover=' + match.group(1)
			new_metadata = mi.deepcopy()
			new_metadata.set_identifier('moly', new_id)
			new_metadata.isbn = first_isbn
			self.plugin.cache_identifier_to_cover_url(new_id, url)
			self.plugin.clean_downloaded_metadata(new_metadata)
			self.result_queue.put(new_metadata)
			self.log.info('meta : %s -> %s'%(new_metadata.get_identifiers(), url))
			self.log.info('metadata added: %s, relevance: %s, isbn : %s'%(new_metadata, new_metadata.source_relevance, new_metadata.isbn))
			self.log.info('queue size %s'%self.result_queue.qsize())
			first_isbn = None
	
