﻿#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
						print_function)

__license__   = 'GPL v3'
__copyright__ = '2011, Hoffer Csaba <csaba.hoffer@gmail.com>'
__docformat__ = 'restructuredtext hu'

import socket, re, datetime
from threading import Thread
from lxml.html import fromstring, tostring
from calibre.ebooks.metadata.book.base import Metadata
import lxml, sys
import lxml.html as lh
from calibre.utils.date import utcnow
from datetime import datetime
from dateutil import parser

class Worker(Thread): # Get details

	'''
	Get book details from Libri book page in a separate thread
	'''

	def __init__(self, url, result_queue, browser, log, relevance, plugin, timeout=20):
		Thread.__init__(self)
		self.daemon = True
		self.url, self.result_queue = url, result_queue
		self.log, self.timeout = log, timeout
		self.relevance, self.plugin = relevance, plugin
		self.browser = browser.clone_browser()
		self.cover_url = self.libri_id = self.isbn = None

	def run(self):
		try:
			self.get_details()
		except:
			self.log.exception('get_details failed for url: %r'%self.url)

	def get_details(self):
		
		try:
			raw = self.browser.open_novisit(self.url, timeout=self.timeout)
		except Exception as e:
			if callable(getattr(e, 'getcode', None)) and \
					e.getcode() == 404:
				self.log.error('URL malformed: %r'%self.url)
				return
			attr = getattr(e, 'args', [None])
			attr = attr if attr else [None]
			if isinstance(attr[0], socket.timeout):
				msg = 'Libri timed out. Try again later.'
				self.log.error(msg)
			else:
				msg = 'Failed to make details query: %r'%self.url
				self.log.exception(msg)
			return

		root = lh.parse(raw)
		self.parse_details(root)

	def parse_details(self, root):
		try:
			libri_id = self.parse_libri_id(self.url)
			self.log.info('Parsed Libri identifier:%s'%libri_id)
		except:
			self.log.exception('Error parsing Libri id for url: %r'%self.url)
			libri_id = None

		try:
			title = self.parse_title(root)
			self.log.info('Parsed title:%s'%title)
		except:
			self.log.exception('Error parsing title for url: %r'%self.url)
			title = None
		
		try:
			authors = self.parse_authors(root)
			self.log.info('Parsed authors:%s'%authors)
		except:
			self.log.exception('Error parsing authors for url: %r'%self.url)
			authors = []

		if not title or not authors or not libri_id:
			self.log.error('Could not find title/authors/Libri id for %r'%self.url)
			self.log.error('Libri id: %r Title: %r Authors: %r'%(libri_id, title, authors))
			return

		mi = Metadata(title, authors)
		mi.set_identifier('libri', libri_id)
		self.libri_id = libri_id

		try:
			isbn = self.parse_isbn(root)
			self.log.info('Parsed ISBN:%s'%isbn)
			if isbn:
				self.isbn = mi.isbn = isbn
		except:
			self.log.exception('Error parsing ISBN for url: %r'%self.url)

		try:
			series = self.parse_series(root)
			self.log.info('Parsed series:%s'%series)
		except :
			self.log.exception('Error parsing series for url: %r'%self.url)
			series = None
			
			
		try:
			mi.comments = self.parse_comments(root)
			self.log.info('Parsed comments:%s'%mi.comments)
		except:
			self.log.exception('Error parsing comments for url: %r'%self.url)

		try:
			self.cover_url = self.parse_cover(root)
			self.log.info('Parsed URL for cover:%r'%self.cover_url)
			self.plugin.cache_identifier_to_cover_url(self.libri_id, self.cover_url)
		except:
			self.log.exception('Error parsing cover for url: %r'%self.url)
		mi.has_cover = bool(self.cover_url)

		try:
			mi.publisher = self.parse_publisher(root)
			self.log.info('Parsed publisher:%s'%mi.publisher)
		except:
			self.log.exception('Error parsing publisher for url: %r'%self.url)
			
		try:
			mi.tags = self.parse_tags(root)
			self.log.info('Parsed tags:%s'%mi.tags)
		except:
			self.log.exception('Error parsing tags for url: %r'%self.url)

		try:
			mi.pubdate = self.parse_published_date(root)
			self.log.info('Parsed publication date:%s'%mi.pubdate)
		except:
			self.log.exception('Error parsing published date for url: %r'%self.url)

		mi.source_relevance = self.relevance

		if series:
			mi.series = series

		if self.libri_id:
			if self.isbn:
				self.plugin.cache_isbn_to_identifier(self.isbn, self.libri_id)

		self.plugin.clean_downloaded_metadata(mi)

		self.result_queue.put(mi)

	def parse_libri_id(self, url):
		libri_id = re.search('/konyv/(.*).html', url).groups(0)[0]
		return libri_id
		
	def book_property(self, root):
		book_property = root.xpath('//div[@class="props"]//text()')
		book_property = [text for text in book_property if text.lstrip()]
		if book_property:
			return book_property
		
	def parse_title(self, root):
		try:
			title_node = root.xpath('//div[@class="title"]/h1')
			title_text = title_node[0].text_content().strip()
		except:
			return
		if title_node:
			return title_text

	def parse_series(self, root):
		try:
			i = self.book_property(root).index(u'Sorozat')
		except:
			return
		series_node = ''.join(self.book_property(root)[i+2]).strip().title()
		if series_node:
			return series_node
		
	def parse_authors(self, root):
		try:
			author_nodes = root.xpath('//div[@class="title"]/h2/a')
			if author_nodes:
				authors = []
				for author_value in author_nodes:
					author = tostring(author_value, method='text', encoding=unicode).strip().replace('-', '')
					authors.append(author)
				return authors
		except:
			return

	def parse_isbn(self, root):
		try:
			i = self.book_property(root).index('ISBN:')
			isbn_nodes = ''.join(self.book_property(root)[i+1]).strip()
		except:
			return
		if isbn_nodes:
			return isbn_nodes

	def parse_publisher(self, root):
		try:
			publisher = None
			i = self.book_property(root).index(u'Kiad\xf3')
			publisher_node = ''.join(self.book_property(root)[i+2]).strip().title()
		except:
			return
		if publisher_node:
			return publisher_node.rpartition(':')[2].strip()

	def parse_published_date(self, root):
		try:
			i = self.book_property(root).index(u'Kiad\xe1s \xe9ve:')
			pub_date_node = ''.join(self.book_property(root)[i+1]).strip()
			default = datetime.utcnow()
			from calibre.utils.date import utc_tz
			default = datetime(default.year, default.month, 1, tzinfo=utc_tz)
			pub_date_node = parser.parse(pub_date_node, default=default)
		except:
			return
		if pub_date_node:
			return pub_date_node
			
	def parse_tags(self, root):
		try:
			tags_node = root.xpath('//*[@id="navigationBar"]//text()')
			tags_node = [text for text in tags_node if text.lstrip().replace('\t', '')]
			tags_node = tags_node[len(tags_node)-2].lower()
			if tags_node == 'további könyveink' or tags_node == 'egyéb' or tags_node == 'regény':
				tags_node = ''
			tags_node = tags_node.split(',')
		except:
			return
		if tags_node:
			return tags_node
	def parse_comments(self, root):
		try:
			description_node = root.xpath('//div[@id="tab_content_lead"]//text()')
		except:
			return
		if description_node:
			return ''.join(description_node).strip()

	def parse_cover(self, root):
		try:
			imgcol_node = ''.join(root.xpath('//*[@id="book"]//*[@class="cover"]//@src'))
		except:
			return
		if imgcol_node:
			return imgcol_node

