﻿#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
						print_function)

__license__   = 'GPL v3'
__copyright__ = '2011, Hoffer Csaba <csaba.hoffer@gmail.com>'
__docformat__ = 'restructuredtext hu'

import socket, re
from threading import Thread
from calibre.ebooks.metadata.book.base import Metadata
import lxml, sys
import lxml.html as lh
from calibre.utils.date import utcnow
from datetime import datetime
from dateutil import parser
from calibre.ebooks.metadata import MetaInformation, string_to_authors
from calibre import browser


class Worker(Thread): # Get details

	'''
	Get book details from antikvarium.hu book page in a separate thread
	'''

	def __init__(self, url, result_queue, browser, log, relevance, plugin, timeout=20):
		Thread.__init__(self)
		self.daemon = True
		self.url, self.result_queue = url, result_queue
		self.log, self.timeout = log, timeout
		self.relevance, self.plugin = relevance, plugin
		self.browser = browser.clone_browser()
		self.cover_url = self.antik_id = self.isbn = None

	def run(self):
		try:
			self.get_details()
		except:
			self.log.exception('get_details failed for url: %r'%self.url)

	def get_details(self):
		
		try:
			raw = self.browser.open_novisit(self.url, timeout=self.timeout)
		except Exception as e:
			if callable(getattr(e, 'getcode', None)) and \
					e.getcode() == 404:
				self.log.error('URL malformed: %r'%self.url)
				return
			attr = getattr(e, 'args', [None])
			attr = attr if attr else [None]
			if isinstance(attr[0], socket.timeout):
				msg = 'Antikvarium.hu timed out. Try again later.'
				self.log.error(msg)
			else:
				msg = 'Failed to make details query: %r'%self.url
				self.log.exception(msg)
			return

		root = lh.parse(raw)
		self.parse_details(root)

	def parse_details(self, root):
		search_data = ''
		try:
			antik_id = self.parse_antik_id(root)
			self.log.info('Parsed antik identifier:%s'%antik_id)
		except:
			self.log.exception('Error parsing Antikvarium id for url: %r'%self.url)
			antik_id = None

		try:
			title = self.parse_title(root)
			self.log.info('Parsed title:%s'%title)
		except:
			self.log.exception('Error parsing title for url: %r'%self.url)
			title = None
		
		try:
			authors = string_to_authors(self.parse_authors(root))
			self.log.info('Parsed authors:%s'%authors)
		except:
			self.log.exception('Error parsing authors for url: %r'%self.url)
			authors = []

		if not title or not authors or not antik_id:
			self.log.error('Could not find title/authors/Antikvarium.hu id for %r'%self.url)
			self.log.error('Antikvarium.hu id: %r Title: %r Authors: %r'%(antik_id, title, authors))
			return

		mi = Metadata(title, authors)
		mi.set_identifier('antik', antik_id)
		self.antik_id = antik_id

		try:
			isbn = self.parse_isbn(root)
			self.log.info('Parsed ISBN:%s'%isbn)
			if isbn:
				self.isbn = mi.isbn = isbn
		except:
			self.log.exception('Error parsing ISBN for url: %r'%self.url)

		try:
			series = self.parse_series(root)
			self.log.info('Parsed series:%s'%series)
		except :
			self.log.exception('Error parsing series for url: %r'%self.url)
			series = None
			
		try:
			mi.series_index = self.parse_series_index(root)
			self.log.info('Parsed series index:%s'%mi.series_index)
		except :
			self.log.exception('Error parsing series for url: %r'%self.url)
			mi.series_index = None
			
		try:
			mi.comments = self.parse_comments(root)
			self.log.info('Parsed comments:%s'%mi.comments)
		except:
			self.log.exception('Error parsing comments for url: %r'%self.url)

		try:
			self.cover_url = self.parse_cover(root)
			self.log.info('Parsed URL for cover:%r'%self.cover_url)
			self.plugin.cache_identifier_to_cover_url(self.antik_id, self.cover_url)
		except:
			self.log.exception('Error parsing cover for url: %r'%self.url)
		mi.has_cover = bool(self.cover_url)

		try:
			mi.publisher = self.parse_publisher(root)
			self.log.info('Parsed publisher:%s'%mi.publisher)
		except:
			self.log.exception('Error parsing publisher for url: %r'%self.url)
			
		try:
			mi.tags = self.parse_tags(root)
			self.log.info('Parsed tags:%s'%mi.tags)
		except:
			self.log.exception('Error parsing tags for url: %r'%self.url)

		try:
			mi.pubdate = self.parse_published_date(root)
			self.log.info('Parsed publication date:%s'%mi.pubdate)
		except:
			self.log.exception('Error parsing published date for url: %r'%self.url)

		mi.source_relevance = self.relevance

		if series:
			mi.series = series

		if self.antik_id:
			if self.isbn:
				self.plugin.cache_isbn_to_identifier(self.isbn, self.antik_id)

		self.plugin.clean_downloaded_metadata(mi)

		self.result_queue.put(mi)

	def parse_antik_id(self, root):
		antik_id_node = root.xpath('/html/head/link/@href')
		antik_id_node = antik_id_node[0]
		antik_id_node = re.search('&ID=(.*)', antik_id_node).groups(0)[0]
		self.log.info('Parsed antikvarium.hu ID: %s'%antik_id_node)
		return antik_id_node
		
	def book_property(self, root, search_data):
		for i in range(0, 12):
			
			data = root.xpath('//*[@id="konyvadat_adatok"]//tr[%d]/td[1]//text()'%i)
			data=[text for text in data if text.strip()]
			data = '\n'.join(data)
			data = data.encode('utf-8')

			if data == search_data:
				data = root.xpath('//*[@id="konyvadat_adatok"]//tr[%d]/td[2]//text()'%i)
				data=[text for text in data if text.strip()]
				data = '\n'.join(data)
				return(data)
				break
			
			else:
				i=i+1
		
	def parse_title(self, root):
		
		search_data = "A könyv címe:"
		title_node = self.book_property(root, search_data)
		if title_node:
			return title_node
			
	def parse_series(self, root):
		search_data = "Sorozatcím:"
		series_node = self.book_property(root, search_data)
		if series_node:
			return series_node
		
	def parse_series_index(self, root):
		search_data = "Kötetsorszám:"
		series_index_node = self.book_property(root, search_data)
		if series_index_node:
			return series_index_node.strip()

	def parse_authors(self, root):
		search_data = " A könyvhöz kapcsolódó név/nevek:"
		author_nodes = self.book_property(root, search_data)
		author_nodes = author_nodes.replace(u'\xa0', '').strip().split('\n')
	
		try:
			i = author_nodes.index(u'(Szerz\u0151)')
			book_author = ''.join(author_nodes[i-1])
		except ValueError:
			book_author = ''
	
		if book_author:
			authors = []
			authors = book_author
			return authors

	def parse_isbn(self, root):
		search_data = "ISBN-szám:"
		isbn_nodes = self.book_property(root, search_data)
		if isbn_nodes:# != None:
			isbn_nodes = isbn_nodes.replace('-', '')
		else:
			isbn_nodes = ''
		if isbn_nodes:
			return isbn_nodes

	def parse_publisher(self, root):
		publisher = None
		search_data = "Kiadó:"
		publisher_node = self.book_property(root, search_data)
		if publisher_node:
			return publisher_node.rpartition(':')[2].strip()

	def parse_published_date(self, root):
		search_data = "A kiadás éve:"
		pub_date_node = self.book_property(root, search_data)
		default = datetime.utcnow()
		from calibre.utils.date import utc_tz
		default = datetime(default.year, default.month, 1, tzinfo=utc_tz)
		pub_date_node = parser.parse(pub_date_node, default=default)

		if pub_date_node:
			return pub_date_node
			
	def parse_tags(self, root):
		tags_node = root.xpath('//*[@id="szelesadatok"]/div[4]//li/a/text()')
		if tags_node:
			return tags_node
			
	def parse_comments(self, root):
		'''
		Fetch the book description from the page linked by the book page.

		The already parsed document is not used: the book page is opened
		again in a fresh browser, the first link whose url matches
		"konyv-fulszoveg" or "konyv-eloszo" is followed, and all text inside
		the <table> elements directly under the body of that page is
		concatenated.

		:param root: parsed HTML document of the book page (unused)
		:return: the description text, or None when it is empty
		'''
		session = browser()
		session.open(self.url)
		linked_page = session.follow_link(url_regex='konyv-fulszoveg|konyv-eloszo', nr=0)
		document = lh.parse(linked_page)
		text_nodes = document.xpath('/html/body/table//text()')
		description = ''.join(text_nodes).strip()
		return description if description else None

	def parse_cover(self, root):
		BASE_URL = 'http://www.antikvarium.hu/ant/'
		book_cover = ''.join(root.xpath('//*[@id="konyvadat_foto"]//@href'))
		imgcol_node = BASE_URL + book_cover
		if imgcol_node:
			return imgcol_node

