﻿#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
						print_function)
import six
from six.moves import filterfalse

__license__   = 'GPL v3'
__copyright__ = '2011-2012, Hoffer Csaba <csaba.hoffer@gmail.com>, Kloon <kloon@techgeek.co.in>, 2020, Hokutya <mail@hokutya.com>'
__docformat__ = 'restructuredtext hu'

import urllib.request
from lxml import html

import socket, re, datetime
from threading import Thread
from lxml.html import fromstring, tostring
from calibre.ebooks.metadata.book.base import Metadata
import lxml, sys
import lxml.html as lh
from calibre.utils.date import utcnow
from datetime import datetime
from dateutil import parser
from calibre.ebooks.metadata import MetaInformation
from calibre import browser
from dateutil import parser

class Worker(Thread):
	'''Get book details from a Libri.hu book page in a separate thread.

	The worker downloads ``url``, scrapes the individual metadata fields from
	the parsed page and, when at least the title, the authors and the Libri
	identifier were found, puts a ``Metadata`` object on ``result_queue``.
	Every field parser is best-effort: a failure is logged and the field is
	simply left unset.
	'''

	# Lower-cased Hungarian language names, as looked up from the 'Nyelv' row
	# of the details table, mapped to ISO 639-1 language codes.
	_LANGUAGE_CODES = {
		'magyar': 'hu',
		'angol': 'en',
		'amerikai': 'en',
		'amerikai angol': 'en',
		'n\xe9met': 'de',
		'francia': 'fr',
		'olasz': 'it',
		'spanyol': 'es',
		'orosz': 'ru',
		't\xf6r\xf6k': 'tr',
		# 'gr' and 'cn' are country codes; the language codes are 'el' and 'zh'.
		'g\xf6r\xf6g': 'el',
		'k\xednai': 'zh',
	}

	def __init__(self, url, result_queue, browser, log, relevance, plugin, timeout=30):
		'''Store the job parameters and clone ``browser`` for this thread.

		:param url: address of the Libri.hu book page to scrape
		:param result_queue: queue that receives the resulting ``Metadata``
		:param browser: browser object; ``clone_browser()`` is called on it
		:param log: logger providing ``info``/``error``/``exception``
		:param relevance: value stored as ``source_relevance`` of the result
		:param plugin: owning plugin, used for its identifier/cover caches and
			for ``clean_downloaded_metadata``
		:param timeout: timeout passed to the page download
		'''
		Thread.__init__(self)
		self.daemon = True
		self.url, self.result_queue = url, result_queue
		self.log, self.timeout = log, timeout
		self.relevance, self.plugin = relevance, plugin
		self.browser = browser.clone_browser()
		self.cover_url = self.libri_id = self.isbn = None

	def run(self):
		'''Thread entry point: fetch and parse the page, logging any failure.'''
		try:
			self.get_details()
		except Exception:
			self.log.exception('get_details failed for url: %r'%self.url)

	def get_details(self):
		'''Download ``self.url`` and hand the parsed document to ``parse_details``.

		Download errors are logged (404 and timeouts get dedicated messages)
		and end the job without raising.
		'''
		try:
			raw = self.browser.open_novisit(self.url, timeout=self.timeout)
		except Exception as e:
			if callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
				self.log.error('URL malformed: %r'%self.url)
				return
			# The underlying cause, when there is one, is the first exception argument.
			attr = getattr(e, 'args', [None])
			attr = attr if attr else [None]
			if isinstance(attr[0], socket.timeout):
				msg = 'Libri timed out. Try again later.'
				self.log.error(msg)
			else:
				msg = 'Failed to make details query: %r'%self.url
				self.log.exception(msg)
			return
		root = lh.parse(raw)
		self.parse_details(root)

	def parse_details(self, root):
		'''Build a ``Metadata`` object from ``root`` and queue it.

		Title, authors and Libri identifier are mandatory; when any of them is
		missing the problem is logged and nothing is queued. All other fields
		are optional.
		'''
		try:
			libri_id = self.parse_libri_id(self.url)
			self.log.info('Parsed Libri identifier:%s'%libri_id)
		except Exception:
			self.log.exception('Error parsing Libri id for url: %r'%self.url)
			libri_id = None
		try:
			title = self.parse_title(root)
			self.log.info('Parsed title: %s'%title)
		except Exception:
			self.log.exception('Error parsing title for url: %r'%self.url)
			title = None
		try:
			authors = self.parse_authors(root)
			self.log.info('Parsed authors: %s'%authors)
		except Exception:
			self.log.exception('Error parsing authors for url: %r'%self.url)
			authors = []
		if not title or not authors or not libri_id:
			self.log.error('Could not find title/authors/Libri id for url: %r'%self.url)
			self.log.error('Libri id: %r Title: %r Authors: %r'%(libri_id, title, authors))
			return
		mi = Metadata(title, authors)
		mi.set_identifier('libri_hu', libri_id)
		self.libri_id = libri_id
		try:
			isbn = self.parse_isbn(root)
			self.log.info('Parsed ISBN: %s'%isbn)
			if isbn:
				self.isbn = mi.isbn = isbn
		except Exception:
			self.log.exception('Error parsing ISBN for url: %r'%self.url)
		try:
			series = self.parse_series(root)
			self.log.info('Parsed series: %s'%series)
		except Exception:
			self.log.exception('Error parsing series for url: %r'%self.url)
			series = None
		try:
			mi.comments = self.parse_comments(root)
			self.log.info('Parsed comments: %s'%mi.comments)
		except Exception:
			self.log.exception('Error parsing comments for url: %r'%self.url)
		try:
			mi.rating = self.parse_rating(root)
			self.log.info('Parsed rating: %r'%mi.rating)
		except Exception:
			self.log.exception('Error parsing rating for url: %r'%self.url)
		try:
			languages = self.parse_languages(root)
			self.log.info('Parsed languages: %r'%languages)
			# Only overwrite the default when at least one language was recognised.
			if languages:
				mi.languages = languages
		except Exception:
			self.log.exception('Error parsing language for url: %r'%self.url)
		try:
			self.cover_url = self.parse_cover(root)
			self.log.info('Parsed URL for cover: %r'%self.cover_url)
			# Do not cache a missing cover URL.
			if self.cover_url:
				self.plugin.cache_identifier_to_cover_url(self.libri_id, self.cover_url)
			mi.has_cover = bool(self.cover_url)
		except Exception:
			self.log.exception('Error parsing cover for url: %r'%self.url)
		try:
			mi.publisher = self.parse_publisher(root)
			self.log.info('Parsed publisher: %s'%mi.publisher)
		except Exception:
			self.log.exception('Error parsing publisher for url: %r'%self.url)
		try:
			mi.tags = self.parse_tags(root)
			self.log.info('Parsed tags: %s'%mi.tags)
		except Exception:
			self.log.exception('Error parsing tags for url: %r'%self.url)
		try:
			mi.pubdate = self.parse_published_date(root)
			self.log.info('Parsed publication date: %s\n\n'%mi.pubdate)
		except Exception:
			self.log.exception('Error parsing published date for url: %r\n\n'%self.url)
		mi.source_relevance = self.relevance
		if series:
			mi.series = series
		if self.libri_id and self.isbn:
			self.plugin.cache_isbn_to_identifier(self.isbn, self.libri_id)
		self.plugin.clean_downloaded_metadata(mi)
		self.result_queue.put(mi)

	def parse_libri_id(self, url):
		'''Return the part of ``url`` between ``/konyv/`` and ``.html``, or None.'''
		try:
			# Raw string with an escaped dot so that only a literal '.html' matches.
			m = re.search(r'/konyv/(.*)\.html', url)
		except TypeError:
			# url is not a string (e.g. None)
			return None
		return m.group(1) if m else None

	def book_property(self, root):
		'''Return the rows of the product details table as a ``{label: value}`` dict.

		Only rows with exactly two cells (``th``/``td``) are used; both the
		label and the value are whitespace-stripped. An empty dict is returned
		when the page has no such table.
		'''
		tables = root.xpath('//*[@id="productPageMainItem"]//table')
		if not tables:
			return {}

		book_properties = {}
		for row in tables[0].findall('.//tr'):
			cells = row.findall('.//th') + row.findall('.//td')
			if len(cells) == 2:  # key/value rows only
				key = cells[0].text_content().strip()
				value = cells[1].text_content().strip()
				book_properties[key] = value
		return book_properties

	def parse_title(self, root):
		'''Return the title, with `` – subtitle`` appended when one is found, or None.'''
		try:
			title_node = root.xpath('//*[@id="productPageMainItem"]//*[@class="h2 mb-2"]/text()')
			if not title_node:
				return None
			title_text = title_node[0].strip()
			# The subtitle is only looked up when the heading has no <span> child.
			if not root.xpath('//*[@id="productPageMainItem"]/div/div/div[2]/h1/span'):
				subtitle = root.xpath('//*[@id="productPageMainItem"]//*[@class="subtitle"]/text()')
				if subtitle:
					self.log.info('Subtitle: %s'%subtitle[0])
					title_text = title_text + ' – ' + subtitle[0].strip()
			return title_text
		except Exception:
			self.log.exception('Failed to parse title for url: %r'%self.url)
			return None

	def parse_series(self, root):
		'''Return the title-cased 'Sorozat' (series) property, or None.'''
		try:
			# NOTE(review): .title() also lower-cases acronyms; confirm it is wanted.
			series = self.book_property(root).get('Sorozat', '').strip().title()
			return series or None
		except Exception:
			self.log.exception('Failed to parse series for url: %r'%self.url)
			return None

	def parse_authors(self, root):
		'''Return the list of author names linked under the title, or None.'''
		try:
			author_nodes = root.xpath('//*[@id="productPageMainItem"]/div/div/div[2]/p[1]/a')
			if not author_nodes:
				return None
			# NOTE(review): replace('-', '') also strips hyphens inside names
			# (e.g. double-barrelled surnames); kept as-is, confirm intent.
			return [
				tostring(node, method='text', encoding=six.text_type).strip().replace('-', '')
				for node in author_nodes
			]
		except Exception:
			self.log.exception('Failed to parse authors for url: %r'%self.url)
			return None

	def parse_isbn(self, root):
		'''Return the 'ISBN' property (upper-cased), or None.'''
		try:
			# upper() normalises a trailing 'x' check digit.
			isbn = self.book_property(root).get('ISBN', '').strip().upper()
			return isbn or None
		except Exception:
			self.log.exception('Failed to parse ISBN for url: %r'%self.url)
			return None

	def parse_publisher(self, root):
		'''Return the title-cased 'Kiadó' (publisher) property, or None.'''
		try:
			# NOTE(review): .title() also lower-cases acronyms; confirm it is wanted.
			publisher = self.book_property(root).get('Kiadó', '').strip().title()
			return publisher or None
		except Exception:
			self.log.exception('Failed to parse publisher for url: %r'%self.url)
			return None

	def parse_published_date(self, root):
		'''Return the 'Kiadás éve' (year of publication) property as a datetime, or None.

		Date components missing from the page text are filled in from today's
		UTC date.
		'''
		try:
			pub_date_str = self.book_property(root).get('Kiadás éve', '').strip()
			if not pub_date_str:
				return None
			from calibre.utils.date import utc_tz
			today = datetime.utcnow()
			default = datetime(today.year, today.month, today.day, tzinfo=utc_tz)
			return parser.parse(pub_date_str, default=default)
		except Exception:
			self.log.exception('Failed to parse published date for url: %r'%self.url)
			return None

	def parse_tags(self, root):
		'''Return the lower-cased, non-blank text nodes of the navigation bar, or None.'''
		try:
			tags_node = root.xpath('//*[@id="navigationBar"]//text()')
			if not tags_node:
				return None
			return [tag.strip().lower() for tag in tags_node if tag.strip(' \r\n\t')]
		except Exception:
			self.log.exception('Failed to parse tags for url: %r'%self.url)
			return None

	def parse_comments(self, root):
		'''Return the stripped text of the product description, or None when empty.'''
		try:
			# XPath string() concatenates all text below the element, nested nodes included.
			description_text = root.xpath('string(//*[@id="product-description"])')
			if not description_text:
				return None
			return description_text.strip()
		except Exception:
			self.log.exception('Failed to parse comments for url: %r'%self.url)
			return None

	def parse_rating(self, root):
		'''Return the ``ratingValue`` rounded to an integer, or None when not present.'''
		try:
			rating_node = root.xpath('//*[@id="productPageMainItem"]//*[@itemprop="ratingValue"]/@content')
			if not rating_node:
				return None
			rating = rating_node[0].strip()
			return round(float(rating)) if rating != '' else float(0)
		except Exception:
			self.log.exception('Failed to parse rating for url: %r'%self.url)
			return None

	def parse_languages(self, root):
		'''Return the language codes for the 'Nyelv' (language) property, or None.

		The property may list several languages separated by ``-``. Names that
		are not in the translation table are skipped; None is returned when no
		language could be recognised.
		'''
		try:
			lang_text = self.book_property(root).get('Nyelv', '').strip()
			codes = []
			for name in lang_text.split('-'):
				if not name.strip():
					continue
				code = self._translateLanguageToCode(name)
				if code and code not in codes:
					codes.append(code)
			return codes or None
		except Exception:
			self.log.exception('Failed to parse languages for url: %r'%self.url)
			return None

	def parse_cover(self, root):
		'''Return the ``og:image`` URL of the page, or None.'''
		try:
			imgcol_node = root.xpath('//*[@property="og:image"]/@content')
			if imgcol_node:
				return imgcol_node[0].strip()
			return None
		except Exception:
			self.log.exception('      -     imgcol_node hiba!')
			return None

	def _translateLanguageToCode(self, displayLang):
		'''Translate a Hungarian language name to a language code.

		The lookup ignores case and surrounding whitespace. Returns ``'und'``
		for an empty/None name and None for a name that is not in the table.
		'''
		if not displayLang:
			return 'und'
		return self._LANGUAGE_CODES.get(str(displayLang).strip().lower(), None)
		