﻿#!/usr/bin/env python
# vim:fileencoding=utf-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
						print_function)

__license__   = 'GPL v3'
__copyright__ = '2011-2018, Hoffer Csaba <csaba.hoffer@gmail.com>, Kloon <kloon@techgeek.co.in>, otapi <otapigems.com>, 2020.jun Hokutya <mail@hokutya.com>'
__docformat__ = 'restructuredtext hu'

import time
from Queue import Queue, Empty
from lxml.html import fromstring
from calibre import as_unicode
import lxml.etree as etree
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source, Option
from calibre.utils.icu import lower
from calibre.utils.cleantext import clean_ascii_chars
import lxml, sys, traceback
from calibre import browser
import urllib
from lxml.html import tostring
from string import maketrans   # Required to call maketrans function.

class Antikvarium_hu(Source):
	'''
	Calibre metadata source plugin for antikvarium.hu.

	Declares the ``identify`` and ``cover`` capabilities and searches the
	site by title and first author.
	'''
	# Plugin identity shown by calibre.
	name					= 'Antikvarium_hu'
	description				= _('Downloads metadata and cover from antikvarium.hu')
	author					= 'Hoffer Csaba & Kloon & otapi & Hokutya'
	version					= (2, 0, 4)
	minimum_calibre_version = (0, 8, 0)

	# What the plugin can do and which metadata fields it may fill in.
	capabilities = frozenset(['identify', 'cover'])
	touched_fields = frozenset(['title', 'authors', 'identifier:isbn', 'identifier:antik_hu', 'tags', 'comments', 'publisher', 'pubdate', 'series', 'language', 'languages'])
	has_html_comments = False
	supports_gzip_transfer_encoding = False
	can_get_multiple_covers = False

	# Keys of the user-configurable options declared below.
	# NOTE(review): only KEY_MAX_DOWNLOADS is read in this part of the file;
	# confirm whether KEY_MAX_COVERS is used elsewhere.
	KEY_MAX_DOWNLOADS = 'maxDownloads'
	KEY_MAX_COVERS = 'max_covers'
	
	# Both options are numeric and default to 5.
	options = (Option(KEY_MAX_DOWNLOADS, 'number', 5, _('Maximum number of books to get'),
                      _('The maximum number of books to process from the Antikvarium search result')),
				Option(KEY_MAX_COVERS, 'number', 5, _('Maximum number of covers to get'),
                      _('The maximum number of covers to process for the chosen book'))
	)
	
	# Site root and the prefix to which a book id is appended to form a book URL.
	BASE_URL = 'https://www.antikvarium.hu'
	BOOK_URL = BASE_URL + '/konyv/'
	
	def create_query(self, log, title=None, authors=None, identifiers={}):
		'''
		Build the antikvarium.hu detailed-search URL for a title/author pair.

		:param log: logger; the URL-quoted search terms are written to it.
		:param title: book title; None or an empty string leaves the title
			field of the search blank.
		:param authors: sequence of author names, of which only the first is
			searched for; None or an empty sequence leaves the author field
			blank.
		:param identifiers: accepted for signature compatibility, not used.
		:return: the search URL as a string.
		'''
		# Imported locally so the method works on Python 2 and Python 3 alike
		# (urllib.quote only exists on Python 2).
		try:
			from urllib.parse import quote
		except ImportError:
			from urllib import quote
		search_title = quote(title.encode('utf-8')) if title else ''
		log.info(' Title: %s'%search_title)
		# Truthiness instead of "is not None": an empty author list would
		# otherwise raise IndexError on authors[0].
		search_author = quote(authors[0].encode('utf-8')) if authors else ''
		log.info(' Author: %s'%search_author)
		search_page = "https://www.antikvarium.hu/index.php?type=search&kc=%s&sz=%s&he=0&jk=0&reszletes=1&rend=kiadasevecsokk&oldaldb=60&kapelol=0&nezet=li&elist=egyebadat&interfaceid=102&oldalcount=1"%(search_title, search_author)
		return search_page
		
	def get_cached_cover_url(self, identifiers):
		url = None
		antik_id = identifiers.get('antik_hu', None)
		if antik_id is None:
			isbn = identifiers.get('isbn', None)
			if isbn is not None:
				antik_id = self.cached_isbn_to_identifier(isbn)
		if antik_id is not None:
			url = self.cached_identifier_to_cover_url(antik_id)
		return url
		
	def cached_identifier_to_cover_url(self, id_):
		with self.cache_lock:
			url = self._get_cached_identifier_to_cover_url(id_)
			if not url:
				# Try for a "small" image in the cache
				url = self._get_cached_identifier_to_cover_url('small/'+id_)
			return url
			
	def _get_cached_identifier_to_cover_url(self, id_):
		# This must only be called once we have the cache lock
		url = self._identifier_to_cover_url_cache.get(id_, None)
		if not url:
			# We could not get a url for this particular B&N id
			# However we might have one for a different isbn for this book
			# Barnes & Noble are not very consistent with their covers and
			# it could be that the particular ISBN we chose does not have
			# a large image but another ISBN we retrieved does.
			key_prefix = id_.rpartition('/')[0]
			for key in self._identifier_to_cover_url_cache.keys():
				if key.startswith('key_prefix'):
					return self._identifier_to_cover_url_cache[key]
		return url
		
	def identify(self, log, result_queue, abort, title, authors,
			identifiers={}, timeout=30):
		'''
		Search antikvarium.hu for a book and start a worker per matching page.

		The search result page is fetched and parsed here; every book URL
		selected by ``_parse_search_results`` is handed to a ``Worker``
		together with ``result_queue``, and this method then waits until all
		workers have finished or ``abort`` is set.

		Note this method will retry without identifiers automatically if no
		match is found with identifiers. Lookup by identifier is currently
		switched off (see the comment in the body), so in practice a single
		title/author search is made.

		:param log: logger.
		:param result_queue: queue passed on to the workers.
		:param abort: event; when set, the method returns early.
		:param title: book title to search for.
		:param authors: sequence of author names.
		:param identifiers: mapping of identifier type to value.
		:param timeout: timeout in seconds for the search request.
		:return: None normally, or an error message string when the search
			request or the parsing of its response failed.
		'''
		matches = []
		# Bound up front so the "no matches" message below can always use it.
		query = None
		# Identifier based lookup is disabled: both values are forced to be
		# empty. The original code was:
		#   antik_id = identifiers.get('antik_hu', None)
		#   isbn = check_isbn(identifiers.get('isbn', None))
		antik_id = ''
		isbn = ''
		br = browser()
		log.info(u'\nTitle:%s\nAuthors:%s\n'%(title, authors))
		if antik_id:
			matches.append('%s%s'%(Antikvarium_hu.BOOK_URL, antik_id))
		elif isbn:
			matches.append('https://www.antikvarium.hu/index.php?type=search&isbn=%s'%(isbn))
		else:
			query = self.create_query(log, title=title, authors=authors, identifiers=identifiers)
			if query is None:
				log.error('Insufficient metadata to construct query')
				return
			try:
				log.info('Querying: %s'%query)
				# Honour the caller's timeout instead of the socket default.
				response = br.open(query, timeout=timeout)
			except Exception as e:
				# There is no ISBN on this path, so every failure is fatal
				# for this lookup.
				err = 'Failed to make identify query: %r'%query
				log.exception(err)
				return as_unicode(e)
			try:
				raw = response.read().strip()
				raw = raw.decode('utf-8', errors='replace')
				if not raw:
					log.error('Failed to get raw result for query: %r'%query)
					return
				root = fromstring(clean_ascii_chars(raw))
			except Exception:
				msg = 'Failed to parse Antikvarium.hu page for query: %r'%query
				log.exception(msg)
				return msg
			self._parse_search_results(log, title, authors, root, matches, timeout)
		if abort.is_set():
			return
		if not matches:
			# Retrying only makes sense when an identifier was actually used
			# for the lookup; otherwise the retry would repeat the very same
			# title/author request.
			if (antik_id or isbn) and title and authors:
				log.info('No matches found with identifiers, retrying using only'
						' title and authors')
				return self.identify(log, result_queue, abort, title=title,
						authors=authors, timeout=timeout)
			log.error('No matches found with query: %r'%query)
			return
		from calibre_plugins.antikvarium_hu.worker import Worker
		workers = [Worker(url, result_queue, br, log, i, self) for i, url in
				enumerate(matches)]
		for w in workers:
			w.start()
			# Don't send all requests at the same time
			time.sleep(0.1)
		# Poll the workers in short slices so an abort request is noticed
		# quickly.
		while not abort.is_set():
			a_worker_is_alive = False
			for w in workers:
				w.join(0.2)
				if abort.is_set():
					break
				if w.is_alive():
					a_worker_is_alive = True
			if not a_worker_is_alive:
				break
		return None

	def _parse_search_results(self, log, title, authors, root, matches, timeout):
		max_results = self.prefs[Antikvarium_hu.KEY_MAX_DOWNLOADS]
		results = root.xpath('//*[@class="book-data-holder-list"]')
		i = 0
		for result in results:
			urls = result.xpath('//*[@id="searchResultKonyvCim-listas"]/@href')
			book_url = 'https://www.antikvarium.hu/' + urls[0]
			log.info('Book URL: %r'%book_url)
			titlenode = result.xpath('//*[@id="searchResultKonyvCim-listas"]/span')[0]
			n_title = '%s'%titlenode.text_content()
			log.info('Book title: %s'%n_title)
			authorenode = result.xpath('//*[@id="searchResultKonyvSzerzo-listas"]')[0]
			etree.strip_tags(authorenode, 'snap')
			n_author = '%s'%authorenode.text_content()
			log.info('Book author: %s'%n_author)
			if title:
				if title.lower() not in n_title.lower() and self.strip_accents(title) not in self.strip_accents(n_title):
					continue
			if authors:
				author1 = authors[0]
				authorsplit = author1.split(" ")
				author2 = author1
				if len(authorsplit) > 1:
					author2 = '%s %s'%(authorsplit[1], authorsplit[0])
				if author1.lower() not in n_author.lower() and self.strip_accents(author1) not in self.strip_accents(n_author) and author2.lower() not in n_author.lower() and self.strip_accents(author2) not in self.strip_accents(n_author):
					continue
			for bookurls in urls:
				result_url = 'https://www.antikvarium.hu/' + bookurls
				if (result_url not in matches):
					matches.append(result_url)
					i += 1
				if (i >= max_results):
					return
		if i==0:
			for result in results:
				urls = result.xpath('//*[@id="searchResultKonyvCim-listas"]/@href')
				for bookurls in urls:
					result_url = 'https://www.antikvarium.hu/' + bookurls
					if (result_url not in matches):
						matches.append(result_url)
						i += 1
					if (i >= max_results):
						return

	def strip_accents(self, s):
		'''
		Return ``s`` lower-cased, with the Hungarian accented vowels replaced
		by their unaccented counterparts.

		:param s: unicode text.
		:return: the folded text.
		'''
		accented = u"öÖüÜóÓőŐúÚéÉáÁűŰíÍ"
		plain = u"oOuUoOoOuUeEaAuUiI"
		# Code point -> code point table, the form unicode.translate expects.
		table = {}
		for src, dst in zip(accented, plain):
			table[ord(src)] = ord(dst)
		folded = s.translate(table)
		return folded.lower()

	def download_cover(self, log, result_queue, abort, title=None, authors=None, identifiers={}, timeout=30):
		cached_url = self.get_cached_cover_url(identifiers)
		if cached_url is None:
			log.info('No cached cover found, running identify')
			rq = Queue()
			self.identify(log, rq, abort, title=title, authors=authors, identifiers=identifiers)
			if abort.is_set():
				return
			results = []
			while True:
				try:
					results.append(rq.get_nowait())
				except Empty:
					break
			results.sort(key=self.identify_results_keygen(
				title=title, authors=authors, identifiers=identifiers))
			for mi in results:
				cached_url = self.get_cached_cover_url(mi.identifiers)
				if cached_url is not None:
					break
		if cached_url is None:
			log.info('No cover found')
			return
		if abort.is_set():
			return
		br = self.browser
		log.info('Downloading cover from:', cached_url)
		try:
			cdata = br.open_novisit(cached_url, timeout=timeout).read()
			result_queue.put((self, cdata))
		except:
			log.exception('Failed to download cover from:', cached_url)