﻿
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
						print_function)

__license__   = 'GPL v3'
__copyright__ = '2011, Hoffer Csaba <csaba.hoffer@gmail.com>'
__docformat__ = 'restructuredtext hu'

import time
import urllib
from Queue import Queue, Empty
from lxml.html import fromstring
import lxml.etree as etree
from calibre import as_unicode
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source
from calibre.utils.cleantext import clean_ascii_chars
import lxml, sys, traceback
from calibre import browser

class Moly(Source):
	'''
	Calibre metadata source plugin for Moly.hu, declaring the ``identify``
	and ``cover`` capabilities.
	'''
	# Plugin descriptors.
	name					= 'Moly'
	description				= _('Downloads metadata and covers from Moly.hu')
	author					= 'Hoffer Csaba, Gegesy Zsombor'
	version					= (1, 0, 5)
	minimum_calibre_version = (0, 8, 0)

	# What the plugin can do and which metadata fields it declares it may set.
	capabilities = frozenset(['identify', 'cover'])
	touched_fields = frozenset(['title', 'authors', 'identifier:moly', 'tags', 'comments', 'rating', 'series'])
	# NOTE(review): the two flags below are presumably read by the calibre
	# Source framework; their exact effect is not visible in this file.
	has_html_comments = False
	supports_gzip_transfer_encoding = False

	# Prefix that is joined with a moly identifier to form a book page URL.
	BASE_URL = "http://moly.hu/konyvek/"
	# Site root that is prepended to site-relative links.
	SITE_URL = "http://moly.hu"
	
	# Maps a moly identifier to the cover URLs stored via set_cover_urls().
	# Defined on the class, so the mapping is shared by every instance.
	cover_urls = {}
	
	def config_widget(self):
		'''
		Build the plugin's own settings widget, which replaces the default
		configuration screen.

		:return: a ``ConfigWidget`` constructed with this plugin instance.
		'''
		# Function-scope import, kept local to this method.
		import calibre_plugins.moly.config as cfg
		return cfg.ConfigWidget(self)
		
	def create_query(self, log, title=None, authors=None):
		'''
		Build the Moly.hu search URL for the given title and authors.

		:param log: logger supplied by the caller (not used here).
		:param title: book title, or None/empty when unknown.
		:param authors: list of author names; only the first one is used.
			None and an empty list are both treated as "no author".
		:return: the search URL, or None when neither a title nor an author
			is available to search for.
		'''
		# ``quote`` lives in ``urllib`` on Python 2 and in ``urllib.parse``
		# on Python 3; support both.
		try:
			from urllib import quote
		except ImportError:
			from urllib.parse import quote

		# Indexing an empty author list unconditionally would raise IndexError.
		first_author = authors[0] if authors else None
		if not title and not first_author:
			# Nothing to search for; the caller reports insufficient metadata.
			return None

		search_title = quote(title.encode('utf-8')) if title else ''
		search_author = quote(first_author.encode('utf-8')) if first_author else ''
		return 'http://moly.hu/kereses?q=%s+%s&x=0&y=0'%(search_author, search_title)
	def get_cached_cover_url(self, identifiers):
		url = None
		moly_id = identifiers.get('moly', None)
		if moly_id is None:
			isbn = check_isbn(identifiers.get('isbn', None))
			if isbn is not None:
				moly_id = self.cached_isbn_to_identifier(isbn)
		if moly_id is not None:
			url = self.cached_identifier_to_cover_url(moly_id)
			if url is None:
				url = self.get_cover_urls(moly_id)
		if url is not None:
			self.moly_url(url)
		else: return None
	def set_cover_urls(self, moly_id, urls):
		self.cover_urls[moly_id] = urls
	def get_cover_urls(self, moly_id):
		return self.cover_urls[moly_id]
	def get_cover_urls_by_identifiers(self, identifiers):
		if identifiers is not None:
			return self.cover_urls[identifiers.get('moly',None)]
		else: return None
	def moly_url(self, url):
		if url.startswith(Moly.SITE_URL):
			return url
		else: return Moly.SITE_URL + url
	def parse_response_to_xml(self, response, query, log=None):
		'''
		Read an HTTP response and parse its body into an lxml HTML tree.

		:param response: object whose ``read()`` returns the raw page bytes.
		:param query: the URL that was requested; used in the error message.
		:param log: optional logger; when given, an empty body is reported
			through ``log.error`` before the exception is raised.
		:return: the root element returned by ``lxml.html.fromstring``.
		:raises ValueError: when the response body is empty. The original
			code referenced an undefined ``log`` here and therefore failed
			with a NameError; the failure is now explicit and descriptive.
		'''
		raw = response.read().strip()
		if not raw:
			msg = 'Failed to get raw result for query: %r'%query
			if log is not None:
				log.error(msg)
			raise ValueError(msg)
		# Undecodable bytes are replaced rather than aborting the parse.
		text = raw.decode('utf-8', 'replace')
		return fromstring(clean_ascii_chars(text))
		
	def identify(self, log, result_queue, abort, title, authors,
			identifiers={}, timeout=30):
		'''
		Look a book up on Moly.hu and start one worker per matching page.

		When ``identifiers`` carries a ``moly`` id the book page URL is built
		directly from it; otherwise the site search is queried with the title
		and authors and the result page is scanned for book links.

		:param log: logger used for progress and error reporting.
		:param result_queue: queue handed to every ``Worker``.
		:param abort: event-like object polled through ``is_set()``.
		:param title: book title or None.
		:param authors: list of author names or None.
		:param identifiers: dict of identifier type -> value; only the
			``moly`` entry is read. The dict is never mutated, so the shared
			default is harmless.
		:param timeout: timeout in seconds for the search request.
		:return: None when the lookup ran (or was aborted / found nothing);
			an error message string when the search request or the parsing
			of its response failed.
		'''
		identifiers = identifiers or {}
		matches = []
		moly_id = identifiers.get('moly', None)
		log.info(u'\nTitle:%s\nAuthors:%s\n'%(title, authors))
		br = browser()
		if moly_id:
			matches.append(Moly.BASE_URL + moly_id)
		else:
			query = self.create_query(log, title=title, authors=authors)
			if query is None:
				log.error('Insufficient metadata to construct query')
				return
			try:
				log.info('Querying: %s'%query)
				# Honour the caller's timeout instead of blocking indefinitely.
				response = br.open(query, timeout=timeout)
			except Exception as e:
				log.exception('Failed to make identify query: %r'%query)
				return as_unicode(e)

			msg = 'Failed to parse Moly page for query: %r'%query
			try:
				root = self.parse_response_to_xml(response, query)
			except Exception:
				log.exception(msg)
				return msg
			if root is None:
				# Nothing could be parsed from the response.
				log.error(msg)
				return msg
			log.info("raw result is %s"%root)
			self._parse_search_results(log, title, authors, root, matches, timeout)

		if abort.is_set():
			return

		if not matches:
			# ``matches`` can only be empty on the search path, so ``query``
			# is always bound here. The query is built from title and authors
			# alone, so retrying "without identifiers" would only re-send the
			# identical request; report the miss instead.
			log.error('No matches found with query: %r'%query)
			return

		from calibre_plugins.moly.worker import Worker
		workers = [Worker(url, result_queue, br, log, i, self) for i, url in
				enumerate(matches)]
		for w in workers:
			w.start()
			# Short pause between worker starts.
			time.sleep(0.1)
		# Wait until every worker has finished or the caller aborts.
		while not abort.is_set():
			a_worker_is_alive = False
			for w in workers:
				w.join(0.2)
				if abort.is_set():
					break
				if w.is_alive():
					a_worker_is_alive = True
			if not a_worker_is_alive:
				break
		return None
	def _parse_search_results(self, log, orig_title, orig_authors, root, matches, timeout):
		'''
		Collect book page URLs from a parsed search result page.

		:param log: logger; every link found is logged.
		:param orig_title: title that was searched for (not used here).
		:param orig_authors: authors that were searched for (not used here).
		:param root: parsed search page supporting ``xpath()``.
		:param matches: list that receives the absolute book URLs, in page
			order and without duplicates.
		:param timeout: not used here.
		:return: None; results are appended to ``matches``.
		'''
		import calibre_plugins.moly.config as cfg
		max_results = cfg.plugin_prefs[cfg.STORE_NAME][cfg.KEY_MAX_DOWNLOADS]
		found = set()
		for result in root.xpath('//*[@id="main"]//div[@id="content"]/table/tr/td//p'):
			for book_url in result.xpath('a/@href'):
				log.info('Book URL:%r'%book_url)
				result_url = Moly.SITE_URL + book_url
				if result_url not in found:
					matches.append(result_url)
					found.add(result_url)
				if len(found) >= max_results:
					# Stop scanning altogether. A ``break`` would only leave
					# the inner loop and let each further paragraph add one
					# more match beyond the configured limit.
					return
	def download_cover(self, log, result_queue, abort, title=None, authors=None, identifiers={}, timeout=30):
		'''
		Download the cover images of a book and put them on ``result_queue``.

		Cover URLs cached for the ``moly`` identifier are used when present;
		otherwise ``identify`` is run first and the URLs cached for its
		results are used, best match first.

		:param log: logger used for progress and error reporting.
		:param result_queue: queue receiving ``(plugin, image_data)`` tuples.
		:param abort: event-like object polled through ``is_set()``.
		:param title: book title or None.
		:param authors: list of author names or None.
		:param identifiers: dict of identifier type -> value; not mutated.
		:param timeout: timeout in seconds for the identify run and for each
			cover download.
		:return: None.
		'''
		def cached_urls(ids):
			# An id that was never cached counts as "no cached cover", even
			# if the lookup signals it with a KeyError.
			try:
				return self.get_cover_urls_by_identifiers(ids)
			except KeyError:
				return None

		cached_url_list = cached_urls(identifiers)
		if cached_url_list is None:
			log.info('No cached cover found, running identify')
			rq = Queue()
			# Forward the timeout so the lookup obeys the caller's limit.
			self.identify(log, rq, abort, title=title, authors=authors,
					identifiers=identifiers, timeout=timeout)
			if abort.is_set():
				return
			results = []
			while True:
				try:
					results.append(rq.get_nowait())
				except Empty:
					break
			results.sort(key=self.identify_results_keygen(
				title=title, authors=authors, identifiers=identifiers))
			# Build a fresh list so the cached lists are never modified.
			cached_url_list = []
			for mi in results:
				cached_url = cached_urls(mi.identifiers)
				if cached_url is not None:
					cached_url_list.extend(cached_url)
		if not cached_url_list:
			log.info('No cover found')
			return

		if abort.is_set():
			return
		br = self.browser
		log.info('Downloading cover from:', cached_url_list)
		for cached_url in cached_url_list:
			try:
				m_url = self.moly_url(cached_url)
				cdata = br.open_novisit(m_url, timeout=timeout).read()
				log.info("from %r result length is %r"%(m_url, len(cdata)))
				result_queue.put((self, cdata))
			except Exception:
				# One failed download must not prevent the remaining covers.
				log.exception('Failed to download cover from:', cached_url)
			if abort.is_set():
				return
	def get_book_url(self, identifiers):
		moly_id = identifiers.get('moly', None)
		if moly_id is not None:
			return Moly.BASE_URL + moly_id
		return None
