﻿#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
						print_function)

__license__   = 'GPL v3'
__copyright__ = '2017, Wrangly <andreatos@gmail.com>, Kloon <kloon@techgeek.co.in>'
__docformat__ = 'restructuredtext en'

import re
import time
import urllib
from Queue import Queue, Empty
from lxml.html import fromstring
from calibre import as_unicode
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source, Option
from calibre.utils.cleantext import clean_ascii_chars
import lxml, sys, traceback
from calibre import browser


class Biblionet_gr(Source):
    '''
    Calibre metadata source plugin for Biblionet.gr.

    ``identify`` builds a search URL (by ISBN, or by title and first author),
    collects book page URLs from the search results page and starts one
    ``Worker`` thread per URL. ``download_cover`` resolves a cover URL from
    the plugin's cache, running ``identify`` first when nothing is cached.
    '''

    name = 'Biblionet_gr'
    description = _('Downloads metadata and covers from Biblionet.gr')
    author = 'Wrangly'
    version = (1, 1, 0)
    minimum_calibre_version = (0, 8, 0)

    capabilities = frozenset(['identify', 'cover'])
    # NOTE(review): this advertises 'identifier:biblionet_gr', but the lookups
    # in identify() and get_cached_cover_url() read the 'biblionet' key.
    # Confirm against the Worker which key is actually stored.
    touched_fields = frozenset(['title', 'authors', 'identifier:isbn', 'identifier:biblionet_gr', 'tags', 'comments', 'publisher', 'pubdate'])
    has_html_comments = False
    supports_gzip_transfer_encoding = False
    can_get_multiple_covers = False

    BASE_URL = 'http://www.biblionet.gr'
    BOOK_URL = BASE_URL + '/book/'
    SEARCH_URL = BASE_URL + '/main.asp?page=results'
    COVER_URL = BASE_URL + '/images/covers/b'

    def config_widget(self):
        '''
        Overriding the default configuration screen for our own custom configuration
        '''
        from calibre_plugins.biblionet_gr.config import ConfigWidget
        return ConfigWidget(self)

    def create_query(self, log, title=None, authors=None, identifiers={}):
        '''
        Build the Biblionet.gr search URL for the given metadata.

        A valid ISBN takes precedence over everything else. Otherwise the
        title and the first author are URL-quoted (as UTF-8) into the query.

        :param log: logger supplied by calibre (not used here)
        :param title: book title, or None
        :param authors: list of author names, or None
        :param identifiers: dict of identifiers; only 'isbn' is read
        :return: the search URL, or None when there is no ISBN, title or
                 author to search for
        '''
        isbn = check_isbn(identifiers.get('isbn', None))
        if isbn is not None:
            return Biblionet_gr.SEARCH_URL + '&summary=&isbn=' + '%s' % isbn

        if not title and not authors:
            # Nothing to search for; the caller reports insufficient metadata.
            return None

        search_title = urllib.quote(title.encode('utf-8')) if title else ''
        # Guard against an empty authors list as well as None.
        search_author = urllib.quote(authors[0].encode('utf-8')) if authors else ''

        import calibre_plugins.biblionet_gr.config as cfg
        default_get_all_authors = cfg.DEFAULT_STORE_VALUES[cfg.KEY_GET_ALL_AUTHORS]
        get_all_authors = cfg.plugin_prefs[cfg.STORE_NAME].get(
            cfg.KEY_GET_ALL_AUTHORS, default_get_all_authors)
        # '&PerKind=1' is appended only when the preference is enabled.
        person_kind = '&PerKind=1' if get_all_authors else ''

        return (Biblionet_gr.SEARCH_URL + '&title=' + '%s' % search_title +
                '&person=' + '%s' % search_author + '&person_ID=' + person_kind)

    def get_cached_cover_url(self, identifiers):
        '''
        Return the cached cover URL for the given identifiers, or None.

        The 'biblionet' identifier is used when present; otherwise a valid
        ISBN is mapped to an identifier through the plugin's ISBN cache.
        '''
        biblionet_id = identifiers.get('biblionet', None)
        if biblionet_id is None:
            isbn = check_isbn(identifiers.get('isbn', None))
            if isbn is not None:
                biblionet_id = self.cached_isbn_to_identifier(isbn)
        if biblionet_id is None:
            return None
        return self.cached_identifier_to_cover_url(biblionet_id)

    def identify(self, log, result_queue, abort, title, authors,
            identifiers={}, timeout=30):
        '''
        Find candidate book pages and start a Worker for each of them.

        Note this method will retry without identifiers automatically if no
        match is found with identifiers.

        :param log: logger supplied by calibre
        :param result_queue: queue handed to every Worker
        :param abort: event; when set, the method stops waiting and returns
        :param title: book title, or None
        :param authors: list of author names, or None
        :param identifiers: dict of identifiers ('biblionet' and 'isbn' are read)
        :param timeout: timeout in seconds for the search request
        :return: None normally, or an error message string when the search
                 request or the parsing of its response failed
        '''
        matches = []
        query = None
        biblionet_id = identifiers.get('biblionet', None)
        # Needed by the 404 handling below.
        isbn = check_isbn(identifiers.get('isbn', None))
        log.info(u'\nTitle:%s\nAuthors:%s\n'%(title, authors))
        br = browser()
        if biblionet_id:
            matches.append(Biblionet_gr.BOOK_URL + biblionet_id)
        else:
            query = self.create_query(log, title=title, authors=authors, identifiers=identifiers)
            if query is None:
                log.error('Insufficient metadata to construct query')
                return
            response = None
            try:
                log.info('Querying: %s'%query)
                response = br.open(query, timeout=timeout)
            except Exception as e:
                if isbn and callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
                    # No response to parse: fall through with no matches so
                    # the title/author retry below gets its chance.
                    log.info('Failed to find match for ISBN: %s'%isbn)
                else:
                    err = 'Failed to make identify query: %r'%query
                    log.exception(err)
                    return as_unicode(e)

            if response is not None:
                try:
                    raw = response.read().strip()
                    raw = raw.decode('utf-8', errors='replace')
                    if not raw:
                        log.error('Failed to get raw result for query: %r'%query)
                        return
                    root = fromstring(clean_ascii_chars(raw))
                except Exception:
                    msg = 'Failed to parse biblionet.gr page for query: %r'%query
                    log.exception(msg)
                    return msg
                self._parse_search_results(log, title, authors, root, matches, timeout)

        if abort.is_set():
            return

        if not matches:
            if identifiers and title and authors:
                log.info('No matches found with identifiers, retrying using only'
                        ' title and authors')
                return self.identify(log, result_queue, abort, title=title,
                        authors=authors, timeout=timeout)
            log.error('No matches found with query: %r'%query)
            return

        from calibre_plugins.biblionet_gr.worker import Worker
        workers = [Worker(url, result_queue, br, log, i, self) for i, url in
                enumerate(matches)]

        for w in workers:
            w.start()
            # Stagger the thread starts slightly.
            time.sleep(0.1)

        # Wait until every worker has finished or the caller aborts.
        while not abort.is_set():
            a_worker_is_alive = False
            for w in workers:
                w.join(0.2)
                if abort.is_set():
                    break
                if w.is_alive():
                    a_worker_is_alive = True
            if not a_worker_is_alive:
                break

        return None

    def _parse_search_results(self, log, orig_title, orig_authors, root, matches, timeout):
        '''
        Append book page URLs found in a search results page to ``matches``.

        :param log: logger supplied by calibre
        :param orig_title: title searched for (not used here)
        :param orig_authors: authors searched for (not used here)
        :param root: parsed lxml tree of the search results page
        :param matches: list extended in place with unique book URLs
        :param timeout: not used here
        '''
        import calibre_plugins.biblionet_gr.config as cfg
        max_results = cfg.plugin_prefs[cfg.STORE_NAME][cfg.KEY_MAX_DOWNLOADS]

        results = root.xpath('//*[@cellpadding="10"]')
        log.info('Found %d books (max: %d)'%(len(results), max_results))
        if not results:
            return

        # The link XPath is absolute ('//'), so it selects from the whole
        # document no matter which result element it is run on. Evaluate it
        # once instead of once per result element.
        book_urls = root.xpath('//a[contains(@href,"/book/")]//@href ')
        added = 0
        for book_url in book_urls:
            # The first run of 3 to 6 digits in the link is used as the book id.
            m = re.search(r'(\d{3,6})', book_url)
            if not m:
                continue
            log.info('Book %d URL: %r'%(added+1, book_url))
            result_url = Biblionet_gr.BOOK_URL + m.group()
            if result_url not in matches:
                matches.append(result_url)
                added += 1
            if added >= max_results:
                return

    def download_cover(self, log, result_queue, abort,
            title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
        '''
        Resolve the cover URL(s) for a book and download them.

        Does nothing when no title is given.
        '''
        if not title:
            return
        urls = self.get_image_urls(title, authors, identifiers, log, abort, timeout)
        # NOTE(review): download_multiple_covers is inherited from Source and
        # is not visible here; confirm that its default preference lookup
        # works for a plugin that declares no 'max_covers' option.
        self.download_multiple_covers(title, authors, urls, get_best_cover, timeout, result_queue, abort, log)

    def get_image_urls(self, title, authors, identifiers, log, abort, timeout):
        '''
        Return a list with the cover URL for the book, or an empty list.

        The cache is consulted first. On a miss, ``identify`` is run and the
        cache is consulted again for each result, best match first.

        :return: ``[url]`` when a cover URL is known, otherwise ``[]``
                 (also when the operation was aborted)
        '''
        cached_url = self.get_cached_cover_url(identifiers)
        if cached_url is None:
            log.info('No cached cover found, running identify')
            rq = Queue()
            self.identify(log, rq, abort, title=title, authors=authors,
                    identifiers=identifiers, timeout=timeout)
            if abort.is_set():
                return []
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            results.sort(key=self.identify_results_keygen(
                title=title, authors=authors, identifiers=identifiers))
            for mi in results:
                cached_url = self.get_cached_cover_url(mi.identifiers)
                if cached_url is not None:
                    break

        if cached_url is not None:
            # The consumer takes a list of URLs; a bare string would be
            # sliced and iterated character by character.
            return [cached_url]

        log.info('No cover found')
        return []
	