#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2014, Roman Cupisz <roman.cupisz+calibre@gmail.com>'
__docformat__ = 'restructuredtext en'

import time, string
from urllib import quote
from Queue import Queue, Empty
from collections import OrderedDict

from lxml.html import fromstring, tostring

from calibre import as_unicode
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source
from calibre.utils.icu import lower
from calibre.utils.cleantext import clean_ascii_chars
import lxml, sys, traceback
from calibre.utils.localization import get_udc

from string import maketrans

class LubimyCzytac(Source):
    '''
    Calibre metadata source plugin that identifies books and downloads
    covers from LubimyCzytac.pl.

    Search result pages are fetched and parsed here; the individual book
    pages are processed by ``Worker`` threads from
    ``calibre_plugins.lubimyczytac.worker``.
    '''

    name                    = 'LubimyCzytac'
    description             = _('Pobiera metadane i okładki z LubimyCzytac.pl')
    author                  = 'Roman Cupisz'
    version                 = (2, 0, 0)
    minimum_calibre_version = (2, 0, 0)

    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'identifier:lubimyczytac',
        'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate',
        'series', 'tags', 'languages'])
    has_html_comments = True
    supports_gzip_transfer_encoding = True

    # Root of every URL built by this plugin.
    BASE_URL = 'http://lubimyczytac.pl'

    def config_widget(self):
        '''
        Overriding the default configuration screen for our own custom configuration
        '''
        from calibre_plugins.lubimyczytac.config import ConfigWidget
        return ConfigWidget(self)

    def get_book_url(self, identifiers):
        '''
        Return ``('lubimyczytac', id, url)`` for the book page matching the
        ``lubimyczytac`` identifier, or None when that identifier is absent.
        '''
        lubimy_czytac_id = identifiers.get('lubimyczytac', None)
        if lubimy_czytac_id:
            url = '%s/ksiazka/%s/'%(LubimyCzytac.BASE_URL, lubimy_czytac_id)
            return ('lubimyczytac', lubimy_czytac_id, url)
        return None

    @staticmethod
    def _quote_tokens(tokens):
        '''
        URL-quote every token (unicode tokens are UTF-8 encoded first) and
        join them with ``+`` to form a search phrase.
        '''
        return '+'.join(
            quote(t.encode('utf-8') if isinstance(t, unicode) else t)
            for t in tokens)

    def create_query(self, log, title=None, authors=None, identifiers={}):
        '''
        Build the search URL for the given metadata.

        Precedence: a valid ISBN, then the title tokens, then the tokens of
        the first author. Returns None when none of these yields a phrase.
        '''
        isbn = check_isbn(identifiers.get('isbn', None))
        if isbn is not None:
            return '%s/szukaj/ksiazki?phrase=%s'%(LubimyCzytac.BASE_URL, isbn)

        search_url = '%s/szukaj/ksiazki?phrase=%%s&main_search=1' % LubimyCzytac.BASE_URL

        if title:
            title = title.replace('?','')
            title_tokens = list(self.get_title_tokens(title,
                                strip_joiners=False, strip_subtitle=True))
            if title_tokens:
                # NOTE(review): when a title is usable the authors are not
                # added to the phrase - this mirrors the existing behaviour
                # and is presumably intentional; confirm.
                return search_url % self._quote_tokens(title_tokens)

        if authors:
            # Materialise the tokens so the emptiness test below is
            # meaningful even if get_author_tokens returns a generator.
            author_tokens = list(self.get_author_tokens(authors,
                    only_first_author=True))
            if author_tokens:
                return search_url % self._quote_tokens(author_tokens)

        return None

    def get_cached_cover_url(self, identifiers):
        '''
        Return the cached cover URL for these identifiers, or None.

        The ``lubimyczytac`` identifier is used directly; otherwise the ISBN
        is mapped to an identifier through the ISBN cache first.
        '''
        lubimy_czytac_id = identifiers.get('lubimyczytac', None)
        if lubimy_czytac_id is None:
            isbn = identifiers.get('isbn', None)
            if isbn is not None:
                lubimy_czytac_id = self.cached_isbn_to_identifier(isbn)
        if lubimy_czytac_id is None:
            return None
        return self.cached_identifier_to_cover_url(lubimy_czytac_id)

    def cached_identifier_to_cover_url(self, id_):
        '''
        Look up the cover URL cached for ``id_`` while holding the cache
        lock, falling back to the entry stored under ``'small/' + id_``.
        '''
        with self.cache_lock:
            url = self._get_cached_identifier_to_cover_url(id_)
            if not url:
                # Try for a "small" image in the cache
                url = self._get_cached_identifier_to_cover_url('small/'+id_)
            return url

    def _get_cached_identifier_to_cover_url(self, id_):
        '''Raw cache lookup; returns None when ``id_`` is not cached.'''
        # This must only be called once we have the cache lock
        return self._identifier_to_cover_url_cache.get(id_, None)

    def identify(self, log, result_queue, abort, title=None, authors=None,
            identifiers={}, timeout=30):
        '''
        Find candidate book pages and start one Worker per candidate URL;
        the workers are handed ``result_queue``.

        Note this method will retry without identifiers automatically if no
        match is found with identifiers.

        Returns None normally, or an error message when the search request
        fails or its response cannot be parsed.
        '''
        matches = []
        query = None
        # If we have a LubimyCzytac.pl id then we do not need to fire a "search"
        # at lubimyczytac.pl. Instead we will go straight to the URL for that book.
        lubimy_czytac_id = identifiers.get('lubimyczytac', None)
        log.info(u'\nTitle:%s\nAuthors:%s\n'%(title, authors))
        isbn = check_isbn(identifiers.get('isbn', None))
        br = self.browser
        if lubimy_czytac_id:
            matches.append('%s/ksiazka/%s/'%(LubimyCzytac.BASE_URL, lubimy_czytac_id))
        else:
            if title is not None:
                title = get_udc().decode(title)
            else:
                title = ''
            if authors is not None:
                authors = [get_udc().decode(a) for a in authors]
            query = self.create_query(log, title=title, authors=authors,
                    identifiers=identifiers)
            if query is None:
                log.error('Insufficient metadata to construct query')
                return

            response = None
            try:
                log.info('Zapytanie: %s'%query)
                response = br.open_novisit(query, timeout=timeout)
            except Exception as e:
                is_404 = (callable(getattr(e, 'getcode', None))
                          and e.getcode() == 404)
                if isbn and is_404:
                    # We did a lookup by ISBN but did not find a match
                    # We will fallback to doing a lookup by title author
                    log.info('Failed to find match for ISBN: %s'%isbn)
                elif is_404:
                    log.error('No matches for identify query')
                    return as_unicode(e)
                else:
                    # Any other failure: report it instead of carrying on
                    # with no response to read.
                    log.exception('Failed to make identify query: %r'%query)
                    return as_unicode(e)

            # response stays None only after a failed ISBN lookup; in that
            # case we skip parsing and let the retry below take over.
            if response is not None:
                try:
                    raw = response.read().strip()
                    raw = raw.decode('utf-8', errors='replace')
                    if not raw:
                        log.error('Failed to get raw result for query')
                        return
                    root = fromstring(clean_ascii_chars(raw))
                except Exception:
                    msg = 'Failed to parse LubimyCzytac.pl page for query'
                    log.exception(msg)
                    return msg
                # Now grab the matches from the search results, provided the
                # title and authors appear to be for the same book
                self._parse_search_results(log, title, authors, root, matches, timeout)

        if abort.is_set():
            return

        if not matches:
            if identifiers and title and authors:
                log.info('No matches found with identifiers, retrying using only'
                        ' title and authors')
                return self.identify(log, result_queue, abort, title=title,
                        authors=authors, timeout=timeout)
            log.error('No matches found with query: %r'%query)
            return

        log.debug('Starting workers for: %s' % (matches,))
        from calibre_plugins.lubimyczytac.worker import Worker
        workers = [Worker(url, result_queue, br, log, i, self) for i, url in
                enumerate(matches) if url]

        for w in workers:
            w.start()
            # Don't send all requests at the same time
            time.sleep(0.1)

        # Poll the workers until all have finished or an abort is requested.
        while not abort.is_set():
            a_worker_is_alive = False
            for w in workers:
                w.join(0.2)
                if abort.is_set():
                    break
                if w.is_alive():
                    a_worker_is_alive = True
            if not a_worker_is_alive:
                break

        return None

    def _parse_search_results(self, log, orig_title, orig_authors, root, matches, timeout):
        '''
        Scan the parsed search page ``root`` and append candidate book URLs
        to ``matches`` (modified in place).

        A result matching both title and authors is appended and ends the
        scan. Results matching only the title or only the authors are
        appended until the configured maximum is reached. If nothing matched
        at all, every complete result that was seen is used as a fallback.
        ``timeout`` is accepted but not used here.
        '''
        results = root.xpath('*//div[contains(@class,"book-data")]')
        import calibre_plugins.lubimyczytac.config as cfg
        max_results = cfg.plugin_prefs[cfg.STORE_NAME][cfg.KEY_MAX_DOWNLOADS]
        no_matches = []
        log.info('Parsing results: %s ' % results )
        for i, result in enumerate(results, 1):
            log.info('Parsing result %s: %s ' % (i,result))
            title = result.xpath('./div[contains(@class,"book-general-data")]//a[@class="bookTitle"]//text()')
            book_url = result.xpath('./div[contains(@class,"book-general-data")]/a[@class="bookTitle"]/@href')
            authors = result.xpath('./div[contains(@class,"book-general-data")]//a[contains(@href,"autor")]//text()')
            if not title or not book_url or not authors:
                # Incomplete entry: nothing to compare or link to.
                continue
            title = title[0]
            book_url = book_url[0]
            log.info('[%s] Original title: %s, Title: %s, Original authors: %s, Authors:%s, nBook url: %s' % (i, orig_title, title, orig_authors, authors, book_url))
            title_matches = self.match(title, orig_title)
            if title_matches and self.contains(authors, orig_authors):
                matches.append(book_url)
                log.info('[%s] matches title and autors' % (i))
                break
            if title_matches:
                matches.append(book_url)
                log.info('[%s] matches title ' % (i))
            else:
                log.info('match authors %s %s  to orig_authors %s %s'%(authors,type(authors),orig_authors,type(orig_authors)))
                if self.contains(authors,orig_authors):
                    matches.append(book_url)
                    log.info('[%s] matches autors' % (i))
                else:
                    no_matches.append(book_url)
                    log.info('[%s] no matches' % (i))
            if len(matches) >= max_results:
                log.info('reached max results limit: %s' % (max_results))
                break
        if no_matches and not matches:
            matches.extend(no_matches)

    def match(self, item1, item2):
        '''
        Return True when both items are strings and one equals or contains
        the other (case-sensitive). Returns False when ``item2`` is empty or
        either item is not a string.
        '''
        if not item2:
            return False
        if isinstance(item1, (str, unicode)) and isinstance(item2, (str, unicode)):
            return item1 == item2 or item1 in item2 or item2 in item1
        return False

    def contains(self, list1, list2):
        '''
        Return True when any element of ``list1`` occurs inside any element
        of ``list2``; False when either list is empty or None.
        '''
        if not list1 or not list2:
            return False
        return any(el in s for el in list1 for s in list2)

    def download_cover(self, log, result_queue, abort,
            title=None, authors=None, identifiers={}, timeout=30):
        '''
        Download the cover and put ``(self, cover_data)`` on ``result_queue``.

        When no cover URL is cached for the identifiers, identify() is run
        first and the cache is consulted again for each of its results in
        sorted order. Download failures are logged, not raised.
        '''
        cached_url = self.get_cached_cover_url(identifiers)
        if cached_url is None:
            log.info('No cached cover found, running identify')
            rq = Queue()
            # Honour the caller's timeout for the nested lookup as well.
            self.identify(log, rq, abort, title=title, authors=authors,
                    identifiers=identifiers, timeout=timeout)
            if abort.is_set():
                return
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            results.sort(key=self.identify_results_keygen(
                title=title, authors=authors, identifiers=identifiers))
            for mi in results:
                cached_url = self.get_cached_cover_url(mi.identifiers)
                if cached_url is not None:
                    break
        if cached_url is None:
            log.info('No cover found')
            return

        if abort.is_set():
            return
        br = self.browser
        log.info('Downloading cover from:', cached_url)
        try:
            cdata = br.open_novisit(cached_url, timeout=timeout).read()
            result_queue.put((self, cdata))
        except Exception:
            log.exception('Failed to download cover from:', cached_url)


if __name__ == '__main__':  # tests
    # To run these tests use:
    # calibre-debug -e __init__.py
    from calibre.ebooks.metadata.sources.test import (
        test_identify_plugin, title_test, authors_test, series_test)

    # Each case pairs the lookup input with the checks applied to the result.
    harry_potter_case = (
        {
            'title': 'Harry Potter i Kamień Filozoficzny',
            'authors': ['Joanne Kathleen Rowling'],
        },
        [
            title_test('Harry Potter i Kamień Filozoficzny'),
            authors_test(['Joanne Kathleen Rowling']),
            series_test('Harry Potter', 1.0),
        ],
    )

    baranek_case = (
        {
            'title': 'Baranek',
            'authors': ['Christopher Moore'],
        },
        [
            title_test('Baranek'),
            authors_test(['Christopher Moore']),
        ],
    )

    test_identify_plugin(LubimyCzytac.name, [harry_potter_case, baranek_case])


