#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
# *-* coding: utf-8 *-*
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2019, Daniel Prazak <kret33n@gmail.com>, 2020, Jindroush <jindroush@seznam.cz>'
__docformat__ = 'restructuredtext cs'

import time
try:
    from urllib.parse import quote
except ImportError:
    from urllib2 import quote

try:
    from queue import Empty, Queue
except ImportError:
    from Queue import Empty, Queue

from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source
from lxml.html import fromstring
from calibre.utils.cleantext import clean_ascii_chars

def load_url(log, query, br):
    """
    Fetch *query* with the browser *br* and parse the body as HTML.

    :param log: logger offering ``info``, ``error`` and ``exception``
    :param query: URL to open; converted with ``str()`` before opening
    :param br: browser object offering ``open_novisit``
    :return: tuple ``(root, response)`` - the parsed lxml tree and the raw
             response object (callers use ``response.geturl()``)
    :raises Exception: when the request fails, the body is empty, or the
                       body cannot be decoded/parsed
    """
    try:
        log.info('Querying: %s' % query)
        response = br.open_novisit(str(query))
    except Exception as e:
        log.exception(e)
        raise Exception('Failed to make identify query: %r - %s ' % (query, e))

    root = None
    try:
        raw = response.read().strip().decode('utf-8', errors='replace')
        if raw:
            root = fromstring(clean_ascii_chars(raw))
    except Exception:
        # ``except Exception`` (not a bare except) so that KeyboardInterrupt
        # and SystemExit are never converted into a parse error.
        msg = 'Failed to parse page for query: %r' % query
        log.exception(msg)
        raise Exception(msg)

    if root is None:
        # Raised outside the try block above so this specific message reaches
        # the caller instead of being relabelled as a parse failure.
        msg = 'Failed to get raw result for query: %r' % query
        log.error(msg)
        raise Exception(msg)
    return root, response

def remove_diacritic(text):
    """
    Return text with accents stripped: the string is decomposed (NFKD) and
    every character that is not plain ASCII is dropped
    """
    from unicodedata import normalize
    decomposed = normalize('NFKD', text)
    ascii_only = decomposed.encode('ascii', 'ignore')
    return ascii_only.decode('utf-8')
def remove_punctuation(text):
    """
    Return text without any character listed in string.punctuation
    """
    from string import punctuation
    deletion_table = str.maketrans('', '', punctuation)
    return text.translate(deletion_table)

# Activate translations for the `_()` calls used below. The name
# load_translations is not defined in this file; when the running calibre
# does not provide it the resulting NameError is deliberately ignored.
try:
    load_translations()
except NameError:
    pass # load_translations() added in calibre 1.9

# Metadata source plugin for databazeknih.cz: looks books (and tales) up by
# identifier, ISBN or title/author and hands the hits to worker threads.
class databazeknih(Source):
    # Plugin identity shown by calibre.
    name                    = 'su-databazeknih.cz'
    description             = _('Downloads metadata and covers from databazeknih.')
    author                  = 'seeder, soboli.ucho'
    version                 = (1, 5, 18)
    minimum_calibre_version = (0, 8, 0)

    # What the plugin can do and which metadata fields it may fill in.
    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'identifier:databazeknih', 'identifier:dbknih', 'identifier:dbk',
                                'identifier:isbn', 'identifier:databazeknih_povidka', 'tags', 'comments', 'rating',
                                'series', 'series_index', 'publisher', 'pubdate', 'languages'])
    # NOTE(review): the meaning of these two flags is defined by the Source
    # base class, which is not visible in this file.
    has_html_comments = True
    can_get_multiple_covers = True

    # The version tuple is rendered as a dotted string: (1, 5, 18) -> '1.5.18'.
    config_message = _('Plugin version: <b>%s</b> - Report errors and suggestions through <a href="https://www.mobileread.com/forums/showthread.php?t=356548">MobileRead</a> forum.')%str(version).strip('()').replace(', ', '.')

    # Prefix of every databazeknih URL built by this class.
    BASE_URL = "https://www.databazeknih.cz/"

    def config_widget(self):
        '''
        Return the plugin's own ConfigWidget, which replaces the default
        configuration screen
        '''
        import calibre_plugins.databazeknih.config as plugin_config
        widget = plugin_config.ConfigWidget(self)
        return widget
        
    def get_book_url(self, identifiers):
        databazeknih_id = identifiers.get('databazeknih', None)
        if databazeknih_id:
            return ('databazeknih', databazeknih_id, ''.join([databazeknih.BASE_URL, 'knihy/--', databazeknih_id]))

        dbknih_id = identifiers.get('dbknih', None)
        if dbknih_id:
            return ('dbknih', dbknih_id, ''.join([databazeknih.BASE_URL, 'knihy/--', dbknih_id]))

        dbk_id = identifiers.get('dbk', None)
        if dbk_id:
            return ('dbk', dbk_id, ''.join([databazeknih.BASE_URL, 'knihy/--', dbk_id]))
        
        databazeknih_povidka_id = identifiers.get('databazeknih_povidka', None)
        if databazeknih_povidka_id:
            return ('databazeknih_povidka', databazeknih_povidka_id, ''.join([databazeknih.BASE_URL, 'povidky/--', databazeknih_povidka_id]))
        return None

    def create_query(self, log, title=None, authors=None, tales=False, search_engine='databazeknih'):
        if title is None:
            title = ''
        if authors is None:
            authors = ''
        elif isinstance(authors, list):
            discard = ['Unknown', 'Neznámý']
            for d in discard:
                if d in authors:
                    authors.remove(d)
            authors = ' '.join(authors)

        search_page = ''
        if search_engine == 'databazeknih':
            if tales:
                search_page = ''.join([databazeknih.BASE_URL, 'search?in=tales&q={title}+{authors}'])
            else:
                search_page = ''.join([databazeknih.BASE_URL, 'search?q={title}+{authors}'])
        elif search_engine == 'google':
            if tales:
                search_page = ''.join(['https://www.google.com/search?q=site:', databazeknih.BASE_URL, 'povidky/ {title}+{authors}'])
            else:
                search_page = ''.join(['https://www.google.com/search?q=site:', databazeknih.BASE_URL, 'knihy/ {title}+{authors}'])

        return search_page.format(title=quote(title.encode('utf-8')),
                                  authors=quote(authors.encode('utf-8')))

    def get_cached_cover_url(self, identifiers):
        url = None
        databazeknih_id = identifiers.get('dbk', None)
        databazeknih_id = identifiers.get('dbknih', databazeknih_id)
        databazeknih_id = identifiers.get('databazeknih', databazeknih_id)
        if databazeknih_id is None:
            isbn = check_isbn(identifiers.get('isbn', None))
            if isbn is not None:
                databazeknih_id = self.cached_isbn_to_identifier(isbn)
        if databazeknih_id is not None:
            url = self.cached_identifier_to_cover_url(databazeknih_id)
            return url
        
    def search_title_for_metadata(self, title, identifiers):
        meta_dict = dict()
        if not title:
            return title, identifiers, meta_dict
        import re
        search_regex = r"(?:(?:isbn|ean|dbk|dbknih|databazeknih|publisher|pubdate|pubyear|databazeknih_povidka|dbknih_povidka|dbk_povidka|dbkp):(?:\S*)(?: |$))"
        meta_title = re.findall(search_regex, title)
        # Remove matched metadata from title
        title = re.sub(pattern=search_regex, string=title, repl='')
        title = ' '.join(title.split())

        meta_dict = dict([i.rstrip(' ').split(':', 1) for i in meta_title])

        identifiers_mapping = {
            'databazeknih': ['databazeknih', 'dbknih', 'dbk'],
            'databazeknih_povidka': ['databazeknih_povidka', 'dbknih_povidka', 'dbk_povidka', 'dbk_p', 'dbkp'],
            'isbn': ['isbn', 'ean']
        }
        for identifier, keys in identifiers_mapping.items():
            for key in keys:
                value = meta_dict.get(key, None)
                if value is not None:
                    identifiers[identifier] = value

        meta_dict_mapping = {
            'pubdate': ['pubdate', 'pubyear'],
            'publisher': ['publisher'],
        }
        remapped_meta_dict = dict()
        for identifier, keys in meta_dict_mapping.items():
            for key in keys:
                value = meta_dict.get(key, None)
                if value is not None:
                    remapped_meta_dict[identifier] = value
        meta_dict = remapped_meta_dict

        if identifiers.get('pubdate', None) and meta_dict.get('pubdate', None) is None:
            meta_dict['pubdate'] = identifiers['pubdate']
            identifiers.pop('pubdate')
        if identifiers.get('pubyear', None) and meta_dict.get('pubyear', None) is None:
            meta_dict['pubdate'] = identifiers['pubyear']
            identifiers.pop('pubyear')
        if identifiers.get('publisher', None) and meta_dict.get('publisher', None) is None:
            meta_dict['publisher'] = identifiers['publisher']
            identifiers.pop('publisher')

        if identifiers.get('dbk', None):
            identifiers['databazeknih'] = identifiers['dbk']
            identifiers.pop('dbk')
        if identifiers.get('dbknih', None):
            identifiers['databazeknih'] = identifiers['dbknih']
            identifiers.pop('dbknih')
        return title, identifiers, meta_dict

    def identify(self, log, result_queue, abort, title, authors,
            identifiers={}, timeout=30):
        '''
        Collect candidate pages on databazeknih and start one Worker per page.

        Search strategies are tried in this order; title/author strategies
        run only while no exact match exists and fewer than the configured
        maximum of matches has been collected:

        1. databazeknih identifier, 2. ISBN, 3. tale identifier,
        4. Google (books), 5. title + first author's last name (and reversed),
        6. title + authors, 7. title only, 8. authors only, 9. each author,
        10. each part of each author's name, 11. tales (Google, title +
        authors, title only, single words of the title).

        :param log: calibre log object
        :param result_queue: queue handed to the workers
        :param abort: event; when set no workers are started / waited for
        :param title: title text; may carry ``key:value`` tokens, see
                      search_title_for_metadata()
        :param authors: list of author names or None
        :param identifiers: dict of identifiers; a copy is used internally,
                            the caller's dict is left untouched
        :param timeout: passed through to the result parsers
        :return: None
        '''
        matches = []
        no_matches = []
        query = None
        br = self.browser

        # Work on a private copy: search_title_for_metadata() rewrites the
        # mapping in place, which would otherwise leak into the caller's dict
        # and into the shared ``{}`` default of this method.
        identifiers = dict(identifiers or {})

        # search for identifiers and extra metadata in title field format identifier:123456; e.g. databazeknih:1234, pubdate:2023
        title, identifiers, meta_dict = self.search_title_for_metadata(title, identifiers)
        log('identifiers:', identifiers, 'meta_dict:', meta_dict)
        databazeknih_id = identifiers.get('databazeknih', None)
        isbn = check_isbn(identifiers.get('isbn', None))

        import calibre_plugins.databazeknih.config as cfg
        dbknih_id_search = cfg.plugin_prefs[cfg.STORE_NAME].get(cfg.IDENTIFIER_SEARCH, cfg.DEFAULT_STORE_VALUES[cfg.IDENTIFIER_SEARCH])
        isbn_search = cfg.plugin_prefs[cfg.STORE_NAME].get(cfg.ISBN_SEARCH, cfg.DEFAULT_STORE_VALUES[cfg.ISBN_SEARCH])
        tales_search = cfg.plugin_prefs[cfg.STORE_NAME].get(cfg.TALES_SEARCH, cfg.DEFAULT_STORE_VALUES[cfg.TALES_SEARCH])
        max_results = cfg.plugin_prefs[cfg.STORE_NAME].get(cfg.KEY_MAX_DOWNLOADS, cfg.DEFAULT_STORE_VALUES[cfg.KEY_MAX_DOWNLOADS])
        google_engine = cfg.plugin_prefs[cfg.STORE_NAME].get(cfg.GOOGLE_SEARCH, cfg.DEFAULT_STORE_VALUES[cfg.GOOGLE_SEARCH])

        log.debug(u'\tTitle:%s\tAuthors:%s\t'%(title, authors))

        # search via databazeknih identifier
        exact_match = False
        if databazeknih_id and dbknih_id_search:
            try:
                response = br.open_novisit(''.join([databazeknih.BASE_URL, 'zajimavosti-knihy/--', databazeknih_id]))
                if response.geturl().find(databazeknih_id) != -1:
                    matches.append(databazeknih.BASE_URL + 'zajimavosti-knihy/--' + databazeknih_id)
                    exact_match = True
                else:
                    log.error('Wrong dbk identifier was inserted.\nContinuing with ISBN or Title/Author(s) search.')
            except Exception:
                log.error('Could not open book page. Wrong URL inserted.')

        # search via isbn identifier
        if not exact_match and isbn and isbn_search:
            _, response = load_url(log, ''.join([databazeknih.BASE_URL, 'search?q=', isbn]), br)
            # NOTE(review): str.find() returns -1 (truthy) on a miss, so the
            # second operand never rejects an absolute URL; effectively only
            # "the ISBN is no longer in the URL" is tested. Left unchanged on
            # purpose: requiring '/knihy/' could reject redirects to book
            # pages under another path prefix - confirm against the live site.
            if response.geturl().find(isbn) == -1 and response.geturl().find('/knihy/'):
                matches.append(''.join([response.geturl().replace('/knihy/', '/zajimavosti-knihy/')]))
                exact_match = True
            else:
                log.error('ISBN was not recognized.\nContinuing with Title/Author(s) search.')

        ## TALES searching
        # try search in tales (databazeknih_povidka identifier)
        databazeknih_povidka_id = identifiers.get('databazeknih_povidka', None)
        if databazeknih_povidka_id and dbknih_id_search:
            try:
                response = br.open_novisit(''.join([databazeknih.BASE_URL, 'povidky/--', databazeknih_povidka_id]))
                if response.geturl().find(databazeknih_povidka_id) != -1:
                    matches.append(databazeknih.BASE_URL + 'povidky/--' + databazeknih_povidka_id)
                    exact_match = True
                else:
                    log.error('Wrong dbk tale identifier was inserted.\nContinuing with tale Title/Author(s) search.')
            except Exception:
                log.error('Wrong dbk tale identifier was inserted.\nContinuing with tale Title/Author(s) search.')

        ## GOOGLE Search
        try:
            if not exact_match and len(matches) < max_results and google_engine:
                query = self.create_query(log, title=title, authors=authors, search_engine='google')
                # NOTE(review): the result of this first request is discarded;
                # kept because it may be needed before the cookies are set.
                root, response = load_url(log, query, br)
                log.debug(u'Querying via google: %s'%query)

                self._set_google_consent_cookies(br)
                root, response = load_url(log, query, br)
                self._parse_google_search_results(log, title, authors, root, matches, no_matches, timeout)
        except Exception as e:
            log.debug(u'Error while Google searching: %s'%e)

        ## best DBK search try (whole title + only first author's last name)
        author = ''
        if authors:
            author = authors[0]
            if ', ' in author:
                author = author.split(', ')[0]
            else:
                author = author.split(' ')[-1]
        if not exact_match and len(matches) < max_results and author:
            query = self.create_query(log, title=title, authors=author)
            log.debug('Querying for books best try (title + lastname)\n Query: %s'%query)
            root, response = load_url(log, query, br)
            self._parse_search_results(log, title, authors, root, matches, no_matches, timeout)
            query_sk = '%s&lang=sk'%query
            root, response = load_url(log, query_sk, br)
            self._parse_search_results(log, title, authors, root, matches, no_matches, timeout)
        # Same limit check as every other strategy: once the match list is
        # full the parsers cannot add anything, so the requests are skipped.
        if not exact_match and len(matches) < max_results and author:
            query = self.create_query(log, title=author, authors=title)
            log.debug('Querying for books best try again but reverse (lastname + title)\n Query: %s'%query)
            root, response = load_url(log, query, br)
            self._parse_search_results(log, title, authors, root, matches, no_matches, timeout)
            query_sk = '%s&lang=sk'%query
            root, response = load_url(log, query_sk, br)
            self._parse_search_results(log, title, authors, root, matches, no_matches, timeout)

        # search via title and authors field
        if not exact_match and len(matches) < max_results:
            query = self.create_query(log, title=title, authors=authors)
            root, response = load_url(log, query, br)
            if response.geturl().find('search?') == -1:
                # the site redirected away from the search page
                matches.append(''.join([response.geturl().replace('/knihy/', '/zajimavosti-knihy/')]))
                log.info('ISBN in query, redirected right to book page...')
            else:
                log.debug(u'Querying title + authors: %s'%query)
                # the page fetched above is parsed directly (no second request)
                self._parse_search_results(log, title, authors, root, matches, no_matches, timeout)

        # try only with title
        if not exact_match and len(matches) < max_results and title:
            query = self.create_query(log, title=title, authors=None)
            root, response = load_url(log, query, br)
            if response.geturl().find('search?') == -1:
                matches.append(''.join([response.geturl().replace('/knihy/', '/zajimavosti-knihy/')]))
                log.info('ISBN in query, redirected right to book page...')
            else:
                log.debug(u'Querying only title: %s'%query)
                self._parse_search_results(log, title, authors, root, matches, no_matches, timeout)

        # (a "single word of the title" book search existed here but is disabled)

        # try only with authors
        if not exact_match and len(matches) < max_results and authors:
            query = self.create_query(log, title=None, authors=authors)
            log.debug(u'Querying only authors: %s'%query)
            root, response = load_url(log, query, br)
            self._parse_search_results(log, title, authors, root, matches, no_matches, timeout)

        # try only with one author
        if not exact_match and len(matches) < max_results and authors:
            for auth in authors:
                if len(matches) >= max_results:
                    break
                query = self.create_query(log, title=None, authors=[auth])
                log.debug('Querying only one author named %s \n Query: %s' %(auth, query))
                root, response = load_url(log, query, br)
                self._parse_search_results(log, title, authors, root, matches, no_matches, timeout)

        # try only with one part of authors name
        if not exact_match and len(matches) < max_results and authors:
            for auth in authors:
                if len(matches) >= max_results:
                    break
                name_split = auth.split(' ')
                if len(name_split) > 1:
                    for name in reversed(name_split):
                        if len(matches) >= max_results:
                            break
                        query = self.create_query(log, title=None, authors=[name])
                        log.debug('Querying only one part of authors name -  %s (%s): %s' %(name, auth, query))
                        root, response = load_url(log, query, br)
                        self._parse_search_results(log, title, authors, root, matches, no_matches, timeout)
                        log.debug('----Matches after process: %s %s'%(len(matches), matches))

        # search for tales on Google (title + authors)
        try:
            if not exact_match and len(matches) < max_results and google_engine and tales_search:
                query = self.create_query(log, title=title, authors=authors, tales=True, search_engine='google')
                # NOTE(review): first response is discarded, see the book search above.
                root, response = load_url(log, query, br)
                log.debug(u'Querying tales via google: %s'%query)

                self._set_google_consent_cookies(br)
                root, response = load_url(log, query, br)
                self._parse_google_search_results(log, title, authors, root, matches, no_matches, timeout)
        except Exception as e:
            log.debug(u'Error while Google searching: %s'%e)
        # search in tales (title + authors)
        if not exact_match and len(matches) < max_results and tales_search:
            query = self.create_query(log, title=title, authors=authors, tales=True)
            log.debug('Querying for tales (title + authors)..)\n Query: %s'%query)
            root, response = load_url(log, query, br)
            self._parse_tales_results(log, title, authors, root, matches, no_matches, timeout)
            log.debug('----Matches after process: %s: %s' %(len(matches), matches))
        # search in tales only with title field
        if not exact_match and len(matches) < max_results and tales_search:
            query = self.create_query(log, title=title, authors=[], tales=True)
            log.debug('Querying for tales (title only)..)\n Query: %s'%query)
            root, response = load_url(log, query, br)
            self._parse_tales_results(log, title, authors, root, matches, no_matches, timeout)
            log.debug('----Matches after process: %s: %s' %(len(matches), matches))
        # search in tales only with one word from title (longest first)
        if not exact_match and len(matches) < max_results and title and tales_search:
            title_split = title.split(' ')
            title_split.sort(key=lambda i: (-len(i), i))
            if len(title_split) > 1:
                for word in title_split:
                    if len(matches) >= max_results:
                        break
                    query = self.create_query(log, title=word, authors=None, tales=True)
                    log.debug('Querying only one word from tale title: %s (%s): %s' %(word, title, query))
                    root, response = load_url(log, query, br)
                    self._parse_tales_results(log, word, None, root, matches, no_matches, timeout)
        ## END of tales

        log.info('Matches: %s'%(matches))

        if abort.is_set():
            log.info("Abort is set to true, aborting")
            return

        if not matches:
            log.error('No matches found. Try to fill Title field.')
            return

        log.debug('Starting #%s workers for: %s' % (len(matches), matches))
        from calibre_plugins.databazeknih.worker import Worker
        workers = [Worker(url, result_queue, br, log, i, self, meta_dict) for i, url in
                enumerate(matches)]

        # start the workers slightly staggered
        for w in workers:
            w.start()
            time.sleep(0.1)

        # wait until every worker has finished or the abort event is set
        a_worker_is_alive = True
        while not abort.is_set() and a_worker_is_alive:
            log.debug('Waiting for workers')
            a_worker_is_alive = False
            for w in workers:
                w.join(0.2)
                if abort.is_set():
                    break
                a_worker_is_alive |= w.is_alive()

        return None

    def _set_google_consent_cookies(self, br):
        '''
        Set the CONSENT and SOCS cookies for .google.com on the browser *br*
        (presumably so that Google answers with results instead of its
        consent page - not verifiable from this file).
        '''
        from datetime import date
        from base64 import standard_b64encode
        br.set_simple_cookie('CONSENT', 'PENDING+987', '.google.com', path='/')
        template = b'\x08\x01\x128\x08\x14\x12+boq_identityfrontenduiserver_20231107.05_p0\x1a\x05en-US \x03\x1a\x06\x08\x80\xf1\xca\xaa\x06'
        # bytes.replace() returns a new object, so the result has to be
        # assigned; otherwise the date embedded in the template never changes.
        template = template.replace(b'20231107', date.today().strftime('%Y%m%d').encode('ascii'))
        br.set_simple_cookie('SOCS', standard_b64encode(template).decode('ascii').rstrip('='), '.google.com', path='/')

    def _parse_google_search_results(self, log, orig_title, orig_authors, root, matches, no_matches, timeout):
        '''
        Pick databazeknih links out of a Google result page.

        A link is appended to *matches* when its heading agrees with the
        searched title (and, if authors were given, shares a name with them);
        every other databazeknih link goes to *no_matches*. At the end
        *no_matches* entries fill *matches* up to the configured maximum.
        Both lists are modified in place.

        :param orig_title: searched title
        :param orig_authors: list of searched author names, or None
        :param root: parsed result page
        :param timeout: not used here
        '''
        log.debug('Parsing google results......')

        import calibre_plugins.databazeknih.config as cfg
        max_results = cfg.plugin_prefs[cfg.STORE_NAME].get(cfg.KEY_MAX_DOWNLOADS, cfg.DEFAULT_STORE_VALUES[cfg.KEY_MAX_DOWNLOADS])

        # searched author names as a set of lower-cased words without commas
        orig_auths = set()
        if orig_authors:
            orig_auths = {o.lower().replace(',', '') for o in ' '.join(orig_authors).split()}
        feminine_suffix = 'ová'

        results = root.xpath('//a[h3]')
        log.debug('Found %s results'%len(results))
        for result in results:
            try:
                found_title = result.xpath('h3/text()')[0]
                result_url = result.xpath('@href')[0].replace('/knihy/', '/zajimavosti-knihy/').split('?')[0]
                log.debug('kniha: %s .. orig.autor: %s' %(found_title, orig_authors))
            except Exception:
                log.debug('Xpath with found_title or URL not found in result')
                continue
            log.debug('Result URL: %r'%result_url)
            if result_url.find(databazeknih.BASE_URL) == -1:
                log.debug('Result is not databazeknih site, skipping..')
                continue

            # A heading of the form "Title - Author" is split into its two
            # parts; any other heading is used as a whole for both, so the
            # book title is always defined.
            heading_parts = found_title.split('-')
            if len(heading_parts) == 2:
                found_name, author = heading_parts
            else:
                found_name = author = found_title
            found_name = found_name.strip()

            vlozit = False
            if orig_authors:
                # try to recognize authors
                author = author.replace(' (p)', '').lower()
                found_auths = {a for a in author.split()[1:] if len(a) > 2} # found names
                # Drop the surname suffix 'ová' only where it is a suffix;
                # str.strip('ová') would remove any of the characters o, v, á
                # from both ends ('orwell' -> 'rwell', 'asimov' -> 'asim').
                found_auths = {a[:-len(feminine_suffix)] if a.endswith(feminine_suffix) else a for a in found_auths}
                found_auths = found_auths.union({'%sová' %a for a in found_auths}) # plus the variants with 'ová'
                # look for an agreement in surname or first name
                if orig_auths.intersection(found_auths):
                    # the whole title is compared (wrapped in a list so that
                    # compare_text iterates over names, not characters)
                    vlozit = self.compare_text(orig_title, [found_name], log)
                log.info('found_auths: %s .. orig_auths: %s'%(found_auths, orig_auths))
            else:
                # only the title was given
                vlozit = self.compare_text(orig_title, [found_name], log)

            if vlozit and result_url not in matches and len(matches) < max_results:
                matches.append(result_url)
            elif result_url not in no_matches:
                no_matches.append(result_url)
            if len(matches) >= max_results:
                break

        log.info('Matches: %s .. No matches: %s'%(matches, no_matches))
        # fill the remaining slots with the unconfirmed links
        for nmatch in no_matches:
            if len(matches) < max_results and nmatch not in matches:
                matches.append(nmatch)

    def _parse_search_results(self, log, orig_title, orig_authors, root, matches, no_matches, timeout):
        '''
        Pick book links out of a databazeknih search result page.

        A book is appended to *matches* when its title agrees with the
        searched title (and, if authors were given, its author line shares a
        name with them); other books go to *no_matches*. Both lists are
        modified in place; *matches* never grows beyond the configured maximum.

        :param orig_title: searched title
        :param orig_authors: list of searched author names, or None
        :param root: parsed result page
        :param timeout: not used here
        '''
        log.debug('Parsing databazeknih book results......')

        import calibre_plugins.databazeknih.config as cfg
        max_results = cfg.plugin_prefs[cfg.STORE_NAME].get(cfg.KEY_MAX_DOWNLOADS, cfg.DEFAULT_STORE_VALUES[cfg.KEY_MAX_DOWNLOADS])

        # searched author names as a set of lower-cased words without commas
        orig_auths = set()
        if orig_authors:
            orig_auths = {o.lower().replace(',', '') for o in ' '.join(orig_authors).split()}
        feminine_suffix = 'ová'

        results = root.xpath('//p[@class="new"]')
        log.debug('Found %s results'%len(results))
        for result in results:
            title = result.xpath('a//text()')
            log.debug('kniha: %s .. orig.autor: %s' %(title, orig_authors))
            first_author = result.xpath('span[@class="smallfind"]//text()')
            if not first_author:
                continue
            book_url = result.xpath('a[@class="new"]/@href')
            if not book_url:
                # a row without a link cannot become a match; skipping it
                # avoids an IndexError on book_url[0]
                continue

            vlozit = False
            first_author = first_author[0].replace(' (p)', '').lower()
            found_auths = {a for a in first_author.split()[1:] if len(a) > 2} # found names (first names included)
            # Drop the surname suffix 'ová' only where it is a suffix;
            # str.strip('ová') would remove any of the characters o, v, á
            # from both ends ('orwell' -> 'rwell', 'asimov' -> 'asim').
            found_auths = {a[:-len(feminine_suffix)] if a.endswith(feminine_suffix) else a for a in found_auths}
            found_auths = found_auths.union({'%sová' %a for a in found_auths}) # plus the variants with 'ová'
            if orig_authors:
                # look for an agreement in surname or first name
                if orig_auths.intersection(found_auths):
                    log.debug('found_auths:')
                    vlozit = self.compare_text(orig_title, title,log)
                log.info('found_auths: %s .. orig_auths: %s'%(found_auths, orig_auths))
            else:
                # only the title was given
                vlozit = self.compare_text(orig_title, title,log)

            # the href starts with '/', which BASE_URL already ends with
            result_url = '%s%s'%(databazeknih.BASE_URL, book_url[0][1:])
            result_url = result_url.replace('/knihy/', '/zajimavosti-knihy/')
            log.debug('Result URL: %r'%result_url)
            if vlozit and result_url not in matches and len(matches) < max_results:
                matches.append(result_url)
            elif result_url not in no_matches:
                no_matches.append(result_url)
            if len(matches) >= max_results:
                break

        log.info('Matches: %s .. No matches: %s'%(matches, no_matches))
  
    def _parse_tales_results(self, log, orig_title, orig_authors, root, matches, no_matches, timeout):
        '''
        Pick tale links out of a databazeknih tale search result page.

        A tale is appended to *matches* when its title agrees with the
        searched title (and, if authors were given, its author line shares a
        name with them); other tales go to *no_matches*. Rows without an
        author line or without a link are skipped. Both lists are modified in
        place; *matches* never grows beyond the configured maximum.

        :param orig_title: searched title
        :param orig_authors: list of searched author names, or None
        :param root: parsed result page
        :param timeout: not used here
        '''
        log.debug('Parsing tales results......')
        import calibre_plugins.databazeknih.config as cfg
        max_results = cfg.plugin_prefs[cfg.STORE_NAME].get(cfg.KEY_MAX_DOWNLOADS, cfg.DEFAULT_STORE_VALUES[cfg.KEY_MAX_DOWNLOADS])

        # searched author names as a set of lower-cased words without commas
        orig_auths = set()
        if orig_authors:
            orig_auths = {o.lower().replace(',', '') for o in ' '.join(orig_authors).split()}
        feminine_suffix = 'ová'

        results = root.xpath('//ul[@class="new2 odtopm"]')
        log.debug('Founded %s results'%len(results))
        for result in results:
            title = result.xpath('li/a//text()')
            log.debug('kniha: %s .. orig.autor: %s' %(title, orig_authors))
            first_author = result.xpath('li/span[@class="smallfind odl"]//text()')
            book_url = result.xpath('li/a/@href')
            if not first_author or not book_url:
                # Nothing usable in this row. Skipping it also keeps the URL
                # of the previous row from being filed a second time.
                continue

            vlozit = False
            first_author = first_author[0].replace(' (p)', '').lower()
            found_auths = {a for a in first_author.split()[1:] if len(a) > 2} # found names
            # Drop the surname suffix 'ová' only where it is a suffix;
            # str.strip('ová') would remove any of the characters o, v, á
            # from both ends ('orwell' -> 'rwell', 'asimov' -> 'asim').
            found_auths = {a[:-len(feminine_suffix)] if a.endswith(feminine_suffix) else a for a in found_auths}
            found_auths = found_auths.union({'%sová' %a for a in found_auths}) # plus the variants with 'ová'
            if orig_authors:
                # look for an agreement in surname or first name
                if orig_auths.intersection(found_auths):
                    vlozit = self.compare_text(orig_title, title, log)
                log.info('found_auths: %s .. orig_auths: %s'%(found_auths, orig_auths))
            else:
                # only the title was given
                vlozit = self.compare_text(orig_title, title,log)

            # the href starts with '/', which BASE_URL already ends with
            result_url = '%s%s'%(databazeknih.BASE_URL, book_url[0][1:])
            log.debug('Result URL:%r'%result_url)
            if vlozit and result_url not in matches and len(matches) < max_results:
                matches.append(result_url)
            elif result_url not in no_matches:
                no_matches.append(result_url)
            if len(matches) >= max_results:
                break

        log.info('Matches: %s .. No matches: %s'%(matches, no_matches))

    def compare_text(self, orginal_name, new, log):
        """
        Check whether any candidate text in ``new`` matches ``orginal_name``.

        Each candidate is compared with the original in progressively looser
        forms; the normalisations are cumulative and the first equal pair
        wins:

        1. lower-cased,
        2. with diacritics removed,
        3. with punctuation removed,
        4. with spaces removed.

        Candidates containing a newline are skipped.

        :param orginal_name: text to look for; ``None`` never matches.
        :param new: iterable (list or tuple) of candidate strings.
        :param log: log object providing ``debug`` and ``info``.
        :return: ``True`` if at least one candidate matches, else ``False``.
        """
        log.debug('array orginal: %s .. new: %s'%(orginal_name, new))
        if orginal_name is None:
            # Nothing to compare against; avoids AttributeError on .lower().
            return False
        for new_name in new:
            if "\n" in new_name:
                continue
            # Wrap in a 1-tuple so a tuple of candidates is formatted as a
            # single value instead of being spread over the format string.
            log.debug('try compare new: %s'%(new,))
            log.info('orginal: %s .. new: %s'%(orginal_name, new_name))
            # The original is normalised lazily, step by step, so that an
            # early match does not pay for the later normalisations.
            orginal = orginal_name.lower()
            new_name = new_name.lower()
            log.info("try lower")
            if orginal == new_name:
                return True
            new_name = remove_diacritic(new_name)
            orginal = remove_diacritic(orginal)
            log.info("try diacritic")
            if orginal == new_name:
                return True
            new_name = remove_punctuation(new_name)
            orginal = remove_punctuation(orginal)
            log.info("try punctuation")
            if orginal == new_name:
                return True
            new_name = new_name.replace(' ', '')
            orginal = orginal.replace(' ', '')
            log.info("try spaces")
            if orginal == new_name:
                return True
        return False

    def download_cover(self, log, result_queue, abort,
            title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
        """
        Download cover image(s) for a book and put them into ``result_queue``.

        Cover URLs are taken from ``get_cached_cover_url``. When nothing is
        cached, ``identify`` is run first and the cache is queried again for
        each identified result, best match first.

        When exactly one URL is known it is downloaded as-is. With several
        URLs, a ``big_``-prefixed variant of every databazeknih URL is probed;
        existing big variants are preferred over the remaining URLs and the
        list is cut to the configured maximum number of covers.

        :param log: log object used for progress and error messages.
        :param result_queue: queue receiving one ``(self, image_data)`` tuple
            per successfully downloaded cover.
        :param abort: event-like object; once it is set no further network
            request is started.
        :param title: book title, forwarded to ``identify``.
        :param authors: list of authors, forwarded to ``identify``.
        :param identifiers: identifiers used for the cache lookup and
            forwarded to ``identify``; only read here, never modified.
        :param timeout: timeout passed to every cover download.
        :param get_best_cover: accepted for interface compatibility; not used
            by this implementation.
        """
        import calibre_plugins.databazeknih.config as cfg
        prefs = cfg.plugin_prefs[cfg.STORE_NAME]
        max_covers = prefs.get(cfg.MAX_COVERS, cfg.DEFAULT_STORE_VALUES[cfg.MAX_COVERS])
        obalky_cover = prefs.get(cfg.OBALKYKNIH_COVER, cfg.DEFAULT_STORE_VALUES[cfg.OBALKYKNIH_COVER])
        if max_covers == 0:
            log.info('Searching for covers on databazeknih is disabled. You can enable it in plugin preferences.')
            return

        br = self.browser
        cached_url = self.get_cached_cover_url(identifiers)

        # No cached cover URLs: run identify to populate the cache.
        if cached_url is None:
            log.info('No cached cover found, running identify')
            rq = Queue()
            self.identify(log, rq, abort, title=title, authors=authors, identifiers=identifiers)
            if abort.is_set():
                return
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            results.sort(key=self.identify_results_keygen(title=title, authors=authors, identifiers=identifiers))
            for mi in results:
                cached_url = self.get_cached_cover_url(mi.identifiers)
                if cached_url is not None:
                    break

        if cached_url is None:
            log.info('No cover found')
            return

        # Honour an abort request *before* any cover is downloaded.
        if abort.is_set():
            return

        if len(cached_url) == 1:
            # A single cover is downloaded as-is: no probing for a bigger
            # variant and no max_covers limit.
            urls = [cached_url[0]]
        else:
            big_urls = []
            small_urls = []
            for url in cached_url:
                if abort.is_set():
                    return
                if 'databazeknih' not in url:
                    # Big variants are only ever used for databazeknih URLs,
                    # so probing other hosts would be a wasted request.
                    small_urls.append(url)
                    continue
                big_url_parts = url.split('/')
                big_url_parts[-1] = 'big_%s'%big_url_parts[-1]
                big_url = '/'.join(big_url_parts)
                try:
                    # Only the success/failure of the request matters here.
                    load_url(log, big_url, br)
                except Exception:
                    small_urls.append(url)
                else:
                    big_urls.append(big_url)

            # Big variants first, then the remaining URLs. One extra slot is
            # allowed when the obalkyknih option is on (presumably reserved
            # for the obalkyknih.cz cover - TODO confirm).
            limit = max_covers + 1 if obalky_cover else max_covers
            urls = (big_urls + small_urls)[:limit]

        for url in urls:
            if abort.is_set():
                return
            try:
                cdata = br.open_novisit(url, timeout=timeout).read()
                result_queue.put((self, cdata))
            except Exception:
                # Best effort: a failed cover must not prevent the others.
                log.exception('Failed to download cover from: ', url)
        

if __name__ == '__main__': # tests
    # Book ids that are useful for manual testing:
    # 156581 - no orig year foreign exception
    # 1114 - two editions, two ISBNs
    # 15829 - 2x ISBN, publisher, edition
    #
    # To run these tests use:
    # calibre-debug -e __init__.py

    from calibre.ebooks.metadata.sources.test import (test_identify_plugin, title_test, authors_test, series_test)

    test_identify_plugin(databazeknih.name,
        [
            (   # A book looked up by title and author only
                {'title':'61 hodin', 'authors':['Lee Child']},
                [title_test('61 hodin'),
                 authors_test(['Lee Child']),
                 series_test('Jack Reacher', 14.0)]
            ),
            # The cases below are disabled. They must stay real comments: a
            # triple-quoted string here would be passed to
            # test_identify_plugin as an additional (invalid) test case.
            #
            # (# A book throwing an index error
            #     {'title':'The Girl Hunters', 'authors':['Mickey Spillane']},
            #     [title_test('The Girl Hunters', exact=True),
            #      authors_test(['Mickey Spillane']),
            #      series_test('Mike Hammer', 7.0)]
            # ),
            #
            # (# A book with no ISBN specified
            #     {'title':"Playing with Fire", 'authors':['Derek Landy']},
            #     [title_test("Playing with Fire", exact=True),
            #      authors_test(['Derek Landy']),
            #      series_test('Skulduggery Pleasant', 2.0)]
            # ),
            #
            # (# A book with a databazeknih id
            #     {'identifiers':{'databazeknih': '409414'},
            #         'title':'61 Hours', 'authors':['Lee Child']},
            #     [title_test('61 Hours', exact=True),
            #      authors_test(['Lee Child']),
            #      series_test('Jack Reacher', 14.0)]
            # ),
        ])