﻿#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import, print_function)

import time
import copy
import random
from threading import Event
from collections import (defaultdict, namedtuple)
from PyQt5.Qt import (Qt, QProgressDialog)

try:
    import cPickle as pickle
except ImportError:
    import pickle   # cPickle no longer present in calibre 4.99


from calibre.gui2.threaded_jobs import ThreadedJob
from calibre.gui2 import (Dispatcher, config)
from calibre.utils.config_base import tweaks
from calibre.constants import numeric_version

from calibre_plugins.overdrive_link import ActionOverdriveLink
from calibre_plugins.overdrive_link.author_prep import (alternate_author_names)
from calibre_plugins.overdrive_link.title_prep import (safe_word_from_title, force_unique_title)
from calibre_plugins.overdrive_link.match import (
    is_any_same_author, author_match_prep, UNKNOWN, is_unknown, match_book_lists)
from calibre_plugins.overdrive_link.numbers import value_unit
from calibre_plugins.overdrive_link.book import (DiscoveredBook, author_sort_key)
from calibre_plugins.overdrive_link.formats import FORMAT_SCRIBD_EBOOK
from calibre_plugins.overdrive_link.fixups import fixup_book_data
from calibre_plugins.overdrive_link.link import (ODLink, ODLinkSet, IDENT_AVAILABLE_LINK, IDENT_NEEDED_LINK)
from calibre_plugins.overdrive_link.cache import LibraryBookCache
from calibre_plugins.overdrive_link.log import (ODStatus, ODLog, JobLog)
from calibre_plugins.overdrive_link.net import (set_num_simultaneous_queries)
from calibre_plugins.overdrive_link.library import SearchableLibrary
from calibre_plugins.overdrive_link.tweak import (
    ALL_OVERDRIVE_LINK_TWEAKS, TWEAK_SKIP_LOGIN, TWEAK_INCREMENTAL_AUTHOR_COUNT, TWEAK_IGNORE_LOGIN_FAILURE,
    TWEAK_WAIT_GROUPS, TWEAK_CHECK_ONLY_ENABLED_LIB_AVAIL)
from calibre_plugins.overdrive_link.amazon import (Amazon)
from calibre_plugins.overdrive_link.audible import (Audible)
from calibre_plugins.overdrive_link.axis_360 import (Axis360)
from calibre_plugins.overdrive_link.cloud_library import (CloudLibrary)
from calibre_plugins.overdrive_link.ebscohost import (EBSCOhost)
from calibre_plugins.overdrive_link.enki import (Enki)
from calibre_plugins.overdrive_link.freading import (Freading)
from calibre_plugins.overdrive_link.hoopla import (Hoopla)
from calibre_plugins.overdrive_link.open_library import (OpenLibrary)
from calibre_plugins.overdrive_link.overdrive import (OverDrive)
from calibre_plugins.overdrive_link.internet_archive import (InternetArchive)
from calibre_plugins.overdrive_link.project_gutenberg import (ProjectGutenberg, build_gutenberg_index)
from calibre_plugins.overdrive_link.scribd import (Scribd)

from .python_transition import (IS_PYTHON2)
if IS_PYTHON2:
    from .python_transition import (repr, str)

try:
    from calibre_plugins.overdrive_link_debug.config import DEBUG_MODE
    from calibre_plugins.overdrive_link_debug.jobs import DEBUG_SPECIAL_JOBS
    from calibre_plugins.overdrive_link_debug.formats import match_needed
except ImportError:
    DEBUG_MODE = DEBUG_SPECIAL_JOBS = match_needed = None


__license__ = 'GPL v3'
__copyright__ = '2012-2022, John Howell <jhowell@acm.org>'


# Initializing these here avoids problems within search jobs
# Every provider class imported above is registered exactly once, as a side effect of importing
# this module, so registration is already complete before any search function below is called.
# NOTE(review): register() is defined outside this file; presumably it adds the provider to the
# table consulted by SearchableLibrary.create()/SearchableLibrary.provider() -- confirm.
Amazon.register()
Audible.register()
Axis360.register()
CloudLibrary.register()
EBSCOhost.register()
Enki.register()
Freading.register()
Hoopla.register()
InternetArchive.register()
OpenLibrary.register()
OverDrive.register()
Scribd.register()
ProjectGutenberg.register()


# NOTE(review): these two limits are presumably applied by code further down in this module -- confirm.
MAX_ALLOWED_LINKS_BY_LIBRARY = 4
MAX_WRONG_AUTHOR_RESULTS = 20

# Divider line written to the job log between the phases of a search.
# The identifier is misspelled ("SEPERATOR") but is kept as-is because it is referenced by name.
LOG_SEPERATOR = '================================================='


def _get_current_availability(abort, log, status, config):
    '''
    Forward all arguments to get_current_availability() and return its result.

    SPECIAL_JOBS is built at import time, before get_current_availability has been defined.
    Going through this wrapper delays the name lookup until the job is actually run.
    '''
    return get_current_availability(abort, log, status, config)     # allow forward reference


# A special job has a name (shown as the description of the queued job) and a function that is
# called as function(abort, log, status, config) in place of a normal search.
SpecialJob = namedtuple('SpecialJob', 'name function')

# Special jobs by request key. lending_library_search runs a special job when the first
# whitespace separated word of the search keywords is one of these keys.
SPECIAL_JOBS = {
    'build_gutenberg_index_': SpecialJob('Build Project Gutenberg search index', build_gutenberg_index),
    'get_current_availability_': SpecialJob('Get current availability of linked books', _get_current_availability),
    }

# The optional debug plugin (see the guarded import above) can contribute additional special jobs
if DEBUG_SPECIAL_JOBS is not None:
    SPECIAL_JOBS.update(DEBUG_SPECIAL_JOBS)

'''
Perform searches as background jobs, run either in a separate worker process or in an in-process thread
'''


def worker_limit():
    '''
    Return half of the 'worker_limit' value held in calibre's preferences, truncated to an int.

    The module level config imported from calibre.gui2 is read here, not a search configuration.
    '''
    configured_limit = config['worker_limit']   # from calibre prefs
    return int(configured_limit / 2.0)


class AuthorGroup(object):
    '''
    A set of authors together with the set of books that caused them to be grouped.

    first_author caches the smallest author sort key of the group. It is cleared whenever the
    set of authors may have changed and is recomputed on demand by sort_key().
    '''

    def __init__(self, book=None):
        self.first_author = None
        self.authors = set()
        self.books = {book} if book else set()

    def add_author(self, author):
        '''Add one author to the group and drop the cached sort key.'''
        self.first_author = None
        self.authors.add(author)

    def merge(self, other):
        '''Add the authors and books of another group to this group and drop the cached sort key.'''
        self.first_author = None
        self.authors.update(other.authors)
        self.books.update(other.books)

    def sort_key(self):
        '''
        Return a key that orders groups by decreasing number of authors, then by the smallest
        author sort key found in the group (None for a group without authors).
        '''
        if self.authors and self.first_author is None:
            # the smallest key is the key of the author that would sort first
            self.first_author = min(author_sort_key(name) for name in self.authors)

        return (-len(self.authors), self.first_author)


def start_search_jobs(action, job_id, config, all_calibre_books, selected_ids, discover_books, orig_discovered_books,
                      keywords, incremental_sequence, callback):
    '''
    Divide the selected calibre books into search jobs and queue those jobs with calibre's job manager.

    Selected books are grouped by author so that authors having a book in common always end up in
    the same job. Groups are then packed into jobs holding at most the configured number of authors
    (no limit for keyword searches). A job always receives at least one group, so a single group
    that is larger than the limit still gets a job.

    action: plugin action; its gui and name attributes are used
    job_id: passed through unchanged to every job
    config: search configuration (this parameter hides the calibre preferences object of the same
        name imported by this module). max_simultaneous_jobs is set on it before it is pickled
    all_calibre_books: calibre book objects; their id and authors attributes are used here
    selected_ids: ids of the books to be searched for
    discover_books: when true each job receives all calibre books instead of only its own books
    orig_discovered_books: previously discovered books, pickled and handed to every job
    keywords: keywords to search for or a special job request; false for a search by author
    incremental_sequence: None to queue every job, otherwise only the job whose index equals
        incremental_sequence modulo the number of jobs is queued
    callback: completion callback handed to the job manager with each job

    Returns None. Returns early, leaving already queued jobs in place, if the progress dialog is canceled.
    '''

    # Organize the selected ids by author(s)
    # Split into multiple groups. Keep authors with books in common together in same group.
    groups = set()
    group_of_author = {}

    for book in all_calibre_books:
        if book.id in selected_ids:
            book_group = AuthorGroup(book=book)

            for author in book.authors[0:min(config.num_authors_to_search, len(book.authors))]:
                book_group.add_author(author)

                orig_group = group_of_author.get(author)
                if orig_group is not None:
                    # author already belongs to a group: fold everything gathered for this book into it
                    orig_group.merge(book_group)
                    for g_author in book_group.authors:
                        group_of_author[g_author] = orig_group

                    groups.discard(book_group)
                    book_group = orig_group
                else:
                    group_of_author[author] = book_group

            groups.add(book_group)

    #print('***job prep: %d authors, %d groups' % (len(group_of_author), len(groups)))

    # combine groups into jobs limited by a configured number of authors (0 = no limit)
    if keywords:
        split_search_author_count = 0
    elif incremental_sequence is None:
        split_search_author_count = config.split_search_author_count
    else:
        split_search_author_count = tweaks.get(TWEAK_INCREMENTAL_AUTHOR_COUNT, config.split_search_author_count)

    jobs = []
    job_group = AuthorGroup()

    # Waiting groups are not modified by the packing below, so sorting them once (largest first)
    # gives the same order as sorting again before every step.
    remaining_groups = sorted(groups, key=lambda g: g.sort_key())

    while remaining_groups:
        if len(job_group.authors) == 0:
            # new job or not splitting. Take largest remaining group
            job_group.merge(remaining_groups.pop(0))
            continue

        # add largest remaining group that will fit to existing job
        for index, group in enumerate(remaining_groups):
            if (split_search_author_count == 0) or (len(job_group.authors) + len(group.authors) <= split_search_author_count):
                job_group.merge(group)
                del remaining_groups[index]
                break
        else:
            # cannot add more without exceeding the configured author limit
            jobs.append(job_group)
            job_group = AuthorGroup()

    if len(job_group.authors) > 0 or len(jobs) == 0:
        # handle any remainder (also guarantees at least one job, which keyword searches rely on)
        jobs.append(job_group)

    # queue jobs with progress indication
    progress = QProgressDialog('Preparing ' + value_unit(len(jobs), 'search job'), 'Cancel', 0, len(jobs), action.gui)
    progress.setWindowTitle(action.name)
    progress.setWindowFlags(progress.windowFlags() & (~Qt.WindowContextHelpButtonHint))
    progress.setMinimumWidth(400)
    progress.setMinimumDuration(2000)   # Show progress only if taking a while
    progress.setModal(True)
    progress.setValue(0)

    config.max_simultaneous_jobs = min(worker_limit(), len(jobs) if incremental_sequence is None else 1) \
        if config.allow_simultaneous_jobs else 1

    pickled_config = pickle.dumps(config)
    pickled_orig_discovered_books = pickle.dumps(orig_discovered_books)
    pickled_all_calibre_books = pickle.dumps(all_calibre_books) if discover_books else None

    # Describe keyword jobs the same way lending_library_search dispatches them: a special job is
    # recognized by the first word of the keywords, any following words are its arguments.
    keyword_desc = None
    if keywords:
        keyword_words = keywords.split()
        if keyword_words and keyword_words[0] in SPECIAL_JOBS:
            keyword_desc = SPECIAL_JOBS[keyword_words[0]].name
        else:
            keyword_desc = 'Search for books with keywords: ' + keywords

    for i, search_job in enumerate(jobs):
        progress.setValue(i)

        if incremental_sequence is None or (incremental_sequence % len(jobs)) == i:
            pickled_calibre_books = pickled_all_calibre_books if discover_books else pickle.dumps(list(search_job.books))
            search_authors = sorted(list(search_job.authors), key=author_sort_key)
            discovery_authors = list(group_of_author.keys())    # fresh list for every job

            if keywords:
                desc = keyword_desc
            else:
                desc = 'Search for books by ' + value_unit(len(search_authors), 'author')

            job_args = (
                job_id, pickled_config, pickled_calibre_books, search_authors,
                discover_books, pickled_orig_discovered_books, keywords, discovery_authors)

            if config.allow_simultaneous_jobs:
                # This uses a separate process to perform the work (ParallelJob). This allows greater parallelism.
                # Pickle objects ahead of time otherwise the automatic unpickle will fail due
                # to the plugin not being loaded in time.
                action.gui.job_manager.run_job(Dispatcher(callback), 'arbitrary_n', args=[
                    'calibre_plugins.overdrive_link.jobs', 'lending_library_search_process', job_args],
                    description=desc)

            else:
                # This uses an in-process thread to perform the work. This offers high performance, but can
                # suffer from memory leaks and will make the GUI less responsive.
                # The job works on unpickled copies of the config, calibre book and discovered book objects
                # to avoid interaction with later changes by gui.
                threaded_job = ThreadedJob(
                    ActionOverdriveLink.name, desc, lending_library_search_thread, job_args, {}, callback)
                action.gui.job_manager.run_threaded_job(threaded_job)

            action.gui.status_bar.show_message('Lending library search started', 3000)

        if progress.wasCanceled():
            return

    progress.reset()


def lending_library_search_process(job_id, pickled_config, pickled_all_calibre_books, search_authors,
                                   discover_books, pickled_orig_discovered_books, keywords, discovery_authors,
                                   notification=lambda x, y: x):
    '''
    Entry point for a search job that is run in a separate process.

    Creates its own abort event (never set here) and job log, runs lending_library_search and
    returns (job_id, calibre_books, discovered_books, errors, warnings, summaries). Any exception
    is recorded in the log and both book lists are returned empty.

    notification must have a queue attribute, which receives status updates. The default value
    does not have one, so a call that relies on the default ends in the failure path.
    '''
    log = JobLog(ODLog(), [], [], [])
    abort = Event()

    try:
        search_result = lending_library_search(
            pickled_config, pickled_all_calibre_books, search_authors,
            discover_books, pickled_orig_discovered_books, keywords, discovery_authors,
            abort, log, notification.queue)
        calibre_books, discovered_books = search_result

    except Exception as e:
        log.exception('Search job failure', e)
        calibre_books, discovered_books = [], []

    return (job_id, calibre_books, discovered_books, log.errors, log.warnings, log.summaries)


def lending_library_search_thread(job_id, pickled_config, pickled_all_calibre_books, search_authors,
                                  discover_books, pickled_orig_discovered_books, keywords, discovery_authors,
                                  abort, log, notifications):
    '''
    Entry point for a search job that is run as a thread within the calibre process.

    Wraps the supplied log in a JobLog, runs lending_library_search and returns
    (job_id, calibre_books, discovered_books, errors, warnings, summaries). Any exception is
    recorded in the log and both book lists are returned empty.
    '''
    job_log = JobLog(log, [], [], [])

    try:
        search_result = lending_library_search(
            pickled_config, pickled_all_calibre_books, search_authors,
            discover_books, pickled_orig_discovered_books, keywords, discovery_authors,
            abort, job_log, notifications)
        calibre_books, discovered_books = search_result

    except Exception as e:
        job_log.exception('Search job failure', e)
        calibre_books, discovered_books = [], []

    return (job_id, calibre_books, discovered_books, job_log.errors, job_log.warnings, job_log.summaries)


def lending_library_search(pickled_config, pickled_all_calibre_books, search_authors,
                           discover_books, pickled_orig_discovered_books, keywords, discovery_authors,
                           abort, log, notifications):
    '''
    This function performs the search for books as a job in a separate thread or process.

    Warning: Do not change number of arguments between releases for this (and its callers) so that
    the job will get far enough to detect the wrong version and report it properly.

    Phases: unpickle the inputs, run a special job if the first keyword names one, otherwise find
    library books (by keyword or by each search author), get details for the found books, match
    them against the calibre books, optionally update availability of the matched books and
    optionally turn the unmatched library books into discovered books.

    pickled_config, pickled_all_calibre_books, pickled_orig_discovered_books: pickled by the caller
    search_authors: authors to search for when keywords is None
    discover_books: true to also report library books that match no calibre book
    keywords: None for a search by author, otherwise the keywords or special job request
    discovery_authors: authors considered when discovering new books
    abort: event that is checked between steps; abort_job() is called once it is set
    log: job log
    notifications: queue handed to ODStatus for progress reporting

    Returns (matched calibre books, discovered books). Returns ([], []) when the plugin version
    recorded in the config differs from the running plugin. For a special job the result of its
    function is returned, or ([], []) if that result is None.
    '''
    random.seed()

    log.info('%s %s, calibre %s' % (ActionOverdriveLink.name, '.'.join([str(v) for v in ActionOverdriveLink.version]),
             '.'.join([str(v) for v in numeric_version])))

    config = pickle.loads(pickled_config)
    # The config was pickled by the GUI; a config from a different plugin version cannot be trusted
    if (not hasattr(config, 'plugin_version')) or (config.plugin_version != ActionOverdriveLink.version):
        log.error('Plugin version mismatch. A new version of the Overdrive Link plugin was installed without restarting calibre.')
        return ([], [])

    # Each range is (start, end, description) as passed to status.subrange()
    status = ODStatus(queue=notifications)
    STATUS_RANGE_SPECIAL = (0.01, 1.0, '')

    STATUS_RANGE_PREPARE = (0.0, 0.01, 'Preparing for search')
    STATUS_RANGE_SEARCH = (0.01, 0.50, 'search')

    # STATUS_RANGE_AVAIL exists only when availability is checked, the only case in which it is used
    if config.check_availability_of_new_links:
        STATUS_RANGE_BOOK = (0.50, 0.89, 'get book')
        STATUS_RANGE_MATCH = (0.89, 0.90, 'match')
        STATUS_RANGE_AVAIL = (0.90, 1.00, 'get availability')
    else:
        STATUS_RANGE_BOOK = (0.50, 0.99, 'get book')
        STATUS_RANGE_MATCH = (0.99, 1.00, 'match')

    status.subrange(STATUS_RANGE_PREPARE[0], STATUS_RANGE_PREPARE[1], STATUS_RANGE_PREPARE[2])

    all_calibre_books = pickle.loads(pickled_all_calibre_books)
    orig_discovered_books = pickle.loads(pickled_orig_discovered_books)

    set_num_simultaneous_queries(config.max_simultaneous_jobs)  # set for query pacing
    #init_ssl(log) # special SSL handling

    split_keywords = [] if keywords is None else keywords.split()

    # A special job is requested by its key as first keyword; remaining words are passed as config.argv
    if split_keywords and split_keywords[0] in SPECIAL_JOBS:
        log.info(LOG_SEPERATOR)
        special_status = status.subrange(STATUS_RANGE_SPECIAL[0], STATUS_RANGE_SPECIAL[1], STATUS_RANGE_SPECIAL[2])
        special_status = special_status.update_subrange(0, 1, keywords)

        # save for use in get_current_availability, inventory_amazon_et, uncache_books, get_new_overdrive_book_ids
        config.calibre_books = all_calibre_books
        config.discovered_books = orig_discovered_books    # save for use in get_new_overdrive_book_ids
        config.argv = split_keywords

        result = SPECIAL_JOBS[split_keywords[0]].function(abort, log, special_status, config)

        #cleanup_ssl()   # special SSL handling

        return result if result is not None else ([], [])

    if config.search_language:
        log.info('Search language: %s' % config.search_language)

    log.info('Search formats: %s' % ', '.join(sorted(list(config.search_formats))))

    # Record in the log every plugin tweak that has been set
    for tweak in ALL_OVERDRIVE_LINK_TWEAKS:
        if tweak in tweaks:
            log.info("Plugin tweak: %s = %s" % (tweak, repr(tweaks[tweak])))

    all_libraries, enabled_libraries = init_libraries(abort, log, config)
    found_library_books = set()
    discover_by_keyword = keywords is not None

    search_status = status.subrange(STATUS_RANGE_SEARCH[0], STATUS_RANGE_SEARCH[1], STATUS_RANGE_SEARCH[2])

    if discover_by_keyword:
        log.info(LOG_SEPERATOR)
        find_status = search_status.update_subrange(0, 1, 'keywords-' + keywords)

        new_library_books = find_library_books_by_keyword(
            abort, log, find_status, config, enabled_libraries, keywords, discover_books)

        if new_library_books:
            found_library_books.update(new_library_books)
    else:
        for i, author in enumerate(search_authors):
            if is_unknown(author):
                continue

            log.info(LOG_SEPERATOR)
            log.info('Finding library books by %s' % author)

            find_status = search_status.update_subrange(i, len(search_authors), author)

            if abort.is_set():
                abort_job()

            try_titles = set()
            if not discover_books:
                # Optimization when searching for a single title with no other book discovery
                for cbook in all_calibre_books:
                    if author in cbook.authors:
                        title_word = safe_word_from_title(cbook.title)    # Use partial title to restrict search
                        if title_word:
                            try_titles.add(title_word)
                        else:
                            try_titles = set()  # a book doesn't have a good title word, need to check all
                            break

            # an empty list means the search is not restricted by title
            try_titles = sorted(list(try_titles))

            alt_names = alternate_author_names(author, config, log)     # check for variants of the author name
            #log.info("alt names: %s" % ", ".join(alt_names))

            # discovery can be turned off for individual authors
            discover_by_author = discover_books and (author_match_prep(author) not in config.no_discovery_authors)

            new_library_books = find_library_books_by_author(
                abort, log, find_status, alt_names, config, enabled_libraries, try_titles, discover_by_author)

            if new_library_books:
                found_library_books.update(new_library_books)

    if abort.is_set():
        abort_job()

    # Get more details on found library books and filter out those that don't match search parameters
    log.info(LOG_SEPERATOR)
    get_book_status = status.subrange(STATUS_RANGE_BOOK[0], STATUS_RANGE_BOOK[1], STATUS_RANGE_BOOK[2])

    library_books = get_library_book_details(abort, log, get_book_status, config, discover_books, discover_by_keyword,
                                             found_library_books)

    if abort.is_set():
        abort_job()

    log.info(LOG_SEPERATOR)
    status.subrange(STATUS_RANGE_MATCH[0], STATUS_RANGE_MATCH[1], STATUS_RANGE_MATCH[2])

    matched_calibre_books, unmatched_library_books = match_library_and_calibre_books(
            log, config, list(all_calibre_books), library_books, search_authors)

    if config.check_availability_of_new_links:
        log.info(LOG_SEPERATOR)
        # check_availability was set on each calibre book by match_library_and_calibre_books
        availability_check_books = [b for b in matched_calibre_books if b.check_availability]

        #for b in availability_check_books:
        #    log.info("initial links for %s: %s" % (str(b), b.links_str(config)))

        log.info('Updating current availability for %d books with newly found (or possibly missing) available links' % (
                    len(availability_check_books)))
        check_avail_status = status.subrange(STATUS_RANGE_AVAIL[0], STATUS_RANGE_AVAIL[1], STATUS_RANGE_AVAIL[2])

        # uses all libraries here, not only the enabled ones that were searched
        get_current_avail(abort, log, check_avail_status, config, availability_check_books, all_libraries)

        #for b in availability_check_books:
        #    log.info("final links for %s: %s" % (str(b), b.links_str(config)))

    if discover_books:
        log.info(LOG_SEPERATOR)
        discovered_books = discover_new_books(log, config, unmatched_library_books, all_calibre_books,
                                              orig_discovered_books, discovery_authors, discover_by_keyword)
    else:
        discovered_books = []

    #cleanup_ssl()   # special SSL handling

    return (matched_calibre_books, discovered_books)


def init_libraries(abort, log, config):
    '''
    Create a searchable library object for every lending library in the configuration.

    Returns (all libraries, enabled libraries), both in configuration order. The sign in state
    attributes of every created library are set to False. abort_job() is called if the abort
    event is found set after a library has been created.
    '''
    every_library = []
    searchable_libraries = []

    for lending_lib in config.libraries:
        lib = SearchableLibrary.create(log, config, lending_lib)

        # start each job with cleared sign in state
        lib.signin_required = False
        lib.signed_in = False
        lib.did_sign_in = False
        lib.did_second_sign_in = False

        if abort.is_set():
            abort_job()

        every_library.append(lib)

        if lending_lib.enabled:
            searchable_libraries.append(lib)

    return (every_library, searchable_libraries)


def find_library_books_by_keyword(abort, log, status, config, search_libraries, keywords, discover_books):
    '''
    Perform a search using keywords as the title.

    Every library is searched once, with an empty author and the complete keywords string as the
    only title. Nothing is searched unless discover_books is true, the summary line is logged in
    either case. Returns the set that find_book_at_lib was given to collect the found books.
    '''
    log.summary('Finding library books with keywords %s' % keywords)

    found_books = set()

    if discover_books:
        library_count = len(search_libraries)

        # the index of a library is also the number of searches completed before it
        for lib_index, lib in enumerate(search_libraries):
            find_book_at_lib(log, found_books, '', [keywords], True, config, lib,
                             status, lib_index, library_count, abort)

            if abort.is_set():
                abort_job()

    return found_books


def find_library_books_by_author(abort, log, status, alt_names, config, search_libraries, try_titles, discover_books):
    '''
    Search lending libraries for a set of calibre books with the same primary author
    Try the search with variations on the author name in order to get maximum results.

    Most sites expect author names in "first last" format with optional punctuation.

    Each library is searched once for each name in alt_names, with the abort event checked after
    every search. Progress is counted per library. discover_books is accepted but not used here.
    Returns the set that find_book_at_lib was given to collect the found books.
    '''
    found_books = set()
    library_count = len(search_libraries)

    # the index of a library is also the number of libraries completed before it
    for lib_index, lib in enumerate(search_libraries):
        for name_variant in alt_names:
            find_book_at_lib(log, found_books, name_variant, try_titles, False, config, lib,
                             status, lib_index, library_count, abort)

            if abort.is_set():
                abort_job()

    return found_books


def get_library_book_details(abort, log, status, config, discover_books, discover_by_keyword, library_books):
    '''
    Get the details of the books found by searching and return the set of those books that meet
    the search criteria, with their links created.

    Steps:
      1. When caching is enabled, merge books having the same book key to reduce lookups.
      2. Drop books already known to have a wrong format or language.
      3. Get details for the remaining books, from the cache when allowed and otherwise from a
         library picked at random among those that have the book. Newly obtained details are
         saved to the cache after all lookups are done. While this step runs each book has a
         temporary ibook attribute holding its details (None if there are none).
      4. Check format, language and excluded authors again using the completed book data.
      5. Create the links of the books that are kept.

    abort: event checked after each lookup; abort_job() is called once it is set
    status: receives the completed fraction and a description of each lookup
    library_books: set of library books from the find phase
    discover_books, discover_by_keyword: accepted but not used by this function
    '''

    # Merge books with same provider and book id (when possible) to reduce lookups
    if config.cache_days_to_keep > 0:
        unique_books = {}

        for lbook in library_books:
            if lbook.book_key in unique_books:
                unique_books[lbook.book_key].merge_from(
                    lbook,
                    check_same_formats=not SearchableLibrary.provider(lbook.provider_id).allow_format_merge,
                    check_add_formats=False)
            else:
                unique_books[lbook.book_key] = lbook

        library_books = set(unique_books.values())

    # Check for results that match search criteria
    kept_books = set()

    for lbook in library_books:
        # if any formats are provided in find phase they cannot be added-to in get book details
        if len(lbook.formats) != 0 and lbook.formats.isdisjoint(config.search_formats):
            log.info('Ignoring (wrong format): %s' % repr(lbook))
        elif config.search_language and lbook.language and config.search_language != lbook.language:
            log.info('Ignoring (wrong language): %s' % repr(lbook))
        else:
            kept_books.add(lbook)

    library_books = kept_books

    # Get more details for remaining books
    if library_books:
        with LibraryBookCache(log, config) as cache:
            library_books_to_get = []

            for lbook in library_books:
                lbook.ibook = None

                if lbook.allow_get_from_cache:
                    lbook.ibook = cache.get_book(lbook.book_key)

                if lbook.ibook is not None:
                    fixup_cached_ibook_data(lbook)

                # checked again since ibook may have been changed by the fix up
                if lbook.ibook is not None:
                    update_book_info(log, lbook)
                else:
                    library_books_to_get.append(lbook)

            new_ibooks = []
            random.shuffle(library_books_to_get)
            total_to_get = len(library_books_to_get)

            for i, lbook in enumerate(library_books_to_get):
                # Select a library host at random, giving preference to those that actually have the book
                select_from = lbook.available
                if not select_from:
                    select_from = lbook.purchasable
                if not select_from:
                    select_from = lbook.recommendable
                if not select_from:
                    log.error('No library known for %s' % repr(lbook))
                    continue

                lib = random.choice(list(select_from))

                book_str = '%s@%s' % (lbook.book_id, lib.name)
                status.update(i / total_to_get, book_str)
                log.info('Getting info for %s' % book_str)
                log.context('Getting info for %s' % book_str)

                try:
                    start_time = time.time()
                    lbook.ibook = lib.get_book_info(lbook.book_id, cache)

                    duration = time.time() - start_time
                    if duration > 10.0:
                        log.info('Get book info took %.1f sec' % duration)

                except Exception as e:
                    # a failed lookup is logged and the book is kept with the data it already has
                    log.exception('', e)

                log.context(None)

                if (lbook.ibook is not None) and lbook.ibook.cache_allowed:
                    new_ibooks.append(lbook.ibook)

                update_book_info(log, lbook)

                if abort.is_set():
                    abort_job()

            for new_ibook in new_ibooks:
                cache.save_book(new_ibook)

            # remove the temporary attribute
            for lbook in library_books:
                del lbook.ibook

    # Recheck search criteria now that books are finalized
    # Keep books with no author since that can happen for anthologies from Freading
    kept_books = set()

    for lbook in library_books:
        excluded_auths = list(set([author_match_prep(author) for author in lbook.authors]) & config.excluded_authors)

        if len(lbook.formats) == 0:
            log.info('Ignoring (no format): %s' % repr(lbook))
        elif lbook.formats.isdisjoint(config.search_formats):
            log.info('Ignoring (wrong format): %s' % repr(lbook))
        elif config.search_language and lbook.language and config.search_language != lbook.language:
            log.info('Ignoring (wrong language): %s' % repr(lbook))
        elif excluded_auths:
            log.info('Ignoring (excluded author %s): %s' % (' & '.join(excluded_auths), repr(lbook)))
        else:
            kept_books.add(lbook)

    library_books = kept_books

    # Finalize links
    for lbook in library_books:
        lbook.create_links(config.search_formats)

    return library_books


def update_book_info(log, lbook):
    '''
    Bring a library book up to date once its details (lbook.ibook) are available or are known
    to be missing.

    Merges the details into the book if there are any, then always applies the title and data
    fix ups and fills in an empty author list or title. The book is logged if its repr changed.
    '''
    repr_before = repr(lbook)

    details = lbook.ibook
    if details is not None:
        # Update book with new information
        lbook.merge_from(details, check_same_formats=False, check_add_formats=True)

    # fix books with incomplete titles, then apply fix ups to known bad data
    force_unique_title(log, lbook)
    fixup_book_data(log, lbook)

    # Calibre requires empty title or author to be "Unknown"
    if not len(lbook.authors):
        lbook.authors.append(UNKNOWN)

    if not len(lbook.title):
        lbook.title = UNKNOWN

    repr_after = repr(lbook)
    if repr_after != repr_before:
        log.info('Updated book info: %s' % repr_after)


def match_library_and_calibre_books(log, config, searched_calibre_books, library_books, search_authors):
    '''
    Match the results from search against the list of books we are trying to locate

    Links of every matching library book are added to the matching calibre book. Afterwards the
    original links of each searched calibre book are reconciled with the new ones and
    check_availability is set on the book.

    searched_calibre_books: calibre books to match; their matched attribute must already exist
    library_books: set of library books produced by the search
    search_authors: authors that were searched for by this job

    Returns (matched_calibre_books, unmatched_library_books): the list of searched calibre books
    whose matched attribute is true and a new set of the library books that matched no calibre book.
    '''
    log.info('Matching %s with %s' % (
            value_unit(len(searched_calibre_books), 'calibre book'), value_unit(len(library_books), 'library book')))

    start_time = time.time()

    # work on a copy so that the caller's set is left unchanged
    unmatched_library_books = library_books.copy()

    # look for matches between calibre books and search result books

    for cbook, lbook in match_book_lists(searched_calibre_books, library_books, config, first_is_calibre_books=True):
        lbook.odnid = ''             # set temporary attribute

        # the debug plugin, when installed, takes over reporting of the match
        if match_needed is not None:
            match_needed(log, cbook, lbook)
        else:
            log.info('For "%s" found match "%s"' % (repr(cbook), repr(lbook)))

        cbook.add_links_from(lbook)
        cbook.matched = True

        del lbook.odnid             # remove temporary attribute
        unmatched_library_books.discard(lbook)

    # check for possible link removal
    # only considered when availability is being checked and nothing has gone wrong so far
    check_possible_removal_all = config.check_availability_of_new_links and not log.has_errors()

    for cbook in searched_calibre_books:
        # limited to books with a single author who is one of the authors searched for
        check_possible_removal = (check_possible_removal_all and len(cbook.authors) == 1 and
                                  is_any_same_author(search_authors, cbook.authors, config))

        # applies only when the debug plugin is installed
        if check_possible_removal and (match_needed is not None) and not (cbook.odnid or cbook.orig_odid):
            log.info("Skipping removal check for %s" % repr(cbook))
            check_possible_removal = False

        # NOTE(review): presumably the partial preserve keeps only those original links that this
        # search could not have found again, so that the comparison below notices links that have
        # gone missing -- confirm against preserve_partial_orig_links
        if check_possible_removal:
            cbook.preserve_partial_orig_links(config)
        else:
            cbook.preserve_orig_links()

        # availability is checked only for books whose links differ from their original links
        cbook.check_availability = config.check_availability_of_new_links and (cbook.odid != cbook.orig_odid)
        if cbook.check_availability:
            cbook.matched = True

        # the comparison is done, so the remaining original links are preserved as well
        if check_possible_removal:
            cbook.preserve_orig_links()

        cbook.remove_outdated_links()

    matched_calibre_books = [cbook for cbook in searched_calibre_books if cbook.matched]

    duration = time.time() - start_time
    log.info('Found %d matching calibre books in %.1f sec' % (len(matched_calibre_books), duration))

    return (matched_calibre_books, unmatched_library_books)


def discover_new_books(log, config, unmatched_library_books, all_calibre_books, orig_discovered_books,
                       possible_discovery_authors, discover_by_keyword):
    '''
    Work out which of the leftover (unmatched) search results should be reported as discovered books.

    The returned list contains:
      - copies of previously discovered books whose links changed (is_newly_discovered = False)
      - library books not matching any previously discovered book that have at least one link and,
        unless discover_by_keyword is set, at least one author matching a discovery author
        (is_newly_discovered = True)

    An empty list is returned when there are no usable discovery authors and discover_by_keyword is not set.
    all_calibre_books is accepted but not used here.
    '''

    # Authors usable for discovery: not excluded by configuration and not "unknown"
    discovery_authors = [
        author for author in possible_discovery_authors
        if (author_match_prep(author) not in config.no_discovery_authors) and not is_unknown(author)]

    unmatched_desc = value_unit(len(unmatched_library_books), 'unmatched library book')
    previous_desc = value_unit(len(orig_discovered_books), 'previously discovered book')
    author_desc = value_unit(len(discovery_authors), 'author')
    log.info('Checking %s, %s, %s' % (unmatched_desc, previous_desc, author_desc))

    discovered_books = []

    if not (discovery_authors or discover_by_keyword):
        return discovered_books

    def candidate_from(lbook):
        # Wrap a library search result as a discovery candidate that starts with no "original" links
        candidate = DiscoveredBook(
            authors=lbook.authors, title=lbook.title,
            odid=lbook.odid, odrid=lbook.odrid, odpid=lbook.odpid,
            publisher=lbook.publisher, pubdate=lbook.pubdate, isbn=lbook.isbn,
            series=lbook.series, series_index=lbook.series_index, language=lbook.language)
        candidate.clear_orig_links()
        candidate.discard = False
        return candidate

    # Phase 1: build candidates and group them under the previously discovered book they match
    phase_start = time.time()
    possible_discovered_books = [candidate_from(lbook) for lbook in unmatched_library_books]

    odmatches = defaultdict(set)
    for odbook, ndbook in match_book_lists(orig_discovered_books, possible_discovered_books, config):
        if not ndbook.discard:
            odmatches[odbook].add(ndbook)

    elapsed = time.time() - phase_start
    log.info('Matched %d previously discovered books in %.1f sec' % (len(odmatches), elapsed))

    # Phase 2: report previously discovered books only if their links changed
    phase_start = time.time()
    for odbook, matching_candidates in odmatches.items():
        updated = copy.deepcopy(odbook)         # work on a copy so the original stays intact

        updated.move_current_links_to_orig()    # what was known before becomes the "original" state
        updated.preserve_orig_links()

        for ndbook in matching_candidates:
            updated.merge_from(ndbook)          # merging only adds links, it never removes them
            ndbook.discard = True               # candidate is now accounted for

        updated.remove_outdated_links()

        if not updated.links_have_changed():
            log.info('Unchanged previously discovered book: %s' % repr(updated))
            continue

        log.info('Availability changed for previously discovered book: %s' % repr(updated))
        updated.is_newly_discovered = False
        discovered_books.append(updated)

    elapsed = time.time() - phase_start
    log.info('Checking previously discovered books for link changes took %.1f sec' % elapsed)

    # Phase 3: fold together remaining candidates that match each other (same author/title)
    phase_start = time.time()
    for keeper, duplicate in match_book_lists(possible_discovered_books, possible_discovered_books, config):
        if (keeper is not duplicate) and (not keeper.discard) and (not duplicate.discard):
            keeper.merge_from(duplicate)
            duplicate.discard = True

    elapsed = time.time() - phase_start
    log.info('Consolidating discovered books took %.1f sec' % elapsed)

    # Phase 4: keep the surviving candidates that have links and a wanted author
    phase_start = time.time()
    author_verdicts = {}    # author name -> result of is_any_same_author against discovery_authors
    newly_discovered = 0
    cache_hits = 0
    cache_misses = 0

    for ndbook in possible_discovered_books:
        if ndbook.discard:
            continue

        ndbook.remove_outdated_links()

        # a candidate without any link is not worth saving (may be a Scribd non-book, etc.)
        has_links = ndbook.odid or ndbook.odrid or ndbook.odpid
        if not has_links:
            log.info('Ignoring (no links): %s' % repr(ndbook))
            continue

        if not discover_by_keyword:
            # at least one of the book's authors must be a discovery author
            author_wanted = False
            for name in ndbook.authors:
                verdict = author_verdicts.get(name)
                if verdict is not None:
                    cache_hits += 1
                else:
                    cache_misses += 1
                    verdict = is_any_same_author(discovery_authors, [name], config)
                    author_verdicts[name] = verdict

                if verdict:
                    author_wanted = True
                    break

            if not author_wanted:
                log.info('Ignoring (not discovery author): %s' % repr(ndbook))
                continue

        log.info('New discovered book: %s' % repr(ndbook))
        ndbook.is_newly_discovered = True
        discovered_books.append(ndbook)
        newly_discovered += 1

    elapsed = time.time() - phase_start
    log.info('Adding %d newly discovered books took %.1f sec (cache: %d hits, %d misses)' % (
        newly_discovered, elapsed, cache_hits, cache_misses))

    return discovered_books


def find_book_at_lib(log, books, search_author, search_titles, keyword_search, config, lib,
                     search_status, searches_complete, total_searches, abort):

    '''
    Search library for books that match an author/title (or subsets thereof) and return the found matches with identifiers.

    log = Job log; the search description is set as its context while a search runs
    books = Set of Books to be updated with the search results
    search_author = Author to search for (may be empty)
    search_titles = List of titles to search for; a single search for all books by the author is
        done instead when the list is empty, or has several titles and the library is not Amazon
        and this is not a keyword search
    keyword_search = Passed through to lib.find_books
    config = Configuration; only search_formats is used here
    lib = Library to be searched (signed in first if that has not yet been done)
    search_status, searches_complete, total_searches = Progress reporting
    abort = Not used by this function

    Return True if number of results limit exceeded by any of the searches performed.
    Errors raised by a search are logged, not propagated.
    '''
    search_status.update(searches_complete / total_searches, '%s@%s' % (search_author, lib.name))

    sign_in(log, lib, False)

    MAX_RESULTS_ALLOWED = 200
    limit_exceeded = False

    if lib.signin_required and (not lib.signed_in) and (not tweaks.get(TWEAK_IGNORE_LOGIN_FAILURE, False)):
        log.info('Cannot perform search at %s due to sign in failure' % lib.name)
        return limit_exceeded   # Sign in failure

    if len(lib.formats_supported & config.search_formats) == 0:
        log.info('Skipping %s: Supports none of the configured book formats' % lib.name)
        return limit_exceeded   # No search formats supported

    if len(search_titles) == 0 or (len(search_titles) > 1 and (not lib.is_amazon) and (not keyword_search)):
        search_titles = ['']    # search for all books by author

    for search_title in search_titles:
        if len(search_author) <= 2 and not search_title:
            return limit_exceeded   # Very short author names return bad results

        desc = 'Search %s for %s by %s' % (lib.name, search_title if search_title else 'all', search_author if search_author else 'all')

        new_books = set()

        # Track the limit separately for each search. If this search fails, a flag left over from
        # an earlier search must not decide what happens to this search's (partial) results.
        search_limit_exceeded = False

        log.info(desc)
        log.context(desc)
        try:
            start_time = time.time()
            search_limit_exceeded = lib.find_books(new_books, search_author, search_title, keyword_search)
            log.info('Found %s in %.1f sec' % (value_unit(len(new_books), 'book'), time.time() - start_time))

        except Exception as e:
            log.exception('', e)    # best effort: whatever was collected before the error is still used

        if search_limit_exceeded:
            if len(new_books) > MAX_RESULTS_ALLOWED:
                # discard excessive results as likely incorrect
                log.info('Excessive number of results returned by search. Results discarded.')
            else:
                log.info('Excessive number of results returned by search. Results kept.')
                books.update(new_books)

        else:
            books.update(new_books)

        # remember an overflow from any search, not only from the last one
        limit_exceeded = limit_exceeded or search_limit_exceeded

        log.context(None)

    return limit_exceeded


def sign_in(log, lib, for_availability):
    '''
    Sign in to a library unless that has already been attempted (lib.did_sign_in).

    When called for an availability check, a library that was already accessed may be signed in
    a second time (at most once, tracked by lib.did_second_sign_in) if sign in affects its
    availability results, a card number is configured and the earlier attempt did not sign in.

    Errors raised by lib.sign_in are logged, not propagated; the attempt is recorded as done either way.
    '''

    do_regular_sign_in = not tweaks.get(TWEAK_SKIP_LOGIN, False)

    # Decide whether a second, credentialed sign in is warranted for the availability check
    if for_availability and lib.sign_in_affects_get_current_availability and lib.did_sign_in and lib.card_number:
        if (not do_regular_sign_in) and (not lib.signed_in) and (not lib.signin_required) and (
                not lib.did_second_sign_in):
            log.info('Performing second sign in with credentials for %s' % (lib.name))
            lib.did_sign_in = False     # forces the sign in below to run again
            lib.did_second_sign_in = True

    if not lib.did_sign_in:
        log.info('Accessing library %s' % (lib.name))
        log.context('Signing in to %s' % (lib.name))
        try:
            began = time.time()

            if for_availability:
                with_credentials = lib.sign_in_affects_get_current_availability
            else:
                with_credentials = do_regular_sign_in

            lib.sign_in(with_credentials)

            elapsed = time.time() - began
            if elapsed > 10.0:
                log.info('Sign in took %.1f sec' % elapsed)

        except Exception as e:
            log.exception('', e)

        log.context(None)

        lib.did_sign_in = True      # do not retry, whether or not the attempt succeeded


def fixup_cached_ibook_data(lbook):
    '''
    Drop cached book data (lbook.ibook is set to None) when it was produced by an older release
    in a form that is no longer considered valid. Otherwise lbook is left untouched.
    '''
    ibook = lbook.ibook

    outdated = (
        # old format value that may have been cached prior to v1.18.0
        FORMAT_SCRIBD_EBOOK in ibook.formats or
        # books incorrectly parsed from OverDrive that may have been cached prior to v1.24.0
        (ibook.provider_id == OverDrive.id and len(ibook.formats) == 0) or
        # books whose formats were not checked from the actual book page, from releases prior to v1.25.0
        (ibook.provider_id == Scribd.id and len(ibook.formats) == 0) or
        # books whose language was incorrect due to Amazon website change, from releases prior to v2.43.0
        (ibook.provider_id == Amazon.id and ibook.language.startswith(":")))

    if outdated:
        lbook.ibook = None


def get_current_availability(abort, log, status, config):
    '''
    Job entry point: check current availability for every book in config.calibre_books.

    Returns a tuple of (the calibre books, an empty list).
    '''
    libraries = init_libraries(abort, log, config)[0]
    books = config.calibre_books

    # don't disturb regular links
    for book in books:
        book.preserve_orig_links()

    get_current_avail(abort, log, status, config, books, libraries)

    return (books, [])


def get_current_avail(abort, log, status, config, calibre_books, search_libraries):
    '''
    Check if books are currently available to borrow and estimate hold time if not.

    For each book the links in book.odid are checked, or those in book.odnid when
    book.wait_weeks_field is IDENT_NEEDED_LINK. Books with any other non-empty wait_weeks_field
    are skipped entirely and are not modified.

    Results are stored on the book:
      - book.wait_weeks: comma separated entries of the form "<weeks>[H]@<format_and_name>", where
        <weeks> is two digits or "XX" when unknown and "H" marks a book that is on hold. Entries are
        ordered by the priority given in the TWEAK_WAIT_GROUPS tweak, if set.
      - book.odid/book.odnid: rewritten (only if the book had links) without links to removed books
        or unconfigured libraries, and with replacement book ids where the library reported one.

    abort = Event checked after each link that was matched to a library; abort_job is called if set
    search_libraries = Libraries that links may refer to (matched on provider_id and library_id)
    '''

    filter_by_library = False
    lib_wait_priority = {}      # name from the wait groups tweak -> index of the group containing it

    wait_groups = tweaks.get(TWEAK_WAIT_GROUPS, None)
    if wait_groups is not None:
        log.info('%s tweak set to %s' % (TWEAK_WAIT_GROUPS, str(wait_groups)))
        filter_by_library = True    # only results for names listed in the tweak are reported
        for priority, lib_group in enumerate(wait_groups):
            for lib_name in lib_group:
                lib_wait_priority[lib_name] = priority

                # names are compared to configured library names with any '#' removed
                # NOTE(review): presumably '#' is part of the format prefix of format_and_name() - confirm
                if lib_name.replace('#', '') not in [lib.name for lib in config.libraries]:
                    log.warn('%s tweak contains unconfigured library name: %s' % (TWEAK_WAIT_GROUPS, lib_name))

    for i, book in enumerate(calibre_books):
        status.update(i / len(calibre_books), str(book))

        wait_weeks_set = set()      # (priority, result string) tuples collected for this book
        replace_odid = replace_odnid = False

        # select which set of links is to be checked (and rewritten afterwards)
        if (not book.wait_weeks_field) or book.wait_weeks_field == IDENT_AVAILABLE_LINK:
            links_str = book.odid
            replace_odid = True
        elif book.wait_weeks_field == IDENT_NEEDED_LINK:
            links_str = book.odnid
            replace_odnid = True
        else:
            continue

        odlinks = ODLinkSet(links_str, config=config).odlinks

        if len(odlinks) > 0:
            log.info(LOG_SEPERATOR)
            log.info('Getting current availability of %s for %d %s links' % (str(book), len(odlinks), book.wait_weeks_field))
            new_odidlinks = ODLinkSet()     # links that survive the check

            for odlink in odlinks:
                new_odlink = odlink     # keep the link unchanged unless the check says otherwise

                # find the library this link belongs to; the else clause handles "no such library"
                for lib in search_libraries:
                    if lib.provider_id == odlink.provider_id and lib.library_id == odlink.library_id:
                        # a disabled library is still checked unless the tweak restricts checks to enabled ones
                        if lib.enabled or not tweaks.get(TWEAK_CHECK_ONLY_ENABLED_LIB_AVAIL, False):
                            sign_in(log, lib, True)  # Sign in to library, if needed and not yet done

                            log.context('Getting %s current availability at %s' % (odlink.book_id, lib.name))
                            new_book_id = odlink.book_id

                            try:
                                start_time = time.time()

                                # returns either a wait duration, False for book removed, or a tuple of
                                # (wait duration, is on hold, replacement book_id)
                                wait_weeks = lib.get_current_book_availability(odlink.book_id)

                                duration = time.time() - start_time
                                if duration > 10.0:
                                    log.info('get_current_book_availability took %.1f sec' % duration)

                            except Exception as e:
                                log.exception('', e)
                                wait_weeks = None       # unknown error - keep link

                            log.context(None)

                            if wait_weeks is False:
                                new_book_id = None      # book removed - the link is dropped below
                            else:
                                is_on_hold = False

                                if type(wait_weeks) is tuple:
                                    wait_weeks, is_on_hold, new_book_id = wait_weeks

                                format_and_name = odlink.format_and_name()
                                if (not filter_by_library) or format_and_name in lib_wait_priority:
                                    wait_weeks_str = '%s%s@%s' % (
                                        '%02d' % wait_weeks if wait_weeks is not None else 'XX',
                                        'H' if is_on_hold else '',
                                        format_and_name)
                                    # 9999 sorts results without a configured priority last
                                    wait_weeks_set.add((lib_wait_priority.get(format_and_name, 9999), wait_weeks_str))
                                else:
                                    log.info('Result filtered by %s tweak at %s' % (TWEAK_WAIT_GROUPS, format_and_name))

                            if new_book_id != odlink.book_id:
                                if new_book_id:
                                    # same library and media type, replacement book id
                                    new_odlink = ODLink(provider_id=odlink.provider_id,
                                                        library_id=odlink.library_id, book_id=new_book_id,
                                                        is_audiobook=odlink.is_audiobook, config=config)
                                else:
                                    new_odlink = None

                        if abort.is_set():
                            abort_job()

                        break   # library found - also prevents the else clause from running
                else:
                    log.info('Discarding link to unconfigured library: %s' % str(odlink))
                    new_odlink = None

                if new_odlink is not None:
                    new_odidlinks.add(new_odlink)

            if replace_odid:
                book.odid = str(new_odidlinks)
            elif replace_odnid:
                book.odnid = str(new_odidlinks)

        # sorted by (priority, result string); only the result strings are stored
        book.wait_weeks = ', '.join([ob[1] for ob in sorted(list(wait_weeks_set))])


def abort_job():
    '''
    Stop the running job by raising RuntimeWarning('Job aborted').
    '''
    # special SSL handling (cleanup_ssl) is intentionally not performed here
    reason = 'Job aborted'
    raise RuntimeWarning(reason)
