﻿#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2016, John Howell <jhowell@acm.org>'
__docformat__ = 'restructuredtext en'

import time
import cPickle
import copy
import random
from threading import Event
from collections import (defaultdict, namedtuple)

try:
    from PyQt5.Qt import (Qt, QProgressDialog)
except ImportError:
    from PyQt4.Qt import (Qt, QProgressDialog)


from calibre.gui2.threaded_jobs import ThreadedJob
from calibre.gui2 import (Dispatcher, config)
from calibre.utils.config_base import tweaks
from calibre.constants import numeric_version

from calibre_plugins.overdrive_link import ActionOverdriveLink
from calibre_plugins.overdrive_link.match import (alternate_author_names,
    same_author, match_prep, word_from_title, safe_word_from_title,
    primary_author, UNKNOWN, match_book_lists, force_unique_title)
from calibre_plugins.overdrive_link.numbers import (value_unit)
from calibre_plugins.overdrive_link.book import (DiscoveredBook, author_sort_key, need_levels)
from calibre_plugins.overdrive_link.formats import (ALL_READABLE_FORMATS, 
    ALL_LISTENABLE_FORMATS, FORMAT_SCRIBD_EBOOK)
from calibre_plugins.overdrive_link.fixups import fixup_book_data
from calibre_plugins.overdrive_link.link import (IDENT_RECOMMENDABLE_LINK,
    LINK_AVAILABLE, LINK_RECOMMENDABLE, LINK_PURCHASABLE,
    LINK_FORMAT_EBOOK, LINK_FORMAT_AUDIOBOOK, BOOK_DISCOVERED,
    ODLinkSet, linksets_str)
from calibre_plugins.overdrive_link.cache import LibraryBookCache
from calibre_plugins.overdrive_link.log import (ODStatus, ODLog, JobLog, long_log_test)
from calibre_plugins.overdrive_link.net import (set_num_simultaneous_queries, init_ssl, cleanup_ssl)
from calibre_plugins.overdrive_link.library import SearchableLibrary
from calibre_plugins.overdrive_link.tweak import (
    TWEAK_SKIP_LOGIN, TWEAK_INCREMENTAL_AUTHOR_COUNT,
    TWEAK_TRACK_NEEDED_BOOKS, TWEAK_KEEP_SCRIBD_IF_REMOVED, TWEAK_IGNORE_LOGIN_FAILURE,
    TWEAK_WAIT_GROUPS)
from calibre_plugins.overdrive_link.amazon import (Amazon, inventory_amazon_et)
from calibre_plugins.overdrive_link.audible import (Audible)
from calibre_plugins.overdrive_link.axis_360 import (Axis360)
from calibre_plugins.overdrive_link.cloud_library import (CloudLibrary, inventory_cloud_library_sites)
from calibre_plugins.overdrive_link.ebscohost import (EBSCOhost)
from calibre_plugins.overdrive_link.enki import (Enki)
from calibre_plugins.overdrive_link.freading import (Freading, inventory_freading_sites)
from calibre_plugins.overdrive_link.hoopla import (Hoopla)
from calibre_plugins.overdrive_link.overdrive import (OverDrive, inventory_overdrive_sites,
    inventory_overdrive_books)
from calibre_plugins.overdrive_link.one_click_digital import (OneClickDigital)
from calibre_plugins.overdrive_link.open_library import (OpenLibrary, read_openlib_book_online)
from calibre_plugins.overdrive_link.project_gutenberg import (ProjectGutenberg, build_gutenberg_index)
from calibre_plugins.overdrive_link.scribd import (Scribd)


# Register every imported provider class once, when this module is imported.
# Initializing these here avoids problems within search jobs
Amazon.register()
Audible.register()
Axis360.register()
CloudLibrary.register()
EBSCOhost.register()
Enki.register()
Freading.register()
Hoopla.register()
OneClickDigital.register()
OpenLibrary.register()
OverDrive.register()
Scribd.register()
ProjectGutenberg.register()


# NOTE(review): not referenced in the visible part of this file; presumably the
# per-library limit applied by check_for_excessive_links -- confirm.
MAX_ALLOWED_LINKS_BY_LIBRARY = 4

# report_excessive_wrong_authors only logs a (provider, search author) pair when
# more than this many of its results fail the author check.
MAX_WRONG_AUTHOR_RESULTS = 20

# Divider line written to the job log between the phases of a search.
# (The spelling is part of the identifier and is used throughout, so it is kept.)
LOG_SEPERATOR = '================================================='

# Seconds. The matching phase logs its duration only when it took longer than this.
MIN_LOG_TIME = 2.5


def _check_obtainable(abort, log, status, config):
    '''
    Forward the call, unchanged, to check_obtainable and return its result.

    SPECIAL_JOBS refers to this wrapper instead of check_obtainable itself, so the
    name check_obtainable is only looked up when the job actually runs.
    '''
    return check_obtainable(abort, log, status, config) # allow forward reference
    
def _uncache_books(abort, log, status, config):
    '''
    Forward the call, unchanged, to uncache_books and return its result.

    SPECIAL_JOBS refers to this wrapper instead of uncache_books itself, so the
    name uncache_books is only looked up when the job actually runs.
    '''
    return uncache_books(abort, log, status, config) # allow forward reference
    

# name: text used as the job description; function: callable invoked by
# lending_library_search as function(abort, log, status, config).
SpecialJob = namedtuple('SpecialJob', 'name function')

# Special (non-search) jobs. A job is selected when the "keywords" of a search
# are exactly equal to one of these dictionary keys.
SPECIAL_JOBS = {
    'build_gutenberg_index': SpecialJob('Build Project Gutenberg search index', build_gutenberg_index),
    'check_obtainable': SpecialJob('Check availability of linked books', _check_obtainable),
    'inventory_amazon_et': SpecialJob('Inventory Amazon books for Enhanced Typesetting', inventory_amazon_et),
    'inventory_cloud_library_sites': SpecialJob('Inventory Cloud Library sites', inventory_cloud_library_sites),
    'inventory_freading_sites': SpecialJob('Inventory Freading sites', inventory_freading_sites),
    'inventory_overdrive_sites': SpecialJob('Inventory OverDrive sites', inventory_overdrive_sites),
    'inventory_overdrive_books': SpecialJob('Inventory OverDrive books', inventory_overdrive_books),
    'read_openlib_book_online': SpecialJob('Read Open Library book online', read_openlib_book_online),
    'long_log_test': SpecialJob('Long log test', long_log_test),
    'uncache_books': SpecialJob('Remove cache entries of linked books', _uncache_books),
    }

  
'''
Perform search using a threaded job
'''

def worker_limit():
    '''
    Return half of calibre's 'worker_limit' preference, truncated to an integer.

    Used by start_search_job as the upper bound on simultaneous search jobs.
    '''
    calibre_limit = config['worker_limit']  # from calibre prefs
    return int(calibre_limit / 2.0)
    
    
class AuthorGroup(object):
    '''
    Holds a group of authors with books in common

    parent is the owning set of groups. It must provide a "groups" set and a
    "group_of_author" dict (author -> group), both of which are kept up to date
    by this class.

    authors holds every author in the group; primary_authors is the subset that
    was added with primary=True.
    '''

    def __init__(self, parent):
        self.parent = parent
        self.authors = set()
        self.primary_authors = set()
        self.parent.groups.add(self)

    def add(self, author, primary):
        '''
        Put author in this group.

        If the author currently belongs to a different group then that entire group
        is merged into this one (each moved author keeps its primary status) and the
        emptied group removes itself from the parent.

        primary=True always marks the author as primary, even when the author was
        already a non-primary member of this group or of a merged group. A primary
        author is never demoted by a later primary=False.
        '''
        current_group = self.parent.group_of_author.get(author)

        if current_group is None:
            # not in any group yet
            self.authors.add(author)
            self.parent.group_of_author[author] = self

        elif current_group is not self:
            # move entire old group to this one; iterate over a copy since remove() mutates the set
            for a in current_group.authors.copy():
                was_primary = current_group.remove(a)
                self.add(a, primary=was_primary)

        if primary:
            # author is a member of this group in every case by this point
            self.primary_authors.add(author)

    def remove(self, author):
        '''
        Take author out of this group and out of the parent's author index.

        The group removes itself from the parent when its last author is removed.
        Returns True if the author was a primary author of this group.
        Raises KeyError if the author is not a member.
        '''
        self.authors.remove(author)

        was_primary = author in self.primary_authors
        if was_primary:
            self.primary_authors.remove(author)

        del self.parent.group_of_author[author]

        if len(self.authors) == 0:
            self.parent.groups.remove(self)

        return was_primary

    def key(self):
        '''
        Sort key: (-size, sort key of the least primary author name), so that larger
        groups sort first. Raises ValueError for an empty group.
        '''
        if len(self.authors) == 0:
            raise ValueError('Empty AuthorGroup has no key')

        # fall back to all authors so that a group without primaries still has a key
        candidates = self.primary_authors or self.authors
        return (-len(self.authors), author_sort_key(min(candidates)))
        
        
class AuthorGroupSet(object):
    '''
    Owner of a collection of AuthorGroup objects plus an index from each author
    to the group that currently contains it.
    '''

    def __init__(self):
        self.groups = set()
        self.group_of_author = {}

    def group_of(self, author):
        '''
        Return the group containing author. An author that is not yet in any group
        is placed, as a primary author, in a newly created group.
        '''
        existing = self.group_of_author.get(author)
        if existing:
            return existing

        created = AuthorGroup(self)
        created.add(author, primary=True)
        return created

    def sorted_groups(self):
        '''
        Return a new list of the groups ordered by each group's key().
        '''
        ordered = list(self.groups)
        ordered.sort(key=lambda grp: grp.key())
        return ordered

    def check(self):
        '''
        Debugging aid: raise ValueError if the author index and the groups disagree.
        '''
        # every indexed author must be a member of the group it is indexed under
        if any(name not in owner.authors for name, owner in self.group_of_author.items()):
            raise ValueError()

        # every member of every group must be indexed under that same group
        if any(self.group_of_author[name] is not grp for grp in self.groups for name in grp.authors):
            raise ValueError()

           

def start_search_jobs(action, config, all_calibre_books, selected_ids, discover_books, orig_discovered_books,
            keywords, library_names, incremental_sequence, callback):
    '''
    Split the selected books into one or more search jobs and queue them.

    Authors of the selected books are grouped so that authors with a book in common
    are always in the same group. Groups are then packed into jobs holding at most
    "split_search_author_count" authors each. A count of 0 (always used for keyword
    searches) puts everything in a single job. A group larger than the limit still
    gets a job of its own.

    incremental_sequence: None to queue every job. Otherwise only the job whose index
    equals incremental_sequence modulo the number of jobs is queued.

    A progress dialog is shown while queuing. Pressing Cancel stops queuing further
    jobs; jobs already queued are not affected by this function.
    '''

    # Organize the selected ids by author(s)
    # Split into multiple groups. Keep authors with books in common together in same group.

    author_group_set = AuthorGroupSet()
    selected_ids_by_author = defaultdict(list)

    for book in all_calibre_books:
        if book.id in selected_ids:
            book_group = author_group_set.group_of(primary_author(book.authors))

            # only the first config.num_authors_to_search authors of each book are searched
            for i, author in enumerate(book.authors[0:min(config.num_authors_to_search, len(book.authors))]):
                selected_ids_by_author[author].append(book.id)
                book_group.add(author, primary=(i == 0))  # all authors for this book must be in the same group



    # combine groups into jobs limited by a configured number of authors

    job_book_ids_by_author_list = []    # one dict (author -> book ids) per job
    job_book_ids_by_author = {}         # the job currently being filled
    job_author_count = 0                # number of authors in the job currently being filled
    sorted_groups = author_group_set.sorted_groups()    # largest group first

    if keywords:
        split_search_author_count = 0
    elif incremental_sequence is None:
        split_search_author_count = config.split_search_author_count
    else:
        # incremental searches may override the configured limit using a tweak
        split_search_author_count = tweaks.get(TWEAK_INCREMENTAL_AUTHOR_COUNT, config.split_search_author_count)

    while len(sorted_groups) > 0:
        if job_author_count == 0 or split_search_author_count == 0:
            # new job or not splitting. Take largest group
            i = 0

        else:
            # add largest group that will fit to existing job
            for i,group in enumerate(sorted_groups):
                if job_author_count + len(group.authors) <= split_search_author_count:
                    break   # found the largest group that will fit in this job

            else:
                # cannot add more without exceeding the configured author limit
                # close the current job and go around again with an empty one
                job_book_ids_by_author_list.append(job_book_ids_by_author)
                job_book_ids_by_author = {}
                job_author_count = 0
                continue

        group = sorted_groups.pop(i)
        # print('adding group of %d: %s ' % (len(group.authors), ', '.join(sorted(list(group.authors)))))
        for author in group.authors:
            job_book_ids_by_author[author] = selected_ids_by_author[author]

        job_author_count += len(group.authors)

    # handle any remainder
    # (also guarantees at least one job, possibly with no authors, e.g. for a keyword search)
    if (job_author_count > 0) or (len(job_book_ids_by_author_list) == 0):
        # print('queuing job with remainder')
        job_book_ids_by_author_list.append(job_book_ids_by_author)



    # queue jobs with progress indication

    job_count = len(job_book_ids_by_author_list)
    progress = QProgressDialog('Preparing ' + value_unit(job_count, 'search job'), 'Cancel', 0, job_count, action.gui)
    progress.setWindowTitle(action.name)
    progress.setWindowFlags(progress.windowFlags()&(~Qt.WindowContextHelpButtonHint))
    progress.setMinimumWidth(400)
    progress.setMinimumDuration(2000)   # Show progress only if taking a while
    progress.setModal(True)
    progress.setValue(0)

    for i, job_book_ids_by_author in enumerate(job_book_ids_by_author_list):
        # an incremental search runs just one of the jobs, chosen by its sequence number
        if incremental_sequence is None or (incremental_sequence % len(job_book_ids_by_author_list)) == i:
            start_search_job(
                    action, config, all_calibre_books, job_book_ids_by_author,
                    discover_books, orig_discovered_books, keywords, library_names, callback,
                    job_count if incremental_sequence is None else 1)

        progress.setValue(i + 1)
        if progress.wasCanceled():
            return

    progress.reset()
           
            
def start_search_job(action, config, all_calibre_books, selected_ids_by_author,
        discover_books, orig_discovered_books, keywords, library_names, callback, job_count):
    '''
    Queue a single search job, either as a separate worker process or as an
    in-process thread depending on config.allow_simultaneous_jobs.

    job_count is the number of jobs being queued together. It limits
    config.max_simultaneous_jobs when worker processes are used.
    '''

    if not keywords:
        desc = 'Search for books by ' + value_unit(len(selected_ids_by_author), 'author')
    elif keywords in SPECIAL_JOBS:
        desc = SPECIAL_JOBS[keywords].name
    else:
        desc = 'Search for books with keywords: ' + keywords

    # collect selected id across all authors (duplicates ok)
    selected_ids = [book_id for ids in selected_ids_by_author.values() for book_id in ids]

    job_id = action.register_search_job(selected_ids)

    config.clear_comparison_cache()     # clear cache so large data set is not passed to workers

    use_worker_process = config.allow_simultaneous_jobs

    # must be set before config is pickled below so that the job sees the value
    if use_worker_process:
        config.max_simultaneous_jobs = min(worker_limit(), job_count)
    else:
        config.max_simultaneous_jobs = 1

    # Pickle objects ahead of time. For a worker process the automatic unpickle would
    # otherwise fail due to the plugin not being loaded in time. For a thread this
    # gives the job its own copy of the config, calibre book and discovered book
    # objects to avoid interaction with later changes by the gui.
    job_args = (job_id, cPickle.dumps(config), cPickle.dumps(all_calibre_books), selected_ids_by_author,
        discover_books, cPickle.dumps(orig_discovered_books), keywords, library_names)

    if use_worker_process:
        # This uses a separate process to perform the work (ParallelJob).
        # This allows greater parallelism.
        action.gui.job_manager.run_job(Dispatcher(callback), 'arbitrary_n',
            args=['calibre_plugins.overdrive_link.jobs', 'lending_library_search_process', job_args],
            description=desc)

    else:
        # This uses an in-process thread to perform the work. This offers high performance,
        # but can suffer from memory leaks and will make the GUI less responsive.
        job = ThreadedJob(ActionOverdriveLink.name, desc, lending_library_search_thread,
            job_args, {}, callback)
        action.gui.job_manager.run_threaded_job(job)

    action.gui.status_bar.show_message('Lending library search started', 3000)
    

        
def lending_library_search_process(job_id, pickled_config, pickled_all_calibre_books, selected_ids_by_author,
                    discover_books, pickled_orig_discovered_books, keywords, library_names,
                    notification=lambda x,y:x):
    '''
    Entry point for a search job run in a separate worker process.

    Returns (job_id, calibre books, discovered books, errors, warnings, summaries).
    Any exception raised by the search is logged and produces empty book lists.

    NOTE(review): notification.queue is read below, so the default lambda (which has
    no "queue" attribute) would end up in the failure path -- confirm that callers
    always supply a notifier object.
    '''

    abort = Event()     # never set here; a process job has no abort signal of its own
    log = JobLog(ODLog(), [], [], [])

    calibre_books, discovered_books = [], []    # result reported on failure

    try:
        calibre_books, discovered_books = lending_library_search(
            pickled_config, pickled_all_calibre_books, selected_ids_by_author,
            discover_books, pickled_orig_discovered_books, keywords, library_names,
            abort, log, notification.queue)

    except Exception as e:
        log.exception('Search job failure', e)

    return (job_id, calibre_books, discovered_books, log.errors, log.warnings, log.summaries)


def lending_library_search_thread(job_id, pickled_config, pickled_all_calibre_books,
            selected_ids_by_author, discover_books, pickled_orig_discovered_books,
            keywords, library_names, abort, log, notifications):
    '''
    Entry point for a search job run as an in-process thread (ThreadedJob).

    Returns (job_id, calibre books, discovered books, errors, warnings, summaries).
    Any exception raised by the search is logged and produces empty book lists.
    '''

    log = JobLog(log, [], [], [])   # wrap the log supplied for the job

    calibre_books, discovered_books = [], []    # result reported on failure

    try:
        calibre_books, discovered_books = lending_library_search(
            pickled_config, pickled_all_calibre_books, selected_ids_by_author,
            discover_books, pickled_orig_discovered_books, keywords, library_names,
            abort, log, notifications)

    except Exception as e:
        log.exception('Search job failure', e)

    return (job_id, calibre_books, discovered_books, log.errors, log.warnings, log.summaries)
        
            
def lending_library_search(pickled_config, pickled_all_calibre_books, selected_ids_by_author,
            discover_books, pickled_orig_discovered_books, keywords, library_names,
            abort, log, notifications):
    '''
    This function performs the search for books as a job in a separate thread or process.

    pickled_config, pickled_all_calibre_books and pickled_orig_discovered_books are
    pickles produced by start_search_job.
    selected_ids_by_author maps each author to search for to a list of calibre book ids.
    keywords selects a special job (a key of SPECIAL_JOBS), a keyword search (any other
    value that is not None) or a search by author (None).
    library_names, if not None, replaces the configured set of enabled libraries.

    Returns (searched calibre books, discovered books). For a special job the job's
    own result is returned, or ([], []) if it has none. ([], []) is also returned
    when the plugin version in the config differs from the running plugin.

    Exceptions are passed on to the caller. The special SSL handling set up by
    init_ssl is always undone by cleanup_ssl, including when the job is aborted or fails.
    '''

    log.info('%s %s, calibre %s' % (ActionOverdriveLink.name, '.'.join([unicode(v) for v in ActionOverdriveLink.version]),
            '.'.join([unicode(v) for v in numeric_version])))

    # (start fraction, end fraction, description) of overall progress for each phase
    status = ODStatus(queue=notifications)
    STATUS_RANGE_PREPARE = (0.0, 0.01, 'Preparing for search')
    STATUS_RANGE_SEARCH = (0.01, 0.50, 'search')
    STATUS_RANGE_BOOK = (0.50, 0.99, 'get book')
    STATUS_RANGE_MATCH = (0.99, 1.0, 'match')
    STATUS_RANGE_SPECIAL = (0.01, 1.0, '')

    status.subrange(STATUS_RANGE_PREPARE[0], STATUS_RANGE_PREPARE[1], STATUS_RANGE_PREPARE[2])

    # NOTE(review): unpickling is only safe because these pickles are created by
    # start_search_job in this same plugin; never feed external data to this function.
    config = cPickle.loads(pickled_config)
    all_calibre_books = cPickle.loads(pickled_all_calibre_books)
    orig_discovered_books = cPickle.loads(pickled_orig_discovered_books)

    if (not hasattr(config, 'plugin_version')) or (config.plugin_version != ActionOverdriveLink.version):
        log.error('Plugin version mismatch. Installation of new plugin version without restarting calibre.')
        return ([], [])

    set_num_simultaneous_queries(config.max_simultaneous_jobs)  # set for query pacing
    init_ssl(log) # special SSL handling

    try:
        if library_names is not None:
            # use alternate set of enabled libraries
            # ok to change config since we are working with a private copy
            config.enabled_libraries = []
            for lib in config.libraries:
                lib.enabled = (lib.name in library_names)
                if lib.enabled:
                    config.enabled_libraries.append(lib)

            enabled_names = set(lib.name for lib in config.enabled_libraries)
            for lib_name in library_names:
                if lib_name not in enabled_names:
                    log.warn('Search started using unconfigured library name: %s' % lib_name)

        if keywords in SPECIAL_JOBS:
            log.info(LOG_SEPERATOR)
            special_status = status.subrange(STATUS_RANGE_SPECIAL[0], STATUS_RANGE_SPECIAL[1], STATUS_RANGE_SPECIAL[2])
            special_status = special_status.update_subrange(0, 1, keywords)

            config.calibre_books = all_calibre_books    # save for use in check_obtainable, inventory_amazon_et, uncache_books
            result = SPECIAL_JOBS[keywords].function(abort, log, special_status, config)

            if result is not None:
                return result

            return ([], [])

        if config.search_language:
            log.info('Search language: %s' % config.search_language)

        log.info('Search formats: %s' % ', '.join(sorted(list(config.search_formats))))

        search_libraries = init_libraries(abort, log, config, not tweaks.get(TWEAK_SKIP_LOGIN, False), False)
        found_library_books = set()
        discover_by_keyword = keywords is not None
        discovery_authors = []

        search_status = status.subrange(STATUS_RANGE_SEARCH[0], STATUS_RANGE_SEARCH[1], STATUS_RANGE_SEARCH[2])

        if discover_by_keyword:
            log.info(LOG_SEPERATOR)
            find_status = search_status.update_subrange(0, 1, 'keywords-' + keywords)

            new_library_books = find_library_books_by_keyword(
                abort, log, find_status, config, search_libraries, keywords, discover_books)

            if new_library_books:
                found_library_books.update(new_library_books)

            searched_calibre_books = []

        else:
            searched_ids = set()

            all_calibre_books_by_id = {}
            for book in all_calibre_books:
                all_calibre_books_by_id[book.id] = book

            for i, author in enumerate(sorted(selected_ids_by_author.keys(), key=author_sort_key)):
                log.info(LOG_SEPERATOR)
                log.info('Finding library books by %s' % author)

                find_status = search_status.update_subrange(i, len(selected_ids_by_author), author)

                if abort.is_set():
                    abort_job()

                # Select the books by this author
                authors_books = []
                discover_by_author = False
                for book_id in selected_ids_by_author[author]:
                    searched_ids.add(book_id)

                    authors_book = all_calibre_books_by_id[book_id]
                    authors_books.append(authors_book)

                    if discover_books and authors_book.allow_discovery:
                        discover_by_author = True

                if discover_by_author and (match_prep(author) in config.no_discovery_authors):
                    discover_by_author = False

                alt_names = alternate_author_names(author, config, log) # check for variants of the author name

                if discover_by_author:
                    discovery_authors.extend(alt_names)    # allow discovery by this author

                new_library_books = find_library_books_by_author(
                    abort, log, find_status, alt_names, config, search_libraries, authors_books,
                    discover_by_author)

                if new_library_books:
                    found_library_books.update(new_library_books)

            searched_calibre_books = [all_calibre_books_by_id[book_id] for book_id in searched_ids]

        if abort.is_set():
            abort_job()

        # Get more details on found library books and filter out those that don't match search parameters
        log.info(LOG_SEPERATOR)
        get_book_status = status.subrange(STATUS_RANGE_BOOK[0], STATUS_RANGE_BOOK[1], STATUS_RANGE_BOOK[2])

        library_books = get_library_book_details(
            abort, log, get_book_status, config, discover_books, discover_by_keyword,
            searched_calibre_books, found_library_books)

        if abort.is_set():
            abort_job()

        log.info(LOG_SEPERATOR)
        status.subrange(STATUS_RANGE_MATCH[0], STATUS_RANGE_MATCH[1], STATUS_RANGE_MATCH[2])

        if not discover_by_keyword:
            report_excessive_wrong_authors(log, config, library_books)
            unmatched_library_books = match_library_and_calibre_books(log, config, searched_calibre_books, library_books)
            matched_calibre_books = [searched_book for searched_book in searched_calibre_books if searched_book.matched]

        else:
            unmatched_library_books = library_books
            matched_calibre_books = []

        if discover_books:
            log.info(LOG_SEPERATOR)
            for author in sorted(selected_ids_by_author.keys(), key=author_sort_key):
                if author.lower() not in discovery_authors:
                    log.info('Discovery of books by %s is disallowed.' % author)

            discovered_books = discover_new_books(
                log, config, unmatched_library_books, all_calibre_books, orig_discovered_books,
                discovery_authors, discover_by_keyword)
        else:
            discovered_books = []

        # Check each id for an excessive number of links to the same library.
        # This may indicate a matching failure resulting in multiple library books matched
        # with the same calibre book. This can happen for book series.
        check_for_excessive_links(log, config, matched_calibre_books, discovered=False)
        check_for_excessive_links(log, config, discovered_books, discovered=True)

        return (searched_calibre_books, discovered_books)

    finally:
        # undo the special SSL handling on every exit path, including abort and failure
        cleanup_ssl()


def init_libraries(abort, log, config, use_credentials, use_all_libraries):
    '''
    Create a searchable library object for each configured lending library and
    return them as a list.

    use_all_libraries selects config.libraries instead of config.enabled_libraries.
    use_credentials is handed to sign_in for each library; None skips sign in entirely.
    '''
    if use_all_libraries:
        lending_libs = config.libraries
    else:
        lending_libs = config.enabled_libraries

    search_libraries = []

    for lending_lib in lending_libs:
        lib = SearchableLibrary.create(log, config, lending_lib)
        search_libraries.append(lib)

        # every library starts out in the signed-out state
        lib.signin_required = lib.signed_in = lib.did_sign_in = False

        if use_credentials is not None:
            sign_in(log, lib, use_credentials)  # Sign in to library websites, if needed

        if abort.is_set():
            abort_job()

    return search_libraries
    

def find_library_books_by_keyword(abort, log, status, config, search_libraries, keywords, discover_books):
    '''
    Search every library using the keywords as the title and no author.

    Returns the set that was handed to each library search to collect its results.
    The set is empty, and no search is made, unless discover_books is true.
    '''

    log.summary('Finding library books with keywords %s' % keywords)

    found = set()

    if discover_books:
        lib_count = len(search_libraries)

        # "done" is the number of libraries already searched, used for progress reporting
        for done, lib in enumerate(search_libraries):
            find_books_available_and_recommendable(log, found, '', keywords, True, config, lib,
                status, done, lib_count, [], abort)

            if abort.is_set():
                abort_job()

    return found
    
    
    
def find_library_books_by_author(
            abort, log, status, alt_names, config, search_libraries, authors_books, discover_books):

    '''
    Search each lending library once for every name in alt_names (variations of one
    author's name) in order to get maximum results. authors_books holds the calibre
    books being searched for.

    Most sites expect author names in "first last" format with optional punctuation.

    Returns the set that was handed to each library search to collect its results.
    '''

    found = set()
    lib_count = len(search_libraries)

    # "done" is the number of libraries already searched, used for progress reporting
    for done, lib in enumerate(search_libraries):
        for try_author in alt_names:
            # Optimization when searching for a single title with no other book discovery:
            # use partial title to restrict search. Otherwise look for all books by this author.
            single_title_only = (len(authors_books) == 1) and not discover_books
            try_title = safe_word_from_title(authors_books[0].title) if single_title_only else ''

            find_books_available_and_recommendable(log, found, try_author, try_title, False, config, lib,
                status, done, lib_count, authors_books, abort)

            if abort.is_set():
                abort_job()

    return found
    

def only_get_info_libs(libraries):
    '''
    Return a list of those libraries, from a larger list/set, that support
    get_book_info, i.e. whose max_books_per_get_books_info() is greater than zero.
    '''
    return [lib for lib in libraries if lib.max_books_per_get_books_info() > 0]
 

def get_library_book_details(
        abort, log, status, config, discover_books, discover_by_keyword,
        searched_calibre_books, library_books):

    '''
    Finalize the set of library books found by a search and return the books kept.

    Steps, in order:
    merge books with same provider and book id (when possible) to reduce lookups;
    drop books whose formats or language do not match the search;
    get more details for the remaining books from one library per book;
    merge those details into each book and fix up its title/author data;
    recheck the formats and language and drop books by excluded authors;
    create the links of each remaining book.

    discover_books, discover_by_keyword and searched_calibre_books are not used by
    the code of this function.
    '''

    unique_books = {}   # book_key -> first book seen with that key

    for lbook in library_books:
        if lbook.book_key in unique_books:
            unique_books[lbook.book_key].merge_from(
                lbook,
                check_same_formats=not SearchableLibrary.provider(lbook.provider_id).allow_format_merge,
                check_add_formats=False)
        else:
            unique_books[lbook.book_key] = lbook

    library_books = set(unique_books.values())

    '''
    Check for results that match search criteria
    '''

    kept_books = set()

    for lbook in library_books:
        # if any formats are provided in find phase they cannot be added-to in get book details
        if len(lbook.formats) != 0 and lbook.formats.isdisjoint(config.search_formats):
            log.info('Ignoring (wrong format): %s' % repr(lbook))
        elif config.search_language and lbook.language and config.search_language != lbook.language:
            log.info('Ignoring (wrong language): %s' % repr(lbook))
        else:
            kept_books.add(lbook)

    library_books = kept_books

    '''
    Get more details for remaining books
    '''

    if library_books:
        # Select a library host at random, giving preference to those that actually have the book
        books_to_get = defaultdict(set)     # library -> books to look up at that library

        for lbook in library_books:
            lbook.ibook = None  # temporary attribute, deleted again below

            # order of preference: available, then purchasable, then recommendable
            select_from = only_get_info_libs(lbook.available)
            if not select_from:
                select_from = only_get_info_libs(lbook.purchasable)
            if not select_from:
                select_from = only_get_info_libs(lbook.recommendable)

            if select_from:
                lib = select_from[random.randint(0, len(select_from)-1)]
                books_to_get[lib].add(lbook)


        with LibraryBookCache(log, config) as cache:
            # Get additional information on books, grouped by library
            i = 0   # number of books handled so far, for progress as a fraction of all books
            for lib, lbooks in books_to_get.items():
                status.update(i / len(library_books), '%d-books@%s' % (len(lbooks), lib.name))
                sub_status = status.subrange(i / len(library_books), (i + len(lbooks)) / len(library_books), lib.name)

                get_library_books_info(abort, log, sub_status, lib, lbooks, config, cache)
                i += len(lbooks)

                if abort.is_set():
                    abort_job()


    for lbook in library_books:
        orig_repr = repr(lbook)     # used to detect and log any change made below

        if lbook.ibook is not None:
            lbook.merge_from(lbook.ibook, check_same_formats=False, check_add_formats=True)  # Update book with new information

        del lbook.ibook

        force_unique_title(log, lbook)  # fix books with incomplete titles

        # apply fix ups to known bad data
        fixup_book_data(log, lbook)

        # Calibre requires empty title or author to be "Unknown"

        if len(lbook.authors) == 0:
            lbook.authors.append(UNKNOWN)

        if len(lbook.title) == 0:
            lbook.title = UNKNOWN

        new_repr = repr(lbook)
        if new_repr != orig_repr:
            log.info('Updated book info: %s' % new_repr)


    '''
    Recheck search criteria now that books are finalized
    Keep books with no author since that can happen for anthologies from Freading
    '''

    kept_books = set()

    for lbook in library_books:
        # authors of this book, in match_prep form, that are in the configured excluded authors
        excluded_auths = list(set([match_prep(author) for author in lbook.authors]) & config.excluded_authors)

        if len(lbook.formats) != 0 and lbook.formats.isdisjoint(config.search_formats):
            log.info('Ignoring (wrong format): %s' % repr(lbook))
        elif config.search_language and lbook.language and config.search_language != lbook.language:
            log.info('Ignoring (wrong language): %s' % repr(lbook))
        elif excluded_auths:
            log.info('Ignoring (excluded author %s): %s' % (' & '.join(excluded_auths), repr(lbook)))
        else:
            kept_books.add(lbook)

    library_books = kept_books

    '''
    Finalize links
    '''

    for lbook in library_books:
        lbook.create_links(config.search_formats)

    return library_books
 
  
def report_excessive_wrong_authors(log, config, library_books):
    '''
    Log each (provider, search author) combination for which more than
    MAX_WRONG_AUTHOR_RESULTS of the books found are by some other author.

    A book counts as being by the search author if same_author says so or if the
    search author occurs within the lower-cased, space-joined author names of the book.
    '''

    searched = defaultdict(int)     # (provider id, search author) -> books found
    mismatched = defaultdict(int)   # (provider id, search author) -> books by another author

    for lbook in library_books:
        for search_author in lbook.search_authors:
            pair = (lbook.provider_id, search_author)
            searched[pair] += 1

            if same_author([search_author], lbook.authors, config):
                continue

            if search_author in ' '.join(lbook.authors).lower():
                continue

            mismatched[pair] += 1

    for pair, count in mismatched.items():
        if count <= MAX_WRONG_AUTHOR_RESULTS:
            continue

        provider_id, search_author = pair
        log.info('Wrong author returned for %d of %d books searched at %s for %s' % (
            count, searched[pair],
            SearchableLibrary.provider(provider_id).name, search_author))
                
                
                
def match_library_and_calibre_books(log, config, searched_calibre_books, library_books):
    '''
    Pair the searched calibre books with the library search results and
    update the links of every calibre book accordingly.

    Returns the library books that did not match any searched calibre book.
    '''
    match_started = time.time()

    # every search result starts out unmatched; matched ones are dropped below
    leftover_library_books = library_books.copy()

    for cbook, lbook in match_book_lists(searched_calibre_books, library_books, config):
        lbook.odnid = ''    # temporary attribute, removed again once links are updated

        if not tweaks.get(TWEAK_TRACK_NEEDED_BOOKS, False):
            log.info('For "%s" found match "%s"'%(repr(cbook), repr(lbook)))
        else:
            needed, desc = need_levels(cbook.levels_have, lbook.levels_available())
            log.info('For "%s" found match "%s" (%s)'%(repr(cbook), repr(lbook), desc))

            if needed:
                lbook.odnid = lbook.odid    # this available link is needed

        cbook.update_links(lbook)
        del lbook.odnid

        cbook.matched = True
        leftover_library_books.discard(lbook)

    elapsed = time.time() - match_started
    if elapsed > MIN_LOG_TIME:
        log.info('Matching selected books took %.1f sec' % elapsed)

    for cbook in searched_calibre_books:
        preserve_links(cbook, config, log)
        cbook.remove_outdated_links()

        if cbook.matched:
            continue

        if not cbook.links_have_changed():
            log.info('Unmatched book (unchanged): %s'%repr(cbook))
            continue

        # nothing matched and the links changed, so the book is flagged as matched
        log.info('Unmatched book (links removed): %s'%repr(cbook))
        cbook.matched = True

    return leftover_library_books
    
    
    

def discover_new_books(log, config, unmatched_library_books, all_calibre_books, orig_discovered_books,
        discovery_authors, discover_by_keyword):

    '''
    Optionally try to determine if the remaining search results are of possible interest to the user.

    log = Job log used for progress and diagnostic messages
    config = Search configuration, passed through to the matching and link helpers
    unmatched_library_books = Library books that matched none of the searched calibre books
    all_calibre_books = Every book in the calibre library (used to reject books already owned)
    orig_discovered_books = Books discovered by earlier runs
    discovery_authors = Authors for which discovery is wanted
    discover_by_keyword = True for a keyword search, which skips the author checks

    Return a list holding newly discovered books (is_newly_discovered = True) and
    copies of previously discovered books whose links have changed
    (is_newly_discovered = False). The list is empty when there are no discovery
    authors and this is not a keyword search.

    The phases below communicate through the temporary "discard" flag set on each
    candidate book, so their order matters.
    '''

    log.info('Checking %s, %s' % (
        value_unit(len(unmatched_library_books), 'unmatched library book'),
        value_unit(len(discovery_authors), 'author')))

    discovered_books = []

    if len(discovery_authors) == 0 and not discover_by_keyword:
        return discovered_books     # discovery was not requested

    # Phase 1: turn every unmatched library book into a candidate discovered book
    start_time = time.time()
    possible_discovered_books = []

    for lbook in unmatched_library_books:
        # Possible new book discovered
        dbook = DiscoveredBook(authors=lbook.authors, title=lbook.title,
            odid=lbook.odid, odrid=lbook.odrid, odpid=lbook.odpid,
            publisher=lbook.publisher, pubdate=lbook.pubdate, isbn=lbook.isbn,
            series=lbook.series, series_index=lbook.series_index, language=lbook.language)

        dbook.orig_odid = dbook.orig_odrid = dbook.orig_odpid = ''     # a candidate has no prior links
        dbook.discard = False       # set once a later phase has dealt with this candidate
        possible_discovered_books.append(dbook)

    duration = time.time() - start_time
    if duration > MIN_LOG_TIME:
        log.info('Discovery setup took %.1f sec' % duration)


    # Phase 2: make sure discovered books are not already in the calibre library
    start_time = time.time()

    for cbook,dbook in match_book_lists(all_calibre_books, possible_discovered_books, config):
        dbook.discard = True

        # The user already has this book in their calibre library. Check for improvement to availability.
        if (len(ODLinkSet(str=dbook.odid, config=config) - ODLinkSet(str=cbook.orig_odid, config=config)) == 0 and
                    len(ODLinkSet(str=dbook.odrid, config=config) - ODLinkSet(str=cbook.orig_odrid, config=config)) == 0 and
                    len(ODLinkSet(str=dbook.odpid, config=config) - ODLinkSet(str=cbook.orig_odpid, config=config)) == 0):
            log.info('Already in calibre, not selected for search (links ok): %s' % repr(dbook))  # No new availability

        elif same_author(discovery_authors, [primary_author(cbook.authors)], config, use_equivalents=False):
            log.warn('Already in calibre, not selected for search (possibly missing links): %s' % repr(dbook)) # Alert the user

        else:
            # Primary author is not one of the discovery authors: same message, but only at info level
            log.info('Already in calibre, not selected for search (possibly missing links): %s' % repr(dbook))

    duration = time.time() - start_time
    if duration > MIN_LOG_TIME:
        log.info('Matching unselected books took %.1f sec' % duration)

    # previously discovered book -> set of candidates that match it
    odmatches = defaultdict(set)

    # Phase 3: find all previously discovered books that should be matched if they are still available.
    # Each gets an (initially empty) entry so that it is re-evaluated in phase 5 even when
    # no candidate matches it any more.
    if not discover_by_keyword:
        start_time = time.time()
        for odbook in orig_discovered_books:
            if same_author(discovery_authors, [primary_author(odbook.authors)], config, use_equivalents=False):
                # log.info('Previously discovered by same author(s): %s' % repr(odbook))
                odmatches[odbook]   # create empty set of matches

        duration = time.time() - start_time
        if duration > MIN_LOG_TIME or odmatches:
            log.info('Locating %s by same authors took %.1f sec' % (
                    value_unit(len(odmatches), 'previously discovered book'), duration))


    # Phase 4: see if any books were previously discovered
    start_time = time.time()
    for odbook,ndbook in match_book_lists(orig_discovered_books, possible_discovered_books, config):
        if ndbook.discard:
            continue    # already handled (in the calibre library)

        if discover_by_keyword:
            # keyword searches only report books that were not discovered before
            log.info('Previously discovered: %s'%repr(ndbook))
            ndbook.discard = True
            continue

        odmatches[odbook].add(ndbook)    # consolidate all newly discovered books that match the same previously discovered book

    duration = time.time() - start_time
    if duration > MIN_LOG_TIME:
        log.info('Matching against previously discovered books took %.1f sec' % duration)


    start_time = time.time()

    # Phase 5: see if the previously discovered books have changed availability
    for odbook in odmatches:
        # This book was already discovered previously.
        xodbook = copy.deepcopy(odbook)  # make a copy to avoid corrupting the original

        xodbook.make_orig_links()       # prior links are "original"

        for ndbook in odmatches[odbook]:
            # To reduce flip-flopping of availability preserve links if the primary author of the book
            # is not an author for the search.
            # Flip-flop still may occur due to matching by isbn overriding rejection by author
            # (or equivalent author).
            if not same_author(discovery_authors, [primary_author(odbook.authors)], config):
                log.info('Previously discovered with non-primary author (add links only): %s'%repr(ndbook))
                xodbook.preserve_all_links()                # preserve original links

            xodbook.merge_from(ndbook, update_links=True)   # update with new links
            ndbook.discard = True

        preserve_links(xodbook, config, log)
        xodbook.remove_outdated_links()

        if xodbook.links_have_changed():
            log.info('Availability changed for previously discovered book: %s'%repr(xodbook))
            xodbook.is_newly_discovered = False
            discovered_books.append(xodbook)
        else:
            log.info('Unchanged previously discovered book: %s'%repr(xodbook))

    duration = time.time() - start_time
    if duration > MIN_LOG_TIME:
        log.info('Checking previously discovered books for link changes took %.1f sec' % duration)


    # Phase 6: consolidate newly discovered books with matching author/title.
    # The candidate list is matched against itself.
    start_time = time.time()
    for ndbook1,ndbook2 in match_book_lists(possible_discovered_books, possible_discovered_books, config):
        if ndbook1 is ndbook2:
            continue    # books will always match themselves

        if ndbook1.discard or ndbook2.discard:
            continue

        # update links in one and discard the other
        ndbook1.merge_from(ndbook2, update_links=True)
        ndbook2.discard = True

    duration = time.time() - start_time
    if duration > MIN_LOG_TIME:
        log.info('Consolidating discovered books took %.1f sec' % duration)


    # Phase 7: add any newly discovered books
    start_time = time.time()

    for ndbook in possible_discovered_books:
        if ndbook.discard:
            continue

        ndbook.remove_outdated_links()

        if not ndbook.title:
            log.warn('Ignoring ("known" book): %s'%repr(ndbook))
            continue

        # Make sure this book has an author we are looking for
        if ((not discover_by_keyword) and (primary_author(ndbook.authors) == UNKNOWN or
                (not same_author(discovery_authors, ndbook.authors, config)))):
            log.info('Ignoring (wrong author): %s'%repr(ndbook))
            continue

        # don't save newly discovered books with no links (may be a Scribd non-book, etc.)
        if not (ndbook.odid or ndbook.odrid or ndbook.odpid):
            log.info('Ignoring (no links): %s'%repr(ndbook))
            continue

        # Keep this new book
        log.info('New discovered book: %s'%repr(ndbook))
        ndbook.is_newly_discovered = True
        discovered_books.append(ndbook)

    duration = time.time() - start_time
    if duration > MIN_LOG_TIME:
        log.info('Adding newly discovered books took %.1f sec' % duration)

    return discovered_books
    
 
def preserve_links(book, config, log):
    '''
    Merge back into a book's current links those original links that must
    survive this search even though the search did not find them again.

    Original links are retained for disabled libraries, for formats that were
    not searched, for link types that were not searched, for all link types
    when the log has errors, and (depending on configuration and tweaks) for
    links that are no longer found. The resulting link sets are written back
    to the book.
    '''
    idlinks = book.get_idlinks(config)
    orig_idlinks = book.get_orig_idlinks(config)

    ebooks_not_searched = config.search_formats.isdisjoint(ALL_READABLE_FORMATS)
    audiobooks_not_searched = config.search_formats.isdisjoint(ALL_LISTENABLE_FORMATS)

    for ltype in book.idltypes:
        prior = orig_idlinks[ltype]

        # Treat disabled libraries as if they were found/unchanged
        merged = idlinks[ltype] | prior.disabled()

        if ebooks_not_searched:
            merged = merged | prior.ebooks()        # keep all e-book links

        if audiobooks_not_searched:
            merged = merged | prior.audiobooks()    # keep all audiobook links

        idlinks[ltype] = merged

    # link types whose original links are kept wholesale
    preserve_ltypes = set()

    if not config.check_recommendable:
        preserve_ltypes.add(IDENT_RECOMMENDABLE_LINK)    # retain links when not doing that type of search

    if log.has_errors():
        preserve_ltypes.update(book.idltypes)  # prevent loss of any existing links on search error

    for ltype in preserve_ltypes:
        idlinks[ltype] = idlinks[ltype] | orig_idlinks[ltype]

    keep_all_lost = not config.unlink_if_removed

    if keep_all_lost or tweaks.get(TWEAK_KEEP_SCRIBD_IF_REMOVED, False):
        lost_idlinks = {}

        for ltype in book.idltypes:
            if keep_all_lost:
                candidates = orig_idlinks[ltype]   # keep all links
            else:
                candidates = orig_idlinks[ltype].has_provider_id(Scribd.id)   # keep only Scribd

            lost_idlinks[ltype] = candidates - idlinks[ltype]

        if any(len(lost) > 0 for lost in lost_idlinks.values()):
            log.warn('Links no longer found (will be kept) for %s: %s' % (
                unicode(book), linksets_str(lost_idlinks)))

            for ltype in book.idltypes:
                idlinks[ltype] = idlinks[ltype] | lost_idlinks[ltype] # preserve missing

    book.set_idlinks(idlinks)
    book.set_orig_idlinks(orig_idlinks)
            

def find_books_available_and_recommendable(log, books, search_author, search_title, keyword_search, config, lib,
        search_status, searches_complete, total_searches, authors_books, abort):
    '''
    Search one library for available books and then, when recommendation
    checking is configured and the library supports and allows it, search
    again for recommendable books. Found books are added to "books".
    '''

    def run_pass(find_recommendable, status_format):
        # report progress, then run a single search pass
        search_status.update(searches_complete / total_searches, status_format % (search_author, lib.name))
        find_books_with_limit_retry(log, books, search_author, search_title, keyword_search,
            find_recommendable, config, lib, authors_books, abort)

    run_pass(False, '%s@%s')

    if not (config.check_recommendable and lib.supports_recommendation and lib.recommendation_allowed):
        return

    if abort.is_set():
        abort_job()

    run_pass(True, '%s!%s')
        
                

def find_books_with_limit_retry(log, books, search_author, search_title, keyword_search, find_recommendable, config, lib,
        authors_books, abort):
    '''
    Search a library and, if an author-only search exceeds the result limit,
    retry with the author combined with a word from each title in authors_books.
    '''

    limit_exceeded = find_books_at_lib(log, books, search_author, search_title, keyword_search,
        find_recommendable, config, lib)

    if not limit_exceeded:
        return

    # A retry only makes sense for an author-only search at a library that uses titles
    if (not search_author) or search_title or (not lib.title_used_in_search):
        return

    # Use partial titles to restrict the search (duplicates collapse in the set)
    try_titles = set(word_from_title(cbook.title) for cbook in authors_books)

    for try_title in try_titles:
        if abort.is_set():
            abort_job()

        if not try_title:
            continue

        find_books_at_lib(log, books, search_author, try_title, keyword_search, find_recommendable, config, lib)



def find_books_at_lib(log, books, search_author, search_title, keyword_search, find_recommendable, config, lib):
    '''
    Run one search at a library and add the books it finds to "books".

    books = Set of Books to be updated
    Return True if the library reported that the result limit was exceeded.
    The search is skipped (returning False) for very short author-only searches,
    unsupported recommendation searches, sign in failures and libraries that
    support none of the configured formats.
    '''

    MAX_RESULTS_ALLOWED = 200

    if len(search_author) <= 2 and not search_title:
        return False    # Very short author names return bad results

    if find_recommendable and not (lib.supports_recommendation and lib.recommendation_allowed):
        return False    # Not supported

    if lib.signin_required and (not lib.signed_in) and (not tweaks.get(TWEAK_IGNORE_LOGIN_FAILURE, False)):
        log.info('Cannot perform search at %s due to sign in failure' % lib.name)
        return False    # Sign in failure

    if len(lib.formats_supported & config.search_formats) == 0:
        log.info('Skipping %s: Supports none of the configured book formats' % lib.name)
        return False    # No search formats supported

    kind_desc = 'recommendable ' if find_recommendable else ''
    title_desc = search_title if search_title else 'all'
    author_desc = search_author if search_author else 'all'
    desc = 'Search %s %sfor %s by %s'%(lib.name, kind_desc, title_desc, author_desc)

    limit_exceeded = False
    new_books = set()
    log.info(desc)
    log.context(desc)
    try:
        search_started = time.time()
        limit_exceeded = lib.find_books(new_books, search_author, search_title, keyword_search, find_recommendable)
        log.info('Found %s in %.1f sec' % (value_unit(len(new_books), 'book'), time.time() - search_started))

    except Exception as e:
        log.exception('', e)

    if limit_exceeded and len(new_books) > MAX_RESULTS_ALLOWED:
        # discard excessive results as likely incorrect
        log.info('Excessive number of results returned by search. Results discarded.')
    else:
        if limit_exceeded:
            log.info('Excessive number of results returned by search. Results kept.')

        books.update(new_books)

    log.context(None)

    return limit_exceeded
        
                            
def sign_in(log, lib, use_credentials):
    '''
    Sign in to a library the first time this is called for it.

    Sign in failures are logged rather than raised, and the library is marked
    as having attempted sign in either way so the attempt is never repeated.
    '''
    if not lib.did_sign_in:
        log.context('Signing in to %s' % (lib.name))
        try:
            began = time.time()

            lib.sign_in(use_credentials)

            elapsed = time.time() - began
            if elapsed > 10.0:
                log.info('Sign in took %.1f sec' % elapsed)

        except Exception as e:
            log.exception('', e)

        log.context(None)
        lib.did_sign_in = True
    

def get_library_books_info(abort, log, status, lib, lbooks, config, cache):
    '''
    Obtain additional information about books from a library.

    Each book in lbooks gets an "ibook" attribute: the cached info when the
    cache may be used and holds usable data, otherwise the info fetched from
    the library (None if it could not be obtained). Fetching is done in
    batches of at most lib.max_books_per_get_books_info() ids; errors while
    fetching a batch are logged and the remaining batches are still tried.

    abort and config are accepted for interface compatibility but not used here.
    Raises AssertionError if two books needing a fetch share a book id.
    '''
    get_book_ids = {}   # book id -> library book whose info must be fetched

    for lbook in lbooks:
        if lbook.allow_get_from_cache:
            lbook.ibook = cache.get_book(lbook.book_key)
        else:
            lbook.ibook = None

        if lbook.ibook is not None:
            fixup_cached_ibook_data(lbook)      # may reset ibook to None for stale cache entries

        if lbook.ibook is None:
            if lbook.book_id in get_book_ids:
                # should be no duplicate book ids
                raise AssertionError('Duplicate book id: %s' % (lbook.book_id,))

            get_book_ids[lbook.book_id] = lbook

    if not get_book_ids:
        return

    # A real list is required: the ids are consumed by slicing, which a
    # dictionary view (as returned by keys() on Python 3) does not support.
    remaining_get_ids = list(get_book_ids)
    max_per_get = lib.max_books_per_get_books_info()
    i = 0
    ibooks = []
    lib.cache = cache   # made available to the library only while fetching

    try:
        while len(remaining_get_ids) > 0:
            get_ids = remaining_get_ids[0:max_per_get]
            status.update(i / len(get_book_ids), get_ids[0])
            log.context('Getting %s at %s'%(', '.join(get_ids), lib.name))

            try:
                start_time = time.time()

                ibooks.extend(lib.get_books_info(get_ids))

                duration = time.time() - start_time
                if duration > 10.0:
                    log.info('Get book info took %.1f sec' % duration)

            except Exception as e:
                log.exception('', e)

            log.context(None)
            remaining_get_ids = remaining_get_ids[max_per_get:]
            i += max_per_get

    finally:
        del lib.cache   # always detach the cache, even if the job is aborted mid-way

    for ibook in ibooks:
        if ibook is None:
            continue

        if ibook.cache_allowed:
            cache.save_book(ibook)

        lbook = get_book_ids.get(ibook.book_id, None)
        if lbook is not None:
            lbook.ibook = ibook
        else:
            log.error('Unexpected book_id returned: %s' % repr(ibook))
        
            
def fixup_cached_ibook_data(lbook):
    '''
    Drop cached book info (set lbook.ibook to None) when it looks like it was
    produced by an older release and can no longer be trusted.
    '''
    cached = lbook.ibook

    # old format value that may have been cached prior to v1.18.0
    has_old_scribd_format = FORMAT_SCRIBD_EBOOK in cached.formats

    # books incorrectly parsed from OverDrive that may have been cached prior to v1.24.0
    bad_overdrive_parse = (not has_old_scribd_format and
        cached.provider_id == OverDrive.id and len(cached.formats) == 0)

    # books whose formats were not checked from the actual book page, from releases prior to v1.25.0
    unchecked_scribd = (not has_old_scribd_format and not bad_overdrive_parse and
        cached.provider_id == Scribd.id and len(cached.formats) == 0)

    if has_old_scribd_format or bad_overdrive_parse or unchecked_scribd:
        lbook.ibook = None


def check_obtainable(abort, log, status, config):
    '''
    Check if books are currently available to borrow and estimate hold time if not.

    For every calibre book in the configuration the enabled available links
    (plus configured "needed" links when those are tracked) are checked at the
    matching libraries, and the results are stored as a string in book.obtainable.
    When the wait groups tweak is set only the libraries named in it are
    checked and results are ordered by group priority.

    Returns a tuple of (the calibre books, an empty list).
    '''
    track_needed_books = tweaks.get(TWEAK_TRACK_NEEDED_BOOKS, False)
    search_libraries = init_libraries(abort, log, config, None, track_needed_books)     # delay sign in

    lib_wait_priority = {}      # format and library name -> index of its wait group
    wait_groups = tweaks.get(TWEAK_WAIT_GROUPS, None)
    filter_by_library = wait_groups is not None

    if filter_by_library:
        for priority, lib_group in enumerate(wait_groups):
            for lib_name in lib_group:
                lib_wait_priority[lib_name] = priority

                configured_names = [library.name for library in config.libraries]
                if lib_name.replace('#','') not in configured_names:
                    log.warn('%s tweak contains unconfigured library name: %s' % (TWEAK_WAIT_GROUPS, lib_name))

    for index, book in enumerate(config.calibre_books):
        status.update(index / len(config.calibre_books), unicode(book))

        book.preserve_all_links()   # don't disturb regular links
        results = set()             # (priority, description) pairs

        odlinks = ODLinkSet(str=book.orig_odid, config=config).enabled().odlinks

        if track_needed_books:
            odlinks.update(ODLinkSet(str=book.orig_odnid, config=config).configured().odlinks)

        if len(odlinks) > 0:
            log.info(LOG_SEPERATOR)
            log.info('Checking current availability for %s' % unicode(book))

            for odlink in odlinks:
                format_and_name = odlink.format_and_name()

                if filter_by_library and format_and_name not in lib_wait_priority:
                    continue    # library is not in any wait group

                for lib in search_libraries:
                    if lib.provider_id == odlink.provider_id and lib.library_id == odlink.library_id:
                        # Sign in to library, if needed and not yet done
                        sign_in(log, lib, lib.sign_in_affects_check_obtainable)

                        log.context('Checking %s obtainable at %s' % (odlink.book_id, lib.name))

                        try:
                            check_started = time.time()

                            obtainable = lib.check_book_obtainable(odlink.book_id)

                            elapsed = time.time() - check_started
                            if elapsed > 10.0:
                                log.info('Check obtainable took %.1f sec' % elapsed)

                        except Exception as e:
                            log.exception('', e)
                            obtainable = None

                        log.context(None)

                        # 'XX' marks a check that failed or gave no result
                        if obtainable is not None:
                            wait_str = '%02d' % obtainable
                        else:
                            wait_str = 'XX'

                        obtainable_str = '%s@%s' % (wait_str, format_and_name)
                        results.add((lib_wait_priority.get(format_and_name, 9999), obtainable_str))

                    # checked for every library, matching or not
                    if abort.is_set():
                        abort_job()

        book.obtainable = ', '.join(entry[1] for entry in sorted(results))

    return (config.calibre_books, [])
    

def uncache_books(abort, log, status, config):
    '''
    Remove the cache entries associated with the original links of the
    selected calibre books.
    '''
    with LibraryBookCache(log, config) as cache:
        book_count = len(config.calibre_books)

        for index, book in enumerate(config.calibre_books):
            status.update(index / book_count, unicode(book))

            # NOTE(review): orig_odrid links are not included here -- confirm that is intended
            odlinks = ODLinkSet(str=book.orig_odid, config=config, book_key=True).odlinks

            for more_links in (book.orig_odnid, book.orig_odpid):
                odlinks.update(ODLinkSet(str=more_links, config=config, book_key=True).odlinks)

            for odlink in odlinks:
                cache.delete_item(unicode(odlink))
    

def check_for_excessive_links(log, config, books, discovered):
    '''
    Check each id for an excessive number of links to the same library.

    This may indicate a matching failure resulting in multiple library books
    matched with the same calibre book. This can happen for book series.
    Nothing is done when books is empty or None.
    '''
    if not books:
        return

    for book in books:
        link_ids = (
            (book.odid, book.orig_odid, LINK_AVAILABLE),
            (book.odrid, book.orig_odrid, LINK_RECOMMENDABLE),
            (book.odpid, book.orig_odpid, LINK_PURCHASABLE),
            )

        for odxid, orig_odxid, link_type in link_ids:
            report_excessive_links(log, config, odxid, orig_odxid, unicode(book), link_type, discovered)
        
        
        
        
def report_excessive_links(log, config, odxid, orig_odxid, book_name, type, discovered):
    '''
    Warn when a link string holds more links to a single library (counted
    separately for e-books and audiobooks) than the provider expects per book.

    Only counts that have grown compared to the original link string are
    reported.
    '''

    def links_per_library(link_str):
        # number of links keyed by (provider id, library id, is audiobook)
        tally = defaultdict(int)
        for odlink in ODLinkSet(str=link_str, config=config).odlinks:
            tally[(odlink.provider_id, odlink.library_id, odlink.is_audiobook)] += 1
        return tally

    current_counts = links_per_library(odxid)
    original_counts = links_per_library(orig_odxid)

    for library_key, count in current_counts.items():
        provider_id, library_id, is_audiobook = library_key

        if count <= original_counts.get(library_key, 0):
            continue    # not worse than before

        if count <= SearchableLibrary.provider(provider_id).max_expected_links_per_book:
            continue    # within the provider's expected range

        discovered_mark = BOOK_DISCOVERED if discovered else ''
        format_mark = LINK_FORMAT_AUDIOBOOK if is_audiobook else LINK_FORMAT_EBOOK
        desc = '%s%s (%s%s%s)'%(discovered_mark, book_name, type, format_mark,
            config.library_name(provider_id, library_id))

        log.warn('%s has %d links to same library. Possible incorrect book linkage.' % (desc, count))

            
def abort_job():
    '''
    Stop the current job: run the special SSL cleanup and then raise
    RuntimeWarning('Job aborted').
    '''
    cleanup_ssl()   # special SSL handling

    job_aborted = RuntimeWarning('Job aborted')
    raise job_aborted
