#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2011, Grant Drake <grant.drake@gmail.com>'
__docformat__ = 'restructuredtext en'

import re, time
from collections import OrderedDict, defaultdict
from PyQt4.Qt import QModelIndex
from calibre import prints
from calibre.constants import DEBUG
from calibre.utils.config import tweaks

# Values returned by an algorithm's duplicate_search_mode() method, stating
# whether the groups it produces are for a book search or an author search.
DUPLICATE_SEARCH_FOR_BOOK = 'BOOK'
DUPLICATE_SEARCH_FOR_AUTHOR = 'AUTHOR'

# Translated rich-text descriptions of each title matching mode, keyed by the
# mode name ('identical', 'similar', 'soundex', 'fuzzy', 'ignore') and kept in
# that order. Every description contains a single %s placeholder.
# NOTE(review): presumably the placeholder is filled with the corresponding
# AUTHOR_DESCS entry by the caller -- confirm against the dialog code.
TITLE_DESCS = OrderedDict([
               ('identical',_('<b>Book duplicate search</b><br/>'
                             '- Find groups of books with an <b>identical title</b> and %s<br/>'
                             '- Titles must match exactly excluding case.<br/>'
                             '- Marking a group as exempt will prevent those specific books '
                             'from appearing together in future duplicate book searches.')),
               ('similar',  _('<b>Book duplicate search</b><br/>'
                             '- Find groups of books with a <b>similar title</b> and %s<br/>'
                             '- Similar title matches apply removal of common punctuation and '
                             'prefixes and applies the same title matching logic as Automerge.<br/>'
                             '- Marking a group as exempt will prevent those specific books '
                             'from appearing together in future duplicate book searches.')),
               ('soundex',  _('<b>Book duplicate search</b><br/>'
                             '- Find groups of books with a <b>soundex title</b> and %s<br/>'
                             '- Soundex title matches are based on the same removal of punctuation '
                             'and common prefixes as a similar title search.<br/>'
                             '- Marking a group as exempt will prevent those specific books '
                             'from appearing together in future duplicate book searches.')),
               ('fuzzy',    _('<b>Book duplicate search</b><br/>'
                             '- Find groups of books with a <b>fuzzy title</b> and %s<br/>'
                             '- Fuzzy title matches remove all punctuation, subtitles '
                             'and any words after \'and\' or \'or\' or \'aka\' in the title.<br/>'
                             '- Marking a group as exempt will prevent those specific books '
                             'from appearing together in future duplicate book searches.')),
               ('ignore',   _('<b>Author duplicate search</b><br/>'
                             '- Find groups of books <b>ignoring title</b> with %s<br/>'
                             '- Ignore title searches are best to find variations of author '
                             'names regardless of the books you have for each.<br/>'
                             '- Marking a group as exempt will prevent any books by those authors '
                             'from appearing together in future duplicate author searches.'))
               ])

# Translated rich-text descriptions of each author matching mode, keyed by the
# same mode names (and in the same order) as TITLE_DESCS. Each entry is
# phrased as a sentence fragment describing the author comparison.
AUTHOR_DESCS = OrderedDict([
                ('identical',_('an <b>identical author</b>.<br/>'
                              '- Authors must match exactly excluding case.')),
                ('similar',  _('a <b>similar author</b>.<br/>'
                              '- Similar authors differ only in '
                              'punctuation or order of their names.')),
                ('soundex',  _('a <b>soundex author</b>.<br/>'
                              '- Soundex author matches start with the same removal '
                              'of punctuation and ordering as a similar author search.')),
                ('fuzzy',    _('a <b>fuzzy match author</b>.<br/>'
                              '- Fuzzy author matches compare using their '
                              'surnames and only the first initial.')),
                ('ignore',   _('<b>ignoring the author</b>.'))
               ])


def authors_to_list(db, book_id):
    '''
    Return the authors of the given book as a list of names.

    The value returned by db.authors() is split on commas; each piece is
    stripped of surrounding whitespace and any '|' in it is turned back
    into a ','. An empty list is returned when the book has no authors.
    '''
    raw_authors = db.authors(book_id, index_is_id=True)
    if not raw_authors:
        return []
    names = []
    for piece in raw_authors.split(','):
        names.append(piece.strip().replace('|', ','))
    return names

def get_first_author(db, book_id):
    '''
    Return the first author listed for the given book, or None when the
    book has no authors.
    '''
    names = authors_to_list(db, book_id)
    return names[0] if names else None

def fuzzy_it(text, patterns=None):
    '''
    Normalise text so that trivially different spellings compare equal.

    text     - the string to normalise; it is stripped and lower-cased first
    patterns - optional sequence of (compiled regex, replacement) pairs which
               are applied in order. When omitted (or empty) the default title
               patterns are used: remove common punctuation, remove a leading
               article (taken from the 'title_sort_articles' tweak), turn
               '-', '.' and '_' into spaces and collapse runs of whitespace.

    Returns the normalised string with surrounding whitespace removed.
    '''
    if not patterns:
        # Only build the defaults when they are actually needed, rather than
        # recompiling them on every call even if the caller supplied patterns
        patterns = [(re.compile(pat, re.IGNORECASE), repl) for pat, repl in
                [
                    (r'[\[\](){}<>\'";,:#]', ''),
                    (tweaks.get('title_sort_articles', r'^(a|the|an)\s+'), ''),
                    (r'[-._]', ' '),
                    (r'\s+', ' ')
                ]]
    text = text.strip().lower()
    for pat, repl in patterns:
        text = pat.sub(repl, text)
    return text.strip()

def soundex(name, len=6):
    '''
    soundex module conforming to Knuth's algorithm
    implementation 2000-12-24 by Gregory Jorgensen
    public domain
    http://code.activestate.com/recipes/52213-soundex-algorithm/

    Returns the soundex code for name: the first A-Z letter found followed
    by the digits for the remaining letters, padded with '0' or truncated
    to exactly len characters. Characters outside A-Z are ignored.
    '''
    # soundex digit for each letter of the alphabet, A through Z
    letter_codes = '01230120022455012623010202'
    base = ord('A')
    first_letter = ''
    encoded = []

    for ch in name.upper():
        if 'A' <= ch <= 'Z':
            if not first_letter:
                first_letter = ch
            digit = letter_codes[ord(ch) - base]
            # a digit repeating the previous one is not recorded again
            if not encoded or encoded[-1] != digit:
                encoded.append(digit)

    # the first letter stands in for its own digit, and the 0 placeholders
    # are dropped from what follows
    tail = ''.join(encoded[1:]).replace('0', '')

    # pad with zeros, then cut to the requested length
    return (first_letter + tail + '0' * len)[:len]


def get_title_tokens(title, strip_subtitle=True):
    '''
    Generate the lower-cased tokens of a title that are useful for an AND
    search query.

    Subtitles are removed first (when strip_subtitle is True and doing so
    leaves more than one character), then edition style markers and
    punctuation are removed. The words 'a' and 'the' are never yielded.
    Nothing is yielded for an empty title.
    '''
    if not title:
        return

    if strip_subtitle:
        # A subtitle is anything in brackets, or everything following a
        # '/', ':' or '\'
        subtitle_pat = re.compile(r'([\(\[\{].*?[\)\]\}]|[/:\\].*$)')
        without_subtitle = subtitle_pat.sub('', title)
        if len(without_subtitle) > 1:
            title = without_subtitle

    cleanup_rules = [
        # Remove things like: (2010) (Omnibus) etc.
        (r'(?i)[({\[](\d{4}|omnibus|anthology|hardcover|paperback|mass\s*market|edition|ed\.)[\])}]', ''),
        # Remove any strings that contain the substring edition inside
        # parentheses
        (r'(?i)[({\[].*?(edition|ed.).*?[\]})]', ''),
        # Remove commas used as separators in numbers
        (r'(\d+),(\d+)', r'\1\2'),
        # Remove hyphens only if they have whitespace before them
        (r'(\s-)', ' '),
        # Remove single quotes not followed by 's'
        (r"'(?!s)", ''),
        # Replace other special chars with a space
        (r'''[:,;+!@#$%^&*(){}.`~"\s\[\]/]''', ' '),
    ]
    for expression, replacement in cleanup_rules:
        title = re.compile(expression, re.IGNORECASE).sub(replacement, title)

    for word in title.split():
        lowered = word.lower()
        if lowered not in ('a', 'the'):
            yield lowered


def get_author_tokens(authors, only_first_author=True):
    '''
    Take a list of authors and generate lower-cased tokens useful for an
    AND search query. This function tries to yield tokens in
    first name middle names last name order, by assuming that if a comma is
    in the author name, the name is in lastname, other names form.

    authors           - list of author name strings (may be empty or None)
    only_first_author - when True only the first author in the list is used

    Tokens that are empty after punctuation removal, or that appear in the
    list of ignored particles/suffixes (jr, sr, ii, iii, phd ...), are skipped.
    '''
    # Each entry needs its own comma: adjacent string literals are silently
    # concatenated, which previously turned 'ii' 'iii' into the single entry
    # 'iiiii' so that neither suffix was ever ignored.
    ignore_suffixes = ('von', 'jr', 'sr', 'i', 'ii', 'iii', 'second', 'third',
                       'md', 'phd')
    if not authors:
        return

    # Leave ' in there for Irish names
    remove_pat = re.compile(r'[,!@#$%^&*(){}`~"\s\[\]/]')
    replace_pat = re.compile(r'[-+.:;]')
    if only_first_author:
        authors = authors[:1]
    for au in authors:
        au = replace_pat.sub(' ', au)
        parts = au.split()
        if ',' in au:
            # au probably in ln, fn form, so move the first word to the end
            parts = parts[1:] + parts[:1]
        for tok in parts:
            tok = remove_pat.sub('', tok).strip().lower()
            if tok and tok not in ignore_suffixes:
                yield tok

# --------------------------------------------------------------
#           Title Matching Algorithm Functions
# --------------------------------------------------------------

def identical_title_match(title):
    '''
    Key for an "identical" title comparison: the title in lower case.
    '''
    lowered_title = title.lower()
    return lowered_title

def similar_title_match(title):
    '''
    Key for a "similar" title comparison: the title normalised by
    fuzzy_it() using its default patterns.
    '''
    normalised_title = fuzzy_it(title)
    return normalised_title

def soundex_title_match(title):
    '''
    Key for a "soundex" title comparison: a 6 character soundex code
    computed from the "similar" form of the title.
    '''
    return soundex(similar_title_match(title), len=6)

def fuzzy_title_match(title):
    '''
    Key for a "fuzzy" title comparison: the title tokens concatenated with
    no separator, cut off at the first joining word.
    '''
    joining_words = ['&', 'and', 'or', 'aka']
    kept_tokens = []
    for position, word in enumerate(get_title_tokens(title)):
        # Everything from "and", "or" etc. onwards is dropped provided it is
        # not the first word in the title - this is very aggressive!
        if position > 0 and word in joining_words:
            break
        kept_tokens.append(word)
    return ''.join(kept_tokens)

# --------------------------------------------------------------
#           Author Matching Algorithm Functions
# --------------------------------------------------------------

def identical_author_match(authors):
    '''
    Key for an "identical" author comparison: all of the author names
    joined with commas, in lower case.
    '''
    joined_names = ','.join(authors)
    return joined_names.lower()

def similar_author_match(authors):
    '''
    Key for a "similar" author comparison: the tokens of the first author
    separated by single spaces, or '' when there are no authors.
    '''
    if authors:
        return ' '.join(get_author_tokens(authors, only_first_author=True))
    return ''

def soundex_author_match(authors):
    '''
    Key for a "soundex" author comparison: an 8 character soundex code
    computed from the tokens of the first author.
    '''
    # Start from the same tokens a "similar" author comparison would use
    tokens = list(get_author_tokens(authors, only_first_author=True))
    # Put the tokens in reverse order so the soundex focuses on the surname
    surname_first = ''.join(tokens[::-1])
    return soundex(surname_first, len=8)

def fuzzy_author_match(authors):
    '''
    Key for a "fuzzy" author comparison: the first character of the first
    token followed by the last token of the first author. A single token
    is returned as is, and '' is returned when there are no authors.
    '''
    if not authors:
        return ''
    tokens = list(get_author_tokens(authors, only_first_author=True))
    if len(tokens) <= 1:
        return ''.join(tokens)
    first_initial = tokens[0][0]
    return first_initial + tokens[-1]


# --------------------------------------------------------------
#                    Algorithm Classes
# --------------------------------------------------------------


class AlgorithmBase(object):
    '''
    All duplicate search algorithms should inherit from this class.

    A derived class supplies find_candidate() to file each book under a
    match key. This class then discards keys with fewer than two members,
    orders the remaining keys, splits each candidate set according to the
    exemptions map and numbers the resulting groups.
    '''
    def __init__(self, gui, exemptions_map):
        '''
        gui            - object providing library_view.model() and status_bar
        exemptions_map - maps an item to the set of items that it must not
                         be grouped together with
        '''
        self.gui = gui
        self.model = self.gui.library_view.model()
        self.db = self.model.db
        self._exemptions_map = exemptions_map

    def duplicate_search_mode(self):
        '''
        Return the DUPLICATE_SEARCH_FOR_* constant for this algorithm
        '''
        return DUPLICATE_SEARCH_FOR_BOOK

    def run_duplicate_check(self, sort_groups_by_title=True):
        '''
        The entry point for running the algorithm
        Returns a tuple of (books_for_groups_map, groups_for_book_map) as
        produced by convert_candidates_to_groups()
        '''
        book_ids = self.get_book_ids_to_consider()
        start = time.time()
        candidates_map = defaultdict(set)
        self.gui.status_bar.showMessage(_('Analysing %d books for duplicates...') % (len(book_ids),))
        for book_id in book_ids:
            self.find_candidate(book_id, candidates_map)

        # Perform a quick pass through removing all groups with < 2 members
        # (copy the keys first as we delete from the map while looping)
        for key in list(candidates_map.keys()):
            if len(candidates_map[key]) < 2:
                del candidates_map[key]
        # Now ask for these candidate groups to be ordered so that our numbered
        # groups will have some kind of consistent order to them.
        candidates_map = self.sort_candidate_groups(candidates_map, sort_groups_by_title)

        # Convert our dictionary of potential candidates into sets of more than one
        books_for_groups_map, groups_for_book_map = self.convert_candidates_to_groups(candidates_map)
        if DEBUG:
            prints('Completed duplicate analysis in:', time.time() - start)
            prints('Found %d duplicate groups covering %d books:'%(len(books_for_groups_map),
                                                                   len(groups_for_book_map)))
        return books_for_groups_map, groups_for_book_map

    def get_book_ids_to_consider(self):
        '''
        Default implementation will iterate over the entire library
        Returns a list with the id of every row in the library view model
        '''
        row_count = self.model.rowCount(QModelIndex())
        # Build a real list using range() rather than xrange()/map() so that
        # the result supports len() and this works on Python 2 and Python 3
        return [self.model.id(row) for row in range(row_count)]

    def find_candidate(self, book_id, candidates_map):
        '''
        Derived classes must provide an implementation which adds an entry
        for this book to candidates_map under its match key
        '''
        pass

    def convert_candidates_to_groups(self, candidates_map):
        '''
        Given a dictionary keyed by some sort of common duplicate group
        key (like a fuzzy of title/author) remove all of the groups that
        have less than two members, repartition as required for any
        duplicate exemptions and return as a tuple of:
          (books_for_group_map, groups_for_book_map)
        books_for_group_map - for each group id, contains a list of book ids
        groups_for_book_map - for each book id, contains a set of group ids
        Group ids are numbered from 1 in the iteration order of candidates_map
        '''
        books_for_group_map = dict()
        groups_for_book_map = defaultdict(set)
        group_id = 0
        for candidate_items in candidates_map.values():
            for partition_group in self.partition_using_exemptions(candidate_items):
                if len(partition_group) > 1:
                    group_id += 1
                    partition_book_ids = self.get_book_ids_for_candidate_group(partition_group)
                    books_for_group_map[group_id] = partition_book_ids
                    for book_id in partition_book_ids:
                        groups_for_book_map[book_id].add(group_id)
        return books_for_group_map, groups_for_book_map

    def get_book_ids_for_candidate_group(self, candidate_group):
        '''
        Return the book ids representing this candidate group
        Default implementation is given a book ids so just return them
        '''
        return candidate_group

    def sort_candidate_groups(self, candidates_map, by_title=True):
        '''
        Responsible for returning an ordered dict of how to order the groups
        Default implementation will just sort by the fuzzy key of our candidates
        When by_title is False the groups are instead ordered by descending
        number of members (zero padded to 4 digits) and then descending key
        '''
        if by_title:
            skeys = sorted(candidates_map.keys())
        else:
            skeys = sorted(candidates_map.keys(),
                       key=lambda ckey: '%04d%s' % (len(candidates_map[ckey]), ckey),
                       reverse=True)
        return OrderedDict([(key, candidates_map[key]) for key in skeys])

    def partition_using_exemptions(self, data_items):
        '''
        Given a set of data items, see if any of these combinations should
        be excluded due to being marked as not duplicates of each other
        If we find items that should not appear together, then we will
        repartition into multiple groups. Returns a sorted list where each
        item is a sorted sublist containing the data items for that
        partitioned group. Partitions with fewer than two items are dropped.
        '''
        data_items = sorted(data_items)
        # Initial condition -- the group contains 1 set of all elements
        results = [set(data_items)]
        # partitioning_ids[i] is the item which caused results[i] to be created
        partitioning_ids = [None]
        # Loop through the set of duplicates, checking to see if the entry is in a non-dup set
        for one_dup in data_items:
            if one_dup in self._exemptions_map:
                # The entry is indeed in a non-dup set. We may need to partition
                # Note that results can grow while we are looping over it, so
                # partitions appended below are themselves visited by this loop
                for i,res in enumerate(results):
                    if one_dup in res:
                        # This result group contains the item with a non-dup set. If the item
                        # was the one that caused this result group to partition in the first place,
                        # then we must not partition again or we will make subsets of the group
                        # that split this partition off. Consider a group of (1,2,3,4) and
                        # non-dups of [(1,2), (2,3)]. The first partition will give us (1,3,4)
                        # and (2,3,4). Later when we discover (2,3), if we partition (2,3,4)
                        # again, we will end up with (2,4) and (3,4), but (3,4) is a subset
                        # of (1,3,4). All we need to do is remove 3 from the (2,3,4) partition.
                        if one_dup == partitioning_ids[i]:
                            results[i] = (res - self._exemptions_map[one_dup]) | {one_dup}
                            continue
                        # Must partition. We already have one partition, the one in our hand.
                        # Remove the dups from it, then create new partitions for each of the dups.
                        results[i] = (res - self._exemptions_map[one_dup]) | {one_dup}
                        for nd in self._exemptions_map[one_dup]:
                            # Only partition if the duplicate is larger than the one we are looking at.
                            # map[2] == (3, 10), and map[3] == (2, 10) and map[10] = (2,3).
                            # We know that when processing the set for 3, we have already done
                            # the work for the element 2.
                            if nd > one_dup and nd in res:
                                results.append((res - self._exemptions_map[one_dup] - {one_dup})
                                                | {nd})
                                partitioning_ids.append(nd)
        sr = []
        for r in results:
            if len(r) > 1:
                sr.append(sorted(r))
        sr.sort()
        return sr


class ISBNAlgorithm(AlgorithmBase):
    '''
    This algorithm simply finds books that have duplicate ISBN values
    '''
    def get_book_ids_to_consider(self):
        '''
        Override base function as we will only consider books that have an ISBN
        rather than every book in the library.
        '''
        return self.model.db.data.search_getting_ids('isbn:True', search_restriction='')

    def find_candidate(self, book_id, candidates_map):
        '''
        File the book in candidates_map under its ISBN
        '''
        isbn = self.db.isbn(book_id, index_is_id=True)
        candidates_map[isbn].add(book_id)

    def sort_candidate_groups(self, candidates_map, by_title=True):
        '''
        Responsible for returning an ordered dict of how to order the groups
        Override to just do a fuzzy title sort to give a better sort than by ISBN
        When by_title is False the groups are ordered by descending number
        of members (zero padded to 4 digits) and then descending ISBN
        '''
        if by_title:
            # Each group is represented by the "similar" title of one of its books
            title_map = {}
            for key in candidates_map.keys():
                book_id = next(iter(candidates_map[key]))
                title_map[key] = similar_title_match(self.db.title(book_id, index_is_id=True))
            skeys = sorted(candidates_map.keys(), key=lambda isbn: title_map[isbn])
        else:
            # The sort key must be built from the lambda's own argument; this
            # previously referred to an undefined name and raised a NameError
            skeys = sorted(candidates_map.keys(),
                       key=lambda isbn: '%04d%s' % (len(candidates_map[isbn]), isbn),
                       reverse=True)
        return OrderedDict([(isbn, candidates_map[isbn]) for isbn in skeys])


class TitleAuthorAlgorithm(AlgorithmBase):
    '''
    Algorithm covering every permutation that evaluates the book title,
    optionally combined with an evaluation of the book authors.
    '''
    def __init__(self, gui, book_exemptions_map, title_eval, author_eval=None):
        '''
        title_eval  - function turning a title into its match key
        author_eval - optional function turning a list of authors into
                      their match key; when not given authors are ignored
        '''
        AlgorithmBase.__init__(self, gui, exemptions_map=book_exemptions_map)
        self._author_eval = author_eval
        self._title_eval = title_eval

    def find_candidate(self, book_id, candidates_map):
        '''
        File the book under its title key followed directly by its author
        key (an empty string when authors are not being evaluated)
        '''
        book_title = self.db.title(book_id, index_is_id=True)
        title_key = self._title_eval(book_title)
        if self._author_eval:
            author_key = self._author_eval(authors_to_list(self.db, book_id))
        else:
            author_key = ''
        candidates_map[title_key + author_key].add(book_id)


class AuthorOnlyAlgorithm(AlgorithmBase):
    '''
    Algorithm covering every permutation that evaluates only the authors,
    with no regard for the book titles.
    '''
    def __init__(self, gui, author_exemptions_map, author_eval):
        '''
        author_eval - function turning a list of authors into a match key
        '''
        AlgorithmBase.__init__(self, gui, exemptions_map=author_exemptions_map)
        # For each first author seen, the ids of the books they were seen on
        self._author_bookids_map = defaultdict(set)
        self._author_eval = author_eval

    def duplicate_search_mode(self):
        '''
        The groups from this algorithm are for an author search
        '''
        return DUPLICATE_SEARCH_FOR_AUTHOR

    def find_candidate(self, book_id, candidates_map):
        '''
        Differs from the usual implementation in several ways:
        - Our candidates map contains authors per key, not book ids
        - Our exclusions are per author rather than per book
        A book with no authors is not considered at all.
        '''
        book_authors = authors_to_list(self.db, book_id)
        if book_authors:
            first_author = book_authors[0]
            match_key = self._author_eval(book_authors)
            self._author_bookids_map[first_author].add(book_id)
            candidates_map[match_key].add(first_author)

    def get_book_ids_for_candidate_group(self, candidate_group):
        '''
        Our candidate group holds authors rather than book ids, so gather
        the book ids recorded against each of those authors and return
        them as a sorted list
        '''
        collected_ids = set()
        for author_name in candidate_group:
            collected_ids.update(self._author_bookids_map[author_name])
        return sorted(collected_ids)


# --------------------------------------------------------------
#                    Algorithm Factory
# --------------------------------------------------------------


def get_title_algorithm_fn(title_match):
    '''
    Look up the title matching function for the named title match type.
    Returns None when the name is not one of the known match types.
    '''
    known_matches = (
        ('identical', identical_title_match),
        ('similar', similar_title_match),
        ('fuzzy', fuzzy_title_match),
        ('soundex', soundex_title_match),
    )
    for match_name, match_fn in known_matches:
        if title_match == match_name:
            return match_fn
    return None


def get_author_algorithm_fn(author_match):
    '''
    Look up the author matching function for the named author match type.
    Returns None when the name is not one of the known match types.
    '''
    known_matches = (
        ('identical', identical_author_match),
        ('similar', similar_author_match),
        ('soundex', soundex_author_match),
        ('fuzzy', fuzzy_author_match),
    )
    for match_name, match_fn in known_matches:
        if author_match == match_name:
            return match_fn
    return None


def create_algorithm(gui, bex_map, aex_map, title_match, author_match):
    '''
    Factory choosing the algorithm for the requested combination of
    title and author matching.

    bex_map is handed to the book based algorithms as their exemptions
    and aex_map to the author only algorithm.
    Returns a tuple of (algorithm, translated summary description).
    '''
    ignoring_title = title_match == 'ignore'

    if ignoring_title and author_match == 'ignore':
        # Nothing to compare titles or authors with, so fall back to ISBN
        algorithm = ISBNAlgorithm(gui, bex_map)
        summary = _('matching ISBN only')
        return algorithm, summary

    if ignoring_title:
        author_fn = get_author_algorithm_fn(author_match)
        algorithm = AuthorOnlyAlgorithm(gui, aex_map, author_fn)
        summary = _('ignore title, %s author') % author_match
        return algorithm, summary

    title_fn = get_title_algorithm_fn(title_match)
    author_fn = get_author_algorithm_fn(author_match)
    algorithm = TitleAuthorAlgorithm(gui, bex_map, title_fn, author_fn)
    summary = _('%s title, %s author') % (title_match, author_match)
    return algorithm, summary


# --------------------------------------------------------------
#                        Test Code
# --------------------------------------------------------------


def _assert(test_name, fn, value1, value2, equal=True):
    '''
    Apply fn to both values and check that the results are equal (or are
    different when equal is False). A failure is reported by printing the
    test name, the two values and the two results; nothing is raised.
    '''
    hash1 = fn(value1)
    hash2 = fn(value2)
    if equal:
        failed = hash1 != hash2
    else:
        failed = hash1 == hash2
    if not failed:
        return
    prints('Failed: %s(\'%s\', \'%s\')'% (test_name, value1, value2))
    prints(' hash1: %s'%hash1)
    prints(' hash2: %s'%hash2)

def assert_match(test_name, fn, value1, value2):
    '''
    Report a failure unless fn gives the same result for both values.
    '''
    label = 'is matching ' + test_name
    _assert(label, fn, value1, value2, equal=True)

def assert_nomatch(test_name, fn, value1, value2):
    '''
    Report a failure unless fn gives different results for the two values.
    '''
    label = 'not matching ' + test_name
    _assert(label, fn, value1, value2, equal=False)


# For testing, run from command line with this:
# calibre-debug -e algorithms.py
#
# Each assert_match/assert_nomatch call below applies one of the matching
# functions to a pair of values. A failing check is only printed (see
# _assert), it does not stop the run, so a clean run prints nothing other
# than the final 'Tests completed' line.
if __name__ == '__main__':

    # Test our identical title algorithms
    # (only differences in case are tolerated)
    assert_match('identical title', identical_title_match, 'The Martian Way', 'The Martian Way')
    assert_match('identical title', identical_title_match, 'The Martian Way', 'the martian way')
    assert_nomatch('identical title', identical_title_match, 'The Martian Way', 'Martian Way')

    # Test our similar title algorithms
    # (leading articles and punctuation are tolerated, extra words are not)
    assert_match('similar title', similar_title_match, 'The Martian Way', 'The Martian Way')
    assert_match('similar title', similar_title_match, 'The Martian Way', 'the martian way')
    assert_match('similar title', similar_title_match, 'The Martian Way', 'Martian Way')
    assert_match('similar title', similar_title_match, 'The Martian Way', 'The Martian Way')
    assert_nomatch('similar title', similar_title_match, 'The Martian Way', 'The Martain Way')
    assert_nomatch('similar title', similar_title_match, 'The Martian Way', 'The Martian Way (Foo)')
    assert_nomatch('similar title', similar_title_match, 'The Martian Way I', 'The Martian Way II')
    assert_nomatch('similar title', similar_title_match, 'The Martian Way', 'The Martian Way and other stories')
    assert_nomatch('similar title', similar_title_match, 'The Martian Way', 'The Martian Way, or, My New Title')
    assert_nomatch('similar title', similar_title_match, 'The Martian Way', 'The Martian Way aka My New Title')
    assert_nomatch('similar title', similar_title_match, 'Foundation and Earth - Foundation 5', 'Foundation and Earth')

    # Test our soundex title algorithms
    # (small spelling differences and anything beyond the length of the
    # soundex code are tolerated)
    assert_match('soundex title', soundex_title_match, 'The Martian Way', 'The Martian Way')
    assert_match('soundex title', soundex_title_match, 'The Martian Way', 'the martian way')
    assert_match('soundex title', soundex_title_match, 'The Martian Way', 'Martian Way')
    assert_match('soundex title', soundex_title_match, 'The Martian Way', 'The Martian Way')
    assert_match('soundex title', soundex_title_match, 'The Martian Way', 'The Martain Way')
    assert_match('soundex title', soundex_title_match, 'The Martian Way I', 'The Martian Way II')
    assert_match('soundex title', soundex_title_match, 'Angel', 'Angle')
    assert_match('soundex title', soundex_title_match, 'Foundation and Earth - Foundation 5', 'Foundation and Earth')
    assert_nomatch('soundex title', soundex_title_match, 'The Martian Way', 'The Martian Way (Foo)')
    assert_nomatch('soundex title', soundex_title_match, 'The Martian Way', 'The Martian Way and other stories')
    assert_nomatch('soundex title', soundex_title_match, 'The Martian Way', 'The Martian Way, or, My New Title')
    assert_nomatch('soundex title', soundex_title_match, 'The Martian Way', 'The Martian Way aka My New Title')
    assert_nomatch('soundex title', soundex_title_match, 'Foundation 5 - Foundation and Earth', 'Foundation and Earth')

    # Test our fuzzy title algorithms
    # (subtitles and anything after a joining word are tolerated, spelling
    # differences are not)
    assert_match('fuzzy title', fuzzy_title_match, 'The Martian Way', 'The Martian Way')
    assert_match('fuzzy title', fuzzy_title_match, 'The Martian Way', 'the martian way')
    assert_match('fuzzy title', fuzzy_title_match, 'The Martian Way', 'Martian Way')
    assert_match('fuzzy title', fuzzy_title_match, 'The Martian Way', 'The Martian Way')
    assert_match('fuzzy title', fuzzy_title_match, 'The Martian Way', 'The Martian Way (Foo)')
    assert_match('fuzzy title', fuzzy_title_match, 'The Martian Way', 'The Martian Way: Sequel')
    assert_match('fuzzy title', fuzzy_title_match, 'The Martian Way', 'The Martian Way and other stories')
    assert_match('fuzzy title', fuzzy_title_match, 'The Martian Way', 'The Martian Way, or, My New Title')
    assert_match('fuzzy title', fuzzy_title_match, 'The Martian Way', 'The Martian Way aka My New Title')
    assert_match('fuzzy title', fuzzy_title_match, 'Foundation and Earth - Foundation 5', 'Foundation and Earth')
    assert_nomatch('fuzzy title', fuzzy_title_match, 'The Martian Way', 'The Martain Way')
    assert_nomatch('fuzzy title', fuzzy_title_match, 'The Martian Way I', 'The Martian Way II')
    assert_nomatch('fuzzy title', fuzzy_title_match, 'Foundation 5 - Foundation and Earth', 'Foundation and Earth')

    # Test our identical author algorithms
    # (only differences in case are tolerated)
    assert_match('identical author', identical_author_match, ['Kevin J. Anderson'], ['Kevin J. Anderson'])
    assert_match('identical author', identical_author_match, ['Kevin J. Anderson'], ['Kevin j. Anderson'])
    assert_nomatch('identical author', identical_author_match, ['Kevin J. Anderson'], ['Kevin J Anderson'])

    # Test our similar author algorithms
    # (punctuation, name order and ignored suffixes are tolerated)
    assert_match('similar author', similar_author_match, ['Kevin J. Anderson'], ['Kevin J. Anderson'])
    assert_match('similar author', similar_author_match, ['Kevin J. Anderson'], ['Kevin j. Anderson'])
    assert_match('similar author', similar_author_match, ['Kevin J. Anderson'], ['Kevin J Anderson'])
    assert_match('similar author', similar_author_match, ['Kevin J. Anderson'], ['Anderson, Kevin J.'])
    assert_match('similar author', similar_author_match, ['Kevin Anderson'], ['Kevin Anderson Jr'])
    assert_nomatch('similar author', similar_author_match, ['Kevin J. Anderson'], ['Kevin Anderson'])

    # Test our soundex author algorithms
    # (small spelling differences and a missing middle initial are tolerated)
    assert_match('soundex author', soundex_author_match, ['Kevin J. Anderson'], ['Kevin J. Anderson'])
    assert_match('soundex author', soundex_author_match, ['Kevin J. Anderson'], ['Kevin j. Anderson'])
    assert_match('soundex author', soundex_author_match, ['Kevin J. Anderson'], ['Kevin J Anderson'])
    assert_match('soundex author', soundex_author_match, ['Kevin J. Anderson'], ['Keven J. Andersan'])
    assert_match('soundex author', soundex_author_match, ['Kevin J. Anderson'], ['Anderson, Kevin J.'])
    assert_match('soundex author', soundex_author_match, ['Kevin Anderson'], ['Kevin Anderson Jr'])
    assert_match('soundex author', soundex_author_match, ['Kevin J. Anderson'], ['Kevin Anderson'])
    assert_nomatch('soundex author', soundex_author_match, ['Kevin J. Anderson'], ['S. Anderson'])

    # Test our fuzzy author algorithms
    # (only the first initial and the surname are compared)
    assert_match('fuzzy author', fuzzy_author_match, ['Kevin J. Anderson'], ['Kevin J. Anderson'])
    assert_match('fuzzy author', fuzzy_author_match, ['Kevin J. Anderson'], ['Kevin j. Anderson'])
    assert_match('fuzzy author', fuzzy_author_match, ['Kevin J. Anderson'], ['Kevin J Anderson'])
    assert_match('fuzzy author', fuzzy_author_match, ['Kevin J. Anderson'], ['Kevin Anderson'])
    assert_match('fuzzy author', fuzzy_author_match, ['Kevin J. Anderson'], ['Anderson, Kevin J.'])
    assert_match('fuzzy author', fuzzy_author_match, ['Kevin J. Anderson'], ['Anderson, Kevin'])
    assert_match('fuzzy author', fuzzy_author_match, ['Kevin J. Anderson'], ['K. J. Anderson'])
    assert_match('fuzzy author', fuzzy_author_match, ['Kevin J. Anderson'], ['K. Anderson'])
    assert_match('fuzzy author', fuzzy_author_match, ['Kevin Anderson'], ['Kevin Anderson Jr'])
    assert_match('fuzzy author', fuzzy_author_match, ['Kevin Anderson'], ['Anderson Jr, K. S.'])
    assert_nomatch('fuzzy author', fuzzy_author_match, ['Kevin J. Anderson'], ['S. Anderson'])

    prints('Tests completed')
