#!/usr/bin/env python
# ~*~ coding: utf-8 ~*~

__license__   = 'GPL v3'
__copyright__ = '2020, Ahmed Zaki <azaki00.dev@gmail.com>'
__docformat__ = 'restructuredtext en'

from collections import OrderedDict, defaultdict
import time
import re

from qt.core import (Qt, QWidget, QVBoxLayout, QSpinBox, QGroupBox)


from calibre import prints
from calibre.constants import DEBUG
from calibre.ebooks.metadata.book.formatter import SafeFormat
from calibre.utils.config import tweaks
from calibre.utils.localization import get_udc

from calibre_plugins.category_tags.advanced_matching.templates import (get_metadata_object,
                            check_template, TEMPLATE_ERROR, TemplateBox)
from calibre_plugins.category_tags.advanced_matching import column_metadata

# Install the plugin's translations at import time.
# load_translations is not defined or imported in this module; presumably
# calibre injects it into plugin modules when loading them - TODO confirm.
# A NameError is tolerated so that the module still imports when the name
# is not available; only a diagnostic line is printed in that case.
try:
    load_translations()
except NameError:
    prints("Category Tags::advanced_matching/matching.py - exception when loading translations")

#######################

# Lower-case name particles, suffixes and titles that get_author_tokens()
# leaves out of the tokens it produces.
ignore_author_words = [
    'von', 'van', 'jr', 'sr', 'i', 'ii', 'iii', 'second', 'third', 'md', 'phd',
]
# Same words as a mapping (word -> True) so membership tests are hash lookups.
IGNORE_AUTHOR_WORDS_MAP = {word: True for word in ignore_author_words}

def fuzzy_it(text, patterns=None):
    '''
    Normalise text into a loose comparison key.

    The text is stripped and lower-cased, then every (compiled pattern,
    replacement) pair is applied in order, and the result is stripped again.

    :param text: the string to normalise.
    :param patterns: optional sequence of (compiled regex, replacement) pairs.
        When it is None or empty the default title patterns are used: remove
        common punctuation, remove a leading article (pattern taken from the
        'title_sort_articles' tweak, falling back to a/the/an), turn '-', '.'
        and '_' into spaces and collapse runs of whitespace.
    :return: the normalised string.
    '''
    if not patterns:
        # Built only when needed, so callers that supply their own patterns
        # do not pay for the tweak lookup and four regex compilations.
        patterns = [(re.compile(pat, re.IGNORECASE), repl) for pat, repl in
                    [
                        (r'[\[\](){}<>\'";,:#]', ''),
                        (tweaks.get('title_sort_articles', r'^(a|the|an)\s+'), ''),
                        (r'[-._]', ' '),
                        (r'\s+', ' ')
                    ]]
    text = text.strip().lower()
    for pat, repl in patterns:
        text = pat.sub(repl, text)
    return text.strip()

def soundex(name, length=4):
    '''
    soundex module conforming to Knuth's algorithm
    implementation 2000-12-24 by Gregory Jorgensen
    public domain
    http://code.activestate.com/recipes/52213-soundex-algorithm/

    Only the letters A-Z (after upper-casing) take part; every other
    character is ignored. The result is the first letter followed by the
    digits of the remaining letters, with consecutive duplicate digits
    collapsed and zeros dropped, padded with '0' and cut to ``length``
    characters.
    '''
    # Soundex digit of each letter, indexed by its offset from 'A':
    #        ABCDEFGHIJKLMNOPQRSTUVWXYZ
    table = '01230120022455012623010202'
    base = ord('A')

    letters = [ch for ch in name.upper() if 'A' <= ch <= 'Z']

    # Translate letters to digits, skipping a digit equal to the previous one.
    codes = []
    for ch in letters:
        digit = table[ord(ch) - base]
        if not codes or codes[-1] != digit:
            codes.append(digit)

    # The first digit gives way to the first letter itself.
    first_letter = letters[0] if letters else ''
    code = first_letter + ''.join(codes[1:])

    # Zeros stand for letters that do not contribute to the code.
    code = code.replace('0', '')

    # Pad with zeros and truncate to the requested length.
    return (code + '0' * length)[:length]


# --------------------------------------------------------------
#           Title Matching Algorithm Functions
# --------------------------------------------------------------

def get_title_tokens(title, strip_subtitle=True, decode_non_ascii=True):
    '''
    Take a title and yield lower-case tokens useful for an AND search query.
    Excludes subtitles (optionally), punctuation and a, the.

    :param title: the title string; a false value yields nothing.
    :param strip_subtitle: when True, bracketed groups and everything after
        the first '/', ':' or backslash are removed, unless that would leave
        fewer than two characters.
    :param decode_non_ascii: when True the cleaned title is passed through
        get_udc().decode() before being split into tokens.
    '''
    if not title:
        return

    # strip sub-titles
    if strip_subtitle:
        subtitle = re.compile(r'([\(\[\{].*?[\)\]\}]|[/:\\].*$)')
        stripped = subtitle.sub('', title)
        # Keep the original when stripping would leave (almost) nothing
        if len(stripped) > 1:
            title = stripped

    title_patterns = [(re.compile(pat, re.IGNORECASE), repl) for pat, repl in
    [
        # Remove things like: (2010) (Omnibus) etc.
        (r'(?i)[({\[](\d{4}|omnibus|anthology|hardcover|paperback|mass\s*market|edition|ed\.)[\])}]', ''),
        # Remove any strings that contain the substring edition (or the
        # abbreviation "ed.") inside parentheses. The dot is escaped so that
        # groups merely containing "ed" + any character, e.g. "(Red Cover)",
        # are no longer removed.
        (r'(?i)[({\[].*?(edition|ed\.).*?[\]})]', ''),
        # Remove commas used a separators in numbers
        (r'(\d+),(\d+)', r'\1\2'),
        # Remove hyphens only if they have whitespace before them
        (r'(\s-)', ' '),
        # Remove single quotes not followed by 's'
        (r"'(?!s)", ''),
        # Replace other special chars with a space
        (r'''[:,;+!@#$%^&*(){}.`~"\s\[\]/]''', ' ')
    ]]

    for pat, repl in title_patterns:
        title = pat.sub(repl, title)

    if decode_non_ascii:
        title = get_udc().decode(title)

    # split() already discards surrounding whitespace and empty strings
    for token in title.split():
        token = token.lower()
        if token not in ('a', 'the'):
            yield token

def identical_title_match(title, lang=None):
    '''
    Return the lower-cased title, prefixed with lang when lang is truthy.
    '''
    lowered = title.lower()
    return lang + lowered if lang else lowered

def similar_title_match(title, lang=None):
    '''
    Return the title decoded with get_udc() and normalised by fuzzy_it()
    using its default patterns, prefixed with lang when lang is truthy.
    '''
    normalised = fuzzy_it(get_udc().decode(title))
    return lang + normalised if lang else normalised

# Length of the soundex code produced for titles. It was read below (and its
# setter is called by SoundexMatch) without ever being defined, which raised
# NameError; 6 mirrors the default SoundexMatch uses for 'soundex_length'.
title_soundex_length = 6


def set_title_soundex_length(title_len):
    '''
    Set the length of the soundex code produced by soundex_title_match().
    '''
    global title_soundex_length
    title_soundex_length = title_len


def soundex_title_match(title, lang=None):
    '''
    Return the soundex code of the "similar" form of the title, using the
    module-level title_soundex_length, prefixed with lang when lang is truthy.
    '''
    # Convert to an equivalent of "similar" title first before applying the soundex
    title = similar_title_match(title)
    result = soundex(title, title_soundex_length)
    if lang:
        return lang + result
    return result

def fuzzy_title_match(title, lang=None):
    '''
    Return the title tokens concatenated without separators, cut off at the
    first '&', 'and', 'or' or 'aka' token that is not the very first token.
    The result is prefixed with lang when lang is truthy.
    '''
    kept = []
    for position, token in enumerate(get_title_tokens(title)):
        # Everything from a joining word onwards is dropped, unless the
        # title starts with that word - this is very aggressive!
        if position > 0 and token in ('&', 'and', 'or', 'aka'):
            break
        kept.append(token)
    digest = ''.join(kept)
    return lang + digest if lang else digest


# --------------------------------------------------------------
#           Author Matching Algorithm Functions
#
#  Note that these return two hashes
#  - first is based on the author name supplied
#  - second (if not None) is based on swapping name order
# --------------------------------------------------------------

def get_author_tokens(author, decode_non_ascii=True):
    '''
    Yield lower-case tokens of an author name for duplicate hash comparisons.

    Tokens are meant to come out in first name, middle names, last name
    order: when the name contains a comma it is assumed to be written as
    "lastname, other names" and its first word is moved to the end.
    Words listed in IGNORE_AUTHOR_WORDS_MAP are skipped. A false author
    yields nothing.
    '''
    if not author:
        return

    # Leave ' in there for Irish names
    strip_chars = re.compile(r'[,!@#$%^&*(){}`~"\s\[\]/]')
    spaced = re.compile(r'[-+.:;]').sub(' ', author)
    if decode_non_ascii:
        spaced = get_udc().decode(spaced)

    words = spaced.split()
    if ',' in spaced:
        # name probably in ln, fn form
        words = words[1:] + words[:1]

    for word in words:
        cleaned = strip_chars.sub('', word).strip().lower()
        if cleaned and cleaned not in IGNORE_AUTHOR_WORDS_MAP:
            yield cleaned

def identical_authors_match(author):
    '''
    Return (lower-cased author, None); there is no reversed-name hash.
    '''
    lowered = author.lower()
    return lowered, None

def similar_authors_match(author):
    '''
    Return (hash, reversed hash) built from the author tokens joined with
    spaces. The reversed hash has the first token moved to the end and is
    None when there are fewer than two tokens.
    '''
    tokens = list(get_author_tokens(author))
    forward = ' '.join(tokens)
    if len(tokens) < 2:
        return forward, None
    rotated = tokens[1:] + tokens[:1]
    return forward, ' '.join(rotated)

# Length of the soundex code produced for multi-token author names. It was
# read below without ever being defined, which raised NameError; 6 mirrors
# the default SoundexMatch uses for 'soundex_length'.
author_soundex_length = 6


def set_author_soundex_length(author_len):
    '''
    Set the length of the soundex code produced by soundex_authors_match().
    '''
    global author_soundex_length
    author_soundex_length = author_len


def soundex_authors_match(author):
    '''
    Return (hash, reversed hash) soundex codes for an author name.

    With at most one token the code has soundex()'s default length and the
    reversed hash is None. Otherwise the first hash is computed with the last
    token moved to the front and the reversed hash with the tokens in their
    original order, both using author_soundex_length.
    '''
    # Convert to an equivalent of "similar" author first before applying the soundex
    author_tokens = list(get_author_tokens(author))
    if len(author_tokens) <= 1:
        return soundex(''.join(author_tokens)), None
    # We will put the last name at front as want the soundex to focus on surname
    surname_first = author_tokens[-1:] + author_tokens[:-1]
    ahash = soundex(''.join(surname_first), author_soundex_length)
    rev_ahash = soundex(''.join(author_tokens), author_soundex_length)
    return ahash, rev_ahash

def fuzzy_authors_match(author):
    '''
    Return (hash, None) where the hash is the first letter of the first
    token followed by the last token. A single token is returned as is and
    no tokens give an empty hash.
    '''
    tokens = list(get_author_tokens(author))
    if not tokens:
        return '', None
    if len(tokens) == 1:
        return tokens[0], None
    # Initial plus surname only; a reversed permutation is deliberately not
    # produced, i.e. A. Bronte should return "ABronte" and "", not "BA"!
    initial = tokens[0][0]
    surname = tokens[-1]
    return initial + surname, None


# --------------------------------------------------------------
#           Series Matching Algorithm Functions
# --------------------------------------------------------------

def get_series_tokens(series, decode_non_ascii=True):
    '''
    Yield lower-case tokens of a series name for duplicate hash comparisons.

    The characters - + . : ; act as word separators, the remaining
    punctuation is removed from each word, and the words 'the', 'a' and
    'and' are skipped. A false series yields nothing.
    '''
    if not series:
        return

    skipped = ('the', 'a', 'and')
    strip_chars = re.compile(r'[,!@#$%^&*(){}`~\'"\s\[\]/]')
    spaced = re.compile(r'[-+.:;]').sub(' ', series)
    if decode_non_ascii:
        spaced = get_udc().decode(spaced)

    for word in spaced.split():
        cleaned = strip_chars.sub('', word).lower()
        if cleaned and cleaned not in skipped:
            yield cleaned

def similar_series_match(series):
    '''
    Return the series tokens joined with single spaces.
    '''
    return ' '.join(get_series_tokens(series))

# Length of the soundex code produced for multi-token series names. It was
# read below (and its setter is called by SoundexMatch) without ever being
# defined, which raised NameError; 6 mirrors the default SoundexMatch uses
# for 'soundex_length'.
series_soundex_length = 6


def set_series_soundex_length(series_len):
    '''
    Set the length of the soundex code produced by soundex_series_match().
    '''
    global series_soundex_length
    series_soundex_length = series_len


def soundex_series_match(series):
    '''
    Return the soundex code of the concatenated series tokens. With at most
    one token soundex()'s default length applies, otherwise
    series_soundex_length is used.
    '''
    # Convert to an equivalent of "similar" series before applying the soundex
    series_tokens = list(get_series_tokens(series))
    joined = ''.join(series_tokens)
    if len(series_tokens) <= 1:
        return soundex(joined)
    return soundex(joined, series_soundex_length)

def fuzzy_series_match(series):
    '''
    Return the first token of the series name, or '' when it has none.
    '''
    # Fuzzy is going to just be the first name of the series
    return next(get_series_tokens(series), '')


# --------------------------------------------------------------
#           Publisher Matching Algorithm Functions
# --------------------------------------------------------------

def get_publisher_tokens(publisher, decode_non_ascii=True):
    '''
    Yield lower-case tokens of a publisher name for duplicate hash
    comparisons.

    The characters - + . : ; act as word separators, the remaining
    punctuation is removed from each word, and company suffixes and similar
    filler words are skipped. A false publisher yields nothing.
    '''
    if not publisher:
        return

    skipped = ('the', 'inc', 'ltd', 'limited', 'llc', 'co', 'pty',
               'usa', 'uk')
    strip_chars = re.compile(r'[,!@#$%^&*(){}`~\'"\s\[\]/]')
    spaced = re.compile(r'[-+.:;]').sub(' ', publisher)
    if decode_non_ascii:
        spaced = get_udc().decode(spaced)

    for word in spaced.split():
        cleaned = strip_chars.sub('', word).lower()
        if cleaned and cleaned not in skipped:
            yield cleaned

def similar_publisher_match(publisher):
    '''
    Return the publisher tokens joined with single spaces.
    '''
    return ' '.join(get_publisher_tokens(publisher))

# Length of the soundex code produced for multi-token publisher names. It was
# read below (and its setter is called by SoundexMatch) without ever being
# defined, which raised NameError; 6 mirrors the default SoundexMatch uses
# for 'soundex_length'.
publisher_soundex_length = 6


def set_publisher_soundex_length(publisher_len):
    '''
    Set the length of the soundex code produced by soundex_publisher_match().
    '''
    global publisher_soundex_length
    publisher_soundex_length = publisher_len


def soundex_publisher_match(publisher):
    '''
    Return the soundex code of the concatenated publisher tokens. With at
    most one token soundex()'s default length applies, otherwise
    publisher_soundex_length is used.
    '''
    # Convert to an equivalent of "similar" publisher before applying the soundex
    publisher_tokens = list(get_publisher_tokens(publisher))
    joined = ''.join(publisher_tokens)
    if len(publisher_tokens) <= 1:
        return soundex(joined)
    return soundex(joined, publisher_soundex_length)

def fuzzy_publisher_match(publisher):
    '''
    Return the first publisher token, or the first two tokens joined by a
    space when the first one is a single letter and more tokens follow.
    Returns '' when there are no tokens.
    '''
    tokens = list(get_publisher_tokens(publisher))
    if not tokens:
        return ''
    head = tokens[0]
    # A lone initial is too weak a key, so take the following name as well
    if len(head) <= 1 and len(tokens) != 1:
        return ' '.join(tokens[:2])
    return head


# --------------------------------------------------------------
#           Tag Matching Algorithm Functions
# --------------------------------------------------------------

def get_tag_tokens(tag, decode_non_ascii=True):
    '''
    Yield lower-case tokens of a tag for duplicate hash comparisons.

    The characters - + . : ; act as word separators, the remaining
    punctuation is removed from each word, and the words 'the', 'and' and
    'a' are skipped. A false tag yields nothing.
    '''
    if not tag:
        return

    skipped = ('the', 'and', 'a')
    strip_chars = re.compile(r'[,!@#$%^&*(){}`~\'"\s\[\]/]')
    spaced = re.compile(r'[-+.:;]').sub(' ', tag)
    if decode_non_ascii:
        spaced = get_udc().decode(spaced)

    for word in spaced.split():
        cleaned = strip_chars.sub('', word).lower()
        if cleaned and cleaned not in skipped:
            yield cleaned

def similar_tags_match(tag):
    '''
    Return the tag tokens joined with single spaces.
    '''
    return ' '.join(get_tag_tokens(tag))

# Length of the soundex code produced for multi-token tags. The function
# below used to read the (undefined) publisher length instead, and the
# setter called by SoundexMatch did not exist; 6 mirrors the default
# SoundexMatch uses for 'soundex_length'.
tags_soundex_length = 6


def set_tags_soundex_length(tags_len):
    '''
    Set the length of the soundex code produced by soundex_tags_match().
    '''
    global tags_soundex_length
    tags_soundex_length = tags_len


def soundex_tags_match(tag):
    '''
    Return the soundex code of the concatenated tag tokens. With at most one
    token soundex()'s default length applies, otherwise tags_soundex_length
    is used.
    '''
    # Convert to an equivalent of "similar" tag before applying the soundex
    tag_tokens = list(get_tag_tokens(tag))
    joined = ''.join(tag_tokens)
    if len(tag_tokens) <= 1:
        return soundex(joined)
    return soundex(joined, tags_soundex_length)

def fuzzy_tags_match(tag):
    '''
    Return the first token of the tag, or '' when it has none.
    '''
    # Fuzzy is going to just be the first name of the tag
    return next(get_tag_tokens(tag), '')

##################################

class MetadataMatch(object):
    '''
    This is the base class for all algorithms.

    Subclasses set ``name`` and ``description`` and implement run(); the
    remaining methods provide defaults that subclasses override as needed.
    '''

    # Matches must have a unique name attribute.
    name = 'no name provided'
    description = ''

    def __init__(self, gui):
        '''
        All algorithms are initialized at startup.
        They are re-initialized on library change, and on adding or modifying custom algorithms.

        :param gui: the calibre gui object; its current_db is cached as self.db.
        '''
        self.gui = gui
        self.db = self.gui.current_db

    def run(self, field_name, hash_, mi, reverse, has_names, settings, *args, **kwargs):
        '''
        This is the method that contains the logic of the algorithm.
        The settings is a dictionary with options configured for the specific
        algorithm using the settings button.

        :raises NotImplementedError: always, in this base class.
        '''
        raise NotImplementedError

    def factory(self, field_name, reverse=False, composite_has_names=False):
        '''
        Return a function taking (hash, mi, settings) that calculates the hash
        for field_name by calling run() with the reverse and
        composite_has_names flags given here.
        '''
        return lambda _hash, _mi, _settings: self.run(field_name, _hash, _mi, reverse, composite_has_names, _settings)

    def config_widget(self):
        '''
        If you want your action to have settings dialog, implement this method
        This should return a Qwidget (not dialog) with the following methods:
        [*] __init__(self, gui)
        [*] save_settings(settings)
                This method is used to save the settings from the widget
                it should return a dictionary containing all the settings
        [*] load_settings(self, settings)
                This method is used to load the saved settings into the
                widget
        '''
        return None

    def default_settings(self):
        '''
        default settings to be used if no settings are configured
        '''
        return {}

    def validate(self, settings, target_db=None):
        '''
        Check that the algorithm runs with the given settings by executing it
        once on the 'title' field with a dummy value.

        :return: True on success, otherwise a (title, details) tuple of
            translated strings describing the failure.
        '''
        random_val = '000' # must be numerical, otherwise might err when function expects number
        mi = get_metadata_object(self.gui)
        try:
            self.run('title', random_val, mi, False, False, settings)
        except Exception as e:
            if DEBUG:
                prints('Find Duplicates: error running function: {} with settings: {}, return this exception: {}'.format(self.name, settings, e))
                import traceback
                print(traceback.format_exc())
            # Translate the constant message first and format afterwards:
            # formatting before the lookup produces a string that can never
            # be found in the translation catalogue.
            return (_('Match Error'), _('Error when trying to run algorithm: {}').format(self.name))
        return True

    def has_reverse(self, field_name, has_names):
        '''
        Returns True for algorithm that has a reverse algorithm for field_name.
        '''
        return False

class TemplateMatchWidget(TemplateBox):
    '''
    Template editor used to configure a template based algorithm.

    The template text is exchanged through load_settings()/save_settings()
    as a dictionary with a single 'template' key, and is validated with
    action.validate() before the widget is accepted.
    '''

    def __init__(self, parent, gui, action, name, title):
        '''
        :param parent: parent widget handed to TemplateBox.
        :param gui: the calibre gui object.
        :param action: object whose validate(settings) is called on accept.
        :param name: not used by this widget.
        :param title: window title.
        '''
        self.action = action
        self.gui = gui
        self.db = self.gui.current_db
        mi = get_metadata_object(self.gui)
        TemplateBox.__init__(
                self,
                parent,
                self.gui,
                template_text='',
                placeholder_text = _("Write you algorithm using calibre's template language"),
                mi=mi
            )
        self.setWindowTitle(title)

    def _on_select_chk_change(self):
        # NOTE(review): select_chk, search_opt and ids_opt are not created in
        # this class; presumably they come from TemplateBox - confirm.
        state = self.select_chk.isChecked()
        if not state:
            self.search_opt.setChecked(True)
        self.ids_opt.setEnabled(state)

    def load_settings(self, settings):
        '''
        Insert the saved template into the text box; empty settings are ignored.
        '''
        if settings:
            template = settings['template']
            self.textbox.insertPlainText(template)

    def save_settings(self):
        '''
        Return the settings dictionary holding the right-stripped template text.
        '''
        settings = {}
        settings['template'] = str(self.textbox.toPlainText()).rstrip()
        return settings

    def accept(self):
        '''
        Save and validate the settings; show an error dialog and keep the
        widget open when validation fails, otherwise accept.
        '''
        self.settings = self.save_settings()
        # validate settings
        is_valid = self.action.validate(self.settings)
        if is_valid is not True:
            # error_dialog was never imported by this module, so this branch
            # used to raise NameError instead of showing the message.
            from calibre.gui2 import error_dialog
            msg, details = is_valid
            error_dialog(
                self,
                msg,
                details,
                show=True
            )
            return
        TemplateBox.accept(self)

class TemplateMatch(MetadataMatch):
    '''
    Algorithm whose hash is the result of evaluating a user supplied calibre
    template against the metadata object.
    '''

    name = 'Template Match'
    _is_builtin = True
    description = _('Custom algorithms using calibre template language.')

    def run(self, field_name, hash_, mi, reverse, has_names, settings, *args, **kwargs):
        '''
        Store the incoming hash in mi under field_name, then evaluate
        settings['template'] against mi and return the result.
        '''
        # update the mi to persist the hash as this the only way a template can see the result of the previous algorithm/template
        self.do_update_mi(mi, field_name, hash_)

        template = settings['template']
        hash_ = SafeFormat().safe_format(template, mi, TEMPLATE_ERROR, mi)

        return hash_

    def do_update_mi(self, mi, field_name, value):
        '''
        Set field_name on mi to value, converted to the field's datatype.

        Values for multiple-valued fields are wrapped in a list; datetime,
        rating/float and int fields are parsed first. When parsing fails mi
        is left untouched (a message is printed in DEBUG mode).
        '''
        # 'identifier:' has no entry in field_metadata and would raise an exception
        if not field_name.startswith('identifier:'):
            field_meta = mi.metadata_for_field(field_name)
            datatype = field_meta['datatype']
            # all composite fields (even those where ['is_multiple'] != {}) are of string type
            if datatype != 'composite':
                if field_name == 'authors' or field_meta['is_multiple'] != {}:
                    # for fields with multiple items, when we update a single item, we must put it in a list
                    # because mi expect multiple value field, if you don't do this it will treat the string
                    # value as iterable and split it into letters.
                    value = [value]
                else:
                    convert = None
                    if datatype == 'datetime':
                        # parse_date was never imported by this module; the
                        # resulting NameError was swallowed by a bare except,
                        # so datetime fields were silently never updated.
                        from calibre.utils.date import parse_date
                        convert = parse_date
                    elif datatype in ['rating', 'float']:
                        convert = float
                    elif datatype == 'int':
                        convert = int
                    if convert is not None:
                        try:
                            value = convert(value)
                        except Exception:
                            if DEBUG:
                                prints('Find Duplicates: Unable to update mi object with value ({}) for field ({}) for book_id ({})'.format(value, field_name, mi.id))
                            return
        mi.set(field_name, value)

    def validate(self, settings, target_db=None):
        '''
        Check that the settings hold a non-empty template accepted by
        check_template().

        :return: True when valid, otherwise a (title, details) tuple.
        '''
        template = settings['template']
        if not template:
            return _('Empty template'), _('You must have a template for this algorithm')
        is_valid = check_template(template, self.gui, target_db, print_error=False)
        if is_valid is True:
            return True
        msg, details = is_valid
        if DEBUG:
            # The original formatted an undefined ``name`` here (NameError);
            # report the offending template text instead.
            prints('Find Duplicates: tepmlate: "{}" returned this error: {}'.format(template, details))
        return msg, details

    def config_widget(self):
        '''
        Return the widget class used to edit the template.
        '''
        return TemplateMatchWidget

class IdenticalMatch(MetadataMatch):
    '''
    Algorithm producing a lower-cased copy of the value as its hash.
    '''

    name = 'Identical Match'
    description = _('Case insensitive exact match')
    _is_builtin = True

    def run(self, field_name, hash_, mi, reverse, has_names, settings, *args, **kwargs):
        '''
        Hash hash_ with the method selected for field_name, or with the
        authors method when has_names is set.
        '''
        target_field = 'authors' if has_names else field_name
        matcher = self.factory(target_field, reverse)
        return matcher(hash_, mi, settings, **kwargs)

    def factory(self, field_name, reverse=False, composite_has_names=False):
        '''
        Return the bound identical_<delegate>_match method for field_name;
        composite_has_names forces the authors delegate.
        '''
        delegate = column_metadata(self.db, field_name)['delegate']
        kind = 'authors' if composite_has_names else delegate
        method_name = 'identical_{}_match'.format(kind)
        return getattr(self, method_name)

    def identical_title_match(self, title, mi, settings, **kwargs):
        # The optional 'lang' setting becomes a prefix of the title hash
        return identical_title_match(title, lang=settings.get('lang', None))

    def identical_authors_match(self, author, mi, settings, **kwargs):
        # Only the forward hash is of interest here
        forward, _reverse = identical_authors_match(author)
        return forward

    def identical_series_match(self, series, mi, settings, **kwargs):
        return identical_title_match(series)

    def identical_publisher_match(self, publisher, mi, settings, **kwargs):
        return identical_title_match(publisher)

    def identical_tags_match(self, tags, mi, settings, **kwargs):
        return identical_title_match(tags)

class SimilarMatch(MetadataMatch):
    '''
    Algorithm hashing values after removing punctuation and filler words.
    Author fields additionally have a reversed-name variant.
    '''

    name = 'Similar Match'
    description = _('Removal of common punctuation and prefixes')

    def run(self, field_name, hash_, mi, reverse, has_names, settings, *args, **kwargs):
        '''
        Hash hash_ with the method selected for field_name, or with the
        authors method when has_names is set.
        '''
        target_field = 'authors' if has_names else field_name
        matcher = self.factory(target_field, reverse)
        return matcher(hash_, mi, settings, **kwargs)

    def factory(self, field_name, reverse=False, composite_has_names=False):
        '''
        Return the bound similar_<delegate>_match method for field_name, or
        its rev_ counterpart when reverse is set; composite_has_names forces
        the authors delegate.
        '''
        delegate = column_metadata(self.db, field_name)['delegate']
        kind = 'authors' if composite_has_names else delegate
        pattern = 'rev_similar_{}_match' if reverse else 'similar_{}_match'
        return getattr(self, pattern.format(kind))

    def similar_title_match(self, title, mi, settings, **kwargs):
        # NOTE(review): the 'lang' setting is looked up but not forwarded to
        # the matching function - confirm whether that is intended.
        lang = settings.get('lang', None)
        return similar_title_match(title)

    def similar_authors_match(self, author, mi, settings, **kwargs):
        forward, _reverse = similar_authors_match(author)
        return forward

    def rev_similar_authors_match(self, author, mi, settings, **kwargs):
        _forward, reverse = similar_authors_match(author)
        return reverse

    def similar_series_match(self, series, mi, settings, **kwargs):
        return similar_series_match(series)

    def similar_publisher_match(self, publisher, mi, settings, **kwargs):
        return similar_publisher_match(publisher)

    def similar_tags_match(self, tags, mi, settings, **kwargs):
        return similar_tags_match(tags)

    def has_reverse(self, field_name, has_names):
        '''
        Return True when has_names is set or field_name delegates to authors.
        '''
        if has_names:
            return True
        return column_metadata(self.db, field_name)['delegate'] == 'authors'

class SoundexConfigWidget(QWidget):
    '''
    Settings widget holding a single spin box for the soundex length,
    exchanged through the 'soundex_length' settings key.
    '''

    def __init__(self, gui, *args, **kwargs):
        QWidget.__init__(self)
        self.gui = gui
        self.db = gui.current_db
        self._init_controls()

    def _init_controls(self):
        '''
        Build the layout: a group box with the spin box, then a stretch.
        '''
        outer = self.l = QVBoxLayout()
        self.setLayout(outer)

        box = QGroupBox(_('Soundex length'))
        outer.addWidget(box)
        box_layout = QVBoxLayout()
        box.setLayout(box_layout)

        spin = self.spin = QSpinBox()
        box_layout.addWidget(spin)

        # Lengths from 1 to 100 in steps of 1, starting at 6
        spin.setMinimum(1)
        spin.setMaximum(100)
        spin.setSingleStep(1)
        spin.setValue(6)

        outer.addStretch(1)

        self.setMinimumSize(300,300)

    def load_settings(self, settings):
        '''
        Show the saved soundex length; empty settings leave the widget as is.
        '''
        if not settings:
            return
        self.spin.setValue(settings['soundex_length'])

    def save_settings(self):
        '''
        Return the settings dictionary with the current spin box value.
        '''
        return {'soundex_length': self.spin.value()}

class SoundexMatch(MetadataMatch):
    '''
    Algorithm hashing values with their soundex code.

    The code length comes from the 'soundex_length' setting (default 6).
    Author fields additionally have a reversed-name variant.
    '''

    name = 'Soundex Match'
    description = _('Phonetic representation of names')
    _is_builtin = True

    def run(self, field_name, hash_, mi, reverse, has_names, settings, *args, **kwargs):
        '''
        Hash hash_ with the method selected for field_name, or with the
        authors method when has_names is set.
        '''
        if has_names:
            algorithm = self.factory('authors', reverse)
        else:
            algorithm = self.factory(field_name, reverse)
        hash_ = algorithm(hash_, mi, settings, **kwargs)
        return hash_

    def factory(self, field_name, reverse=False, composite_has_names=False):
        '''
        Return the bound soundex_<delegate>_match method for field_name, or
        its rev_ counterpart when reverse is set; composite_has_names forces
        the authors delegate.
        '''
        delegate = column_metadata(self.db, field_name)['delegate']
        if composite_has_names:
            delegate = 'authors'
        if reverse:
            return getattr(self, 'rev_soundex_{}_match'.format(delegate))
        else:
            return getattr(self, 'soundex_{}_match'.format(delegate))

    @staticmethod
    def _soundex_of_tokens(tokens, soundex_length):
        '''
        Return the soundex code of the concatenated tokens.
        '''
        tokens = list(tokens)
        joined = ''.join(tokens)
        if len(tokens) <= 1:
            # NOTE(review): values with at most one token get soundex()'s
            # default length rather than the configured one, as in the
            # author handling below - confirm this is intended.
            return soundex(joined)
        return soundex(joined, soundex_length)

    # The title/series/publisher/tags methods used to call module level
    # set_*_soundex_length() helpers that are not defined anywhere (NameError
    # on every call). The configured length is now handed to soundex()
    # directly, which also avoids mutable module state.

    def soundex_title_match(self, title, mi, settings, **kwargs):
        soundex_length = settings.get('soundex_length', 6)
        lang = kwargs.get('lang', None)
        # Convert to an equivalent of "similar" title first before applying the soundex
        result = soundex(similar_title_match(title), soundex_length)
        if lang:
            return lang + result
        return result

    def soundex_authors_match(self, author, mi, settings, **kwargs):
        soundex_length = settings.get('soundex_length', 6)
        # Convert to an equivalent of "similar" author first before applying the soundex
        author_tokens = list(get_author_tokens(author))
        if len(author_tokens) <= 1:
            return soundex(''.join(author_tokens))
        # We will put the last name at front as want the soundex to focus on surname
        new_author_tokens = author_tokens[-1:] + author_tokens[:-1]
        return soundex(''.join(new_author_tokens), soundex_length)

    def rev_soundex_authors_match(self, author, mi, settings, **kwargs):
        soundex_length = settings.get('soundex_length', 6)
        # Convert to an equivalent of "similar" author first before applying the soundex
        author_tokens = list(get_author_tokens(author))
        if len(author_tokens) <= 1:
            return ''
        # Tokens stay in their given order for the reversed hash
        return soundex(''.join(author_tokens), soundex_length)

    def soundex_series_match(self, series, mi, settings, **kwargs):
        soundex_length = settings.get('soundex_length', 6)
        return self._soundex_of_tokens(get_series_tokens(series), soundex_length)

    def soundex_publisher_match(self, publisher, mi, settings, **kwargs):
        soundex_length = settings.get('soundex_length', 6)
        return self._soundex_of_tokens(get_publisher_tokens(publisher), soundex_length)

    def soundex_tags_match(self, tags, mi, settings, **kwargs):
        soundex_length = settings.get('soundex_length', 6)
        return self._soundex_of_tokens(get_tag_tokens(tags), soundex_length)

    def config_widget(self):
        '''
        Return the widget class used to configure the soundex length.
        '''
        return SoundexConfigWidget

    def has_reverse(self, field_name, has_names):
        '''
        Return True when has_names is set or field_name delegates to authors.
        '''
        if has_names:
            return True
        return column_metadata(self.db, field_name)['delegate'] == 'authors'

class FuzzyMatch(MetadataMatch):
    '''
    Algorithm hashing values with the most aggressive normalisation.
    '''

    name = 'Fuzzy Match'
    description = _("Remove all punctuation, subtitles and any words after 'and', 'or' or 'aka'")

    def run(self, field_name, hash_, mi, reverse, has_names, settings, *args, **kwargs):
        '''
        Hash hash_ with the method selected for field_name, or with the
        authors method when has_names is set.
        '''
        target_field = 'authors' if has_names else field_name
        matcher = self.factory(target_field, reverse)
        return matcher(hash_, mi, settings, **kwargs)

    def factory(self, field_name, reverse=False, composite_has_names=False):
        '''
        Return the bound fuzzy_<delegate>_match method for field_name;
        composite_has_names forces the authors delegate.
        '''
        delegate = column_metadata(self.db, field_name)['delegate']
        kind = 'authors' if composite_has_names else delegate
        method_name = 'fuzzy_{}_match'.format(kind)
        return getattr(self, method_name)

    def fuzzy_title_match(self, title, mi, settings, **kwargs):
        # NOTE(review): the 'lang' setting is looked up but not forwarded to
        # the matching function - confirm whether that is intended.
        lang = settings.get('lang', None)
        return fuzzy_title_match(title)

    def fuzzy_authors_match(self, author, mi, settings, **kwargs):
        forward, _reverse = fuzzy_authors_match(author)
        return forward

    def fuzzy_series_match(self, series, mi, settings, **kwargs):
        return fuzzy_series_match(series)

    def fuzzy_publisher_match(self, publisher, mi, settings, **kwargs):
        return fuzzy_publisher_match(publisher)

    def fuzzy_tags_match(self, tags, mi, settings, **kwargs):
        return fuzzy_tags_match(tags)

#=================

def get_all_algorithms(gui, user_algorithm_classes):
    '''
    Instantiate the builtin and the user supplied algorithms.

    :param gui: object handed to every algorithm constructor.
    :param user_algorithm_classes: mapping of algorithm name -> class. Entries
        named like a builtin algorithm, '' or 'no name provided' are skipped,
        as are classes whose constructor raises.
    :return: tuple of three ordered mappings keyed by algorithm name:
        all algorithm instances, the builtin algorithm classes (not
        instances), and the user algorithm instances.
    '''
    builtin_algorithms = OrderedDict(
        (cls.name, cls)
        for cls in (IdenticalMatch, SimilarMatch, SoundexMatch, FuzzyMatch, TemplateMatch)
    )

    all_algorithms = OrderedDict()
    for builtin_name, builtin_cls in builtin_algorithms.items():
        all_algorithms[builtin_name] = builtin_cls(gui)

    user_algorithms = OrderedDict()
    for algorithm_name, algorithm_cls in user_algorithm_classes.items():
        # dont override builtin algorithms
        if algorithm_name in builtin_algorithms:
            continue
        if algorithm_name in ('', 'no name provided'):
            continue
        try:
            instance = algorithm_cls(gui)
            instance.is_user_algorithm = True
            all_algorithms[algorithm_name] = instance
            user_algorithms[algorithm_name] = instance
        except Exception:
            # A broken user algorithm must not prevent the others from loading
            import traceback
            if DEBUG:
                prints('Find Duplicates: Error intializing user action: {}\n{}'.format(algorithm_name, traceback.format_exc()))

    return all_algorithms, builtin_algorithms, user_algorithms
