#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import, print_function)

import re

from calibre_plugins.overdrive_link.titlecase import titlecase
from calibre_plugins.overdrive_link.match import (
    author_parse, clean_author_list, A_FIRST, A_MIDDLE, A_OPTIONAL,
    HONORIFIC_PREFIXES, HONORIFIC_SUFFIXES, PERSON_SUFFIXES)


__license__ = 'GPL v3'
__copyright__ = '2012-2025, John Howell <jhowell@acm.org>'


# Set of the 26 lowercase ASCII letters, one member per letter (i.e. every possible single-letter initial).
# NOTE(review): not referenced in the visible part of this file - presumably imported elsewhere; verify before removing.
INITIALS = set('abcdefghijklmnopqrstuvwxyz')


def normalize_author(a, unreverse=True, fix_case=False, fix_ia=False):
    # Clean up an author name string as delivered by a lending library / book source.
    #
    #   a          raw author name
    #   unreverse  when true and the name contains a comma, treat it as "last, first" and swap the parts
    #   fix_case   when true, title-case a name longer than 5 characters that is entirely upper case
    #   fix_ia     when true, first strip the catalog notes (roles, dates) that "IA" records append
    #              (NOTE(review): IA is presumably Internet Archive - confirm against the caller)
    #
    # Returns the cleaned name, or '' when the input is a placeholder rather than a real author.
    # The substitutions below are order dependent; each one assumes the clean-up done by those before it.

    def suffix_pattern(suf):
        # regex fragment matching suf with an optional period after every letter: "jr" -> j\.?r\.?
        return r'\.?'.join(list(suf)) + r'\.?'

    if re.match(r'^©[0-9]{4} by', a):
        return ''   # drop extraneous copyright notice

    if a.lower() in ['not yet available', 'smashwords', 'various', 'various authors']:
        return ''   # drop "Not Yet Available" used by Everand, "Various" used by Freading for multiple authors

    if fix_ia:
        # format is: last name, first name[, notes, notes]
        a = re.sub(' \\(.*\\)', '', a)                              # IA sometimes has alternate author name in parens

        acomma = a.split(",")
        if len(acomma) > 2:
            a = acomma[0] + "," + acomma[1]     # keep only "last, first"

        # trailing role term; period occasionally incorrectly used instead of comma
        a = re.sub(
            '[,.] (artist|author|aut|compiler|comp|joint author|joint comp|cn|dedicatee|editor|ed|edt|et al|'
            'illustrator|ill|illus|novelist|pseud|recipient)$', '', a)

        # The two specific date forms must be removed BEFORE the bare-year pattern. The bare-year pattern
        # matches any " YYYY", so running it first would eat the year out of "b. 1900" or "1900 January 5"
        # and leave the rest ("b", "January 5") behind in the name.
        a = re.sub('[,.] (b|d|fl)\\. [12][0-9]{3}(-([12][0-9]{3})?)?', '', a)      # ", b. 1900"  ", fl. 1590-1610"
        a = re.sub(
            '[,.] [12][0-9]{3} (january|jan|february|feb|march|mar|april|apr|may|june|jun|july|jul|august|aug|'
            'september|sep|october|oct|november|nov|december|dec) [0-9]{1,2}', '', a, flags=re.IGNORECASE)
        a = re.sub('[,.]? (approximately )?[12][0-9]{3}(-([12][0-9]{3})?)?', '', a)     # " 1900"  ", 1900-1980"

    for suf in PERSON_SUFFIXES:
        a = re.sub(', (%s)$' % suffix_pattern(suf), r' \1', a, flags=re.IGNORECASE)    # ", Jr" -> " Jr"  etc.
        a = re.sub(', (%s),' % suffix_pattern(suf), r' \1,', a, flags=re.IGNORECASE)    # ", Jr," -> " Jr,"  etc.

    if unreverse and (',' in a):
        if re.search(r' (sr|jr|sr\.|jr\.)$', a, flags=re.IGNORECASE):
            # handle cases from PG like "Deer, Jim G., Sr." -> "Jim G. Deer Sr."
            a, space, suff = a.rpartition(' ')
            suff = space + suff     # keep the suffix at the very end after the swap
        else:
            suff = ''

        # only the first comma separates last from first; any others are deleted further down
        last, comma, first = a.partition(',')
        a = first + ' ' + last + suff

    a = re.sub(r' et al\.?$', '', a, flags=re.IGNORECASE)   # remove "et al" used by Project Gutenberg

    a = a.replace(',', '').replace(';', '').replace(' & ', ' and ')

    if re.match('^by ', a, flags=re.IGNORECASE):
        a = a[3:]   # drop leading "by "

    a = re.sub(r' author$', '', a, flags=re.IGNORECASE)
    a = re.sub(r' \(ed\.\)', '', a, flags=re.IGNORECASE)
    a = re.sub(r' \(editor\)', '', a, flags=re.IGNORECASE)
    a = re.sub(r' \(edt\)', '', a, flags=re.IGNORECASE)     # Editor - Axis 360
    a = re.sub(r' \(crt\)', '', a, flags=re.IGNORECASE)     # Creator - Axis 360
    a = re.sub(r' \(con\)', '', a, flags=re.IGNORECASE)     # Contributor - Axis 360
    a = re.sub(r' \(frw\)', '', a, flags=re.IGNORECASE)     # Foreword - Axis 360
    a = re.sub(r' \(ilt\)', '', a, flags=re.IGNORECASE)     # Illustrator - Axis 360
    if re.match('^More creators.', a) or a == 'Various':
        a = ''

    a = re.sub(r'\s&\s', ' and ', a)    # change '&' to 'and' (Better Homes & Gardens)

    if len(a.split()) > 2:
        # drop honorific, unless it is used with a single name
        for pre in HONORIFIC_PREFIXES:
            a = re.sub(r'^%s\.? ' % pre, '', a, flags=re.IGNORECASE)    # Drop honorific prefixes

        for suf in HONORIFIC_SUFFIXES:
            a = re.sub(' %s$' % suffix_pattern(suf), '', a, flags=re.IGNORECASE)    # Drop honorific suffixes

    a = re.sub(r'([A-Za-z]{4})\.( |$)', r'\1\2', a)     # remove extraneous periods (after 4 or more letter word)

    # Adjacent initials share their separating space, so one pass can only fix every other one;
    # repeat until a pass makes no change.
    n = 1
    while (n):  # until no more found
        a, n = re.subn(r'(^| )([A-Za-z])( |$)', r'\1\2.\3', a)   # Fix missing periods after initials

    a = re.sub(r'\.([a-zA-Z0-9])', r'. \1', a)  # Split initials with no space

    # replace smart quotes by a plain apostrophe
    # NOTE(review): '?' is also mapped to an apostrophe - presumably a mis-decoded quote character; confirm
    a = re.sub(r"[‘’?']", "'", a)

    a = re.sub(r"[^ .'0-9\w-]", '', a, flags=re.IGNORECASE | re.UNICODE)    # remove non-alphanumeric + name chars

    if fix_case and (len(a) > 5) and (a.upper() == a):
        a = titlecase(a.lower())    # Correct all upper case name

    a = ' '.join(a.strip().split())     # trim and collapse runs of whitespace

    return a


def reverse_author_name(a):
    # Convert author name into "last, first" format.
    # Commas and periods are removed first, so "J. R. Smith" is returned as "Smith, J R".
    # A trailing word listed in PERSON_SUFFIXES stays attached to the last name.
    # An empty name is returned unchanged; a name with no words at all (only punctuation
    # or whitespace) gives ''.

    if not a:
        return a

    al = a.replace(',', '').replace('.', '').split()

    if not al:
        return ''       # nothing left once punctuation is removed

    if len(al) == 1:
        return al[0]    # nothing to do for single name

    if len(al) > 2 and al[-1].lower() in PERSON_SUFFIXES:
        # "John Smith Jr" -> "Smith Jr, John"
        return al[-2] + ' ' + al[-1] + ', ' + ' '.join(al[:-2])

    return al[-1] + ', ' + ' '.join(al[:-1])


def author_search_prep(author, remove_accents=False):
    # Clean the author name, join the resulting words and add search punctuation
    name_words = clean_author_list(author, remove_accents)
    joined_name = author_str(name_words)
    return add_punctuation(joined_name)


def alternate_author_names(author, config, log=None):
    # Build the set of name variants to try for an author.
    # If config.author_search_equivalents has an entry for the author_search_prep form of the name,
    # only the configured equivalents (and their punctuation/initial variants) are used. Otherwise
    # variants are generated from the cleaned name with accents removed and, when that gives a
    # different name, from the cleaned name with accents kept. Empty strings are never returned.

    def initial_forms(name):
        # punctuated name plus both merged-initial spellings of it
        punctuated = add_punctuation(name)
        return [punctuated, combine_initials(punctuated), combine_initials2(punctuated)]

    def generated_forms(name_words, full_name):
        # every variant generated from one cleaned list of name words
        forms = initial_forms(full_name)
        forms.append(add_punctuation(remove_optional_names(name_words)))
        forms.append(add_punctuation(replace_middle_with_initial(name_words)))
        return forms

    alt_names = set()

    if author_search_prep(author) in config.author_search_equivalents:
        if log:
            log.info('Using configured author name equivalents')

        for equivalent_author in config.author_search_equivalents[author_search_prep(author)]:
            alt_names.add(equivalent_author)
            alt_names.update(initial_forms(equivalent_author))

    else:
        unaccented_words = clean_author_list(author, remove_accents=True)
        unaccented_name = author_str(unaccented_words)
        alt_names.update(generated_forms(unaccented_words, unaccented_name))

        accented_words = clean_author_list(author, remove_accents=False)
        accented_name = author_str(accented_words)
        if accented_name != unaccented_name:
            alt_names.update(generated_forms(accented_words, accented_name))

    alt_names.discard('')
    return alt_names


def author_str(cl):
    # Join a list of name words into a single space-separated name
    separator = ' '
    return separator.join(cl)


def remove_optional_names(cl):
    # Simplified name with optional words and middle names left out
    options = dict(remove_optional=True, remove_middle=True)
    return simplify_name(cl, **options)


def replace_middle_with_initial(cl):
    # Simplified name with optional words left out and middle names cut down to their first letter
    options = dict(remove_optional=True, replace_middle_with_initial=True)
    return simplify_name(cl, **options)


def simplify_name(cl, remove_optional=False, remove_middle=False, replace_middle_with_initial=False):
    # Rebuild a name from the (word, kind) pairs that author_parse produces for a copy of cl.
    #   remove_optional               leave out words tagged A_OPTIONAL
    #   remove_middle                 leave out words tagged A_MIDDLE
    #   replace_middle_with_initial   cut words tagged A_MIDDLE down to their first letter
    # Middle names are left untouched once a single-letter A_FIRST word has been seen.
    kept = []
    starts_with_initial = False

    for word, kind in author_parse(list(cl)):
        if remove_optional and kind == A_OPTIONAL:
            continue

        if kind == A_FIRST and len(word) == 1:
            starts_with_initial = True      # Name starts with an initial: Don't simplify middle names

        simplifiable = kind == A_MIDDLE and not starts_with_initial

        if simplifiable and remove_middle:
            continue

        if simplifiable and replace_middle_with_initial:
            word = word[0]

        kept.append(word)

    return author_str(kept)


def add_punctuation(author):
    # Classic OverDrive Search needs a period after each initial and a comma before "jr" in author names
    # (not always: "Steve Gordon Jr" works but "Steve Gordon, Jr" does not)

    lone_letter = re.compile(r'(^| )([a-z])( |$)')

    # neighbouring initials share a space, so keep substituting until a pass changes nothing
    while True:
        author, changed = lone_letter.subn(r'\1\2.\3', author)
        if not changed:
            break

    # trailing " jr" with no comma in front of it becomes ", jr."
    return re.sub(r'(?<!,) jr$', r', jr.', author)


def combine_initials(author):
    # Close up the spaces inside a run of two or three initials, keeping any periods:
    # "j. k. rowling" -> "j.k. rowling", "j k rowling" -> "jk rowling"
    merges = (
        (r' ([a-z])\. ([a-z])\. ([a-z])\. ', r' \1.\2.\3. '),   # three initials with periods
        (r' ([a-z])\. ([a-z])\. ', r' \1.\2. '),                # two initials with periods
        (r' ([a-z]) ([a-z]) ([a-z]) ', r' \1\2\3 '),            # three initials without periods
        (r' ([a-z]) ([a-z]) ', r' \1\2 '),                      # two initials without periods
    )

    # pad with spaces so initials at either end of the name are matched too
    padded = ' %s ' % author
    for pattern, merged in merges:
        padded = re.sub(pattern, merged, padded)

    return padded.strip()


def combine_initials2(author):
    # Collapse a run of two or three period-terminated initials into bare letters:
    # "j. r. r. tolkien" -> "jrr tolkien", "j. k. rowling" -> "jk rowling"
    merges = (
        (r' ([a-z])\. ([a-z])\. ([a-z])\. ', r' \1\2\3 '),  # three initials with periods
        (r' ([a-z])\. ([a-z])\. ', r' \1\2 '),              # two initials with periods
    )

    # pad with spaces so initials at either end of the name are matched too
    padded = ' %s ' % author
    for pattern, merged in merges:
        padded = re.sub(pattern, merged, padded)

    return padded.strip()
