#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import, print_function)

import re

from calibre_plugins.overdrive_link.match import title_clean_str
from calibre_plugins.overdrive_link.fixups import INCOMPLETE_TITLES

from .python_transition import (IS_PYTHON2)
if IS_PYTHON2:
    from .python_transition import (str)


__license__ = 'GPL v3'
__copyright__ = '2012-2025, John Howell <jhowell@acm.org>'


# Cache of cleaned INCOMPLETE_TITLES entries; starts empty and is filled by
# is_complete_title() the first time that function is called.
incomplete_titles = set()


def _remove_long_parentheticals(t, min_len=80):
    """Drop every parenthesised group whose content is at least min_len characters.

    Each balanced group (allowing one level of nested parentheses) is examined
    on its own, so two short groups separated by a long stretch of text are
    never merged into a single "long" match.
    """
    def drop_if_long(m):
        # group(0) includes the enclosing parentheses, hence the "- 2"
        return '' if len(m.group(0)) - 2 >= min_len else m.group(0)

    return re.sub(r'\((?:[^()]|\([^()]*\))*\)', drop_if_long, t)


def normalize_title(t):
    """Return a cleaned-up, canonical form of a book title.

    The clean-up removes store-specific decorations, moves a trailing article
    ("Title, The") to the front, keeps markers of incomplete books (excerpt,
    summary, ...) as plain words, converts brackets to parentheses, drops
    extremely long subtitles and parenthesised text, maps typographic quotes
    and dashes to ASCII, strips characters outside the allowed set and
    collapses runs of white space.

    t -- the raw title; any falsy value (None, '') yields ''.

    Returns the normalized title string.
    """
    if not t:
        return ''

    t = t.replace('[Kindle Edition]', '').strip()

    t = re.sub(r"&amp;", "&", t)     # html entity left over from Amazon

    t = re.sub(r'^(.+), (A|An|The)$', r'\2 \1', t, flags=re.IGNORECASE)  # Reverse titles with articles at end for sorting

    t = re.sub(" +:", ":", t)                           # fix space before colon

    # indications of incomplete book - make sure they are not ignored
    t = re.sub(r'\(Excerpt\)', r' Excerpt', t, flags=re.IGNORECASE)
    t = re.sub(r'\((First [0-9,]+ words)\)', r' \1', t, flags=re.IGNORECASE)
    t = re.sub(r': (Chapters[- ][0-9]+-[0-9]+)', r' \1', t, flags=re.IGNORECASE)
    t = re.sub(r': (free sampler)', r' \1', t, flags=re.IGNORECASE)
    t = t.replace('(SparkNotes Literature Guide)', 'SparkNotes Literature Guide')
    t = t.replace('(Summary)', 'Summary').replace('(summary)', 'Summary')   # See https://brooklyn.overdrive.com/media/3187295

    # Extraneous suffix
    t = t.replace('- The Original Classic Edition', '(The Original Classic Edition)')
    t = re.sub(r':? ?with linked table of contents', '', t, flags=re.IGNORECASE)
    t = re.sub(r':? ?with bonus (material|content)', '', t, flags=re.IGNORECASE)

    t = re.sub(r"\[", "(", t)     # brackets -> parens
    t = re.sub(r"\]", ")", t)     # brackets -> parens

    t = re.sub(r':.{80,}$', '', t)  # remove any extremely long subtitle

    # remove any extremely long optional text, one parenthesised group at a
    # time so that text between two short groups is never swallowed
    t = _remove_long_parentheticals(t)

    t = re.sub(r"[‘’‛′]", "'", t)     # remove smart quotes
    t = re.sub("[‒–—―\u2010\u2011\u2e3a\u2e3b]", "-", t)   # dashes
    t = re.sub(r"\\", "/", t)         # remove backslash

    t = re.sub(r"[^ .,!:;+@&'0-9\w()/-]", '', t, flags=re.IGNORECASE | re.UNICODE)  # remove non-alphanumeric + name chars

    return ' '.join(t.strip().split())  # remove extra white space


def force_unique_title(log, book):
    """Append a distinguishing suffix to book.title when the title is incomplete.

    Nothing happens when is_complete_title() accepts the current title.
    Otherwise the suffix is taken from the first available source, in this
    order: series (with volume number), publication year, ISBN, book id.
    The change is applied to book in place and reported through log.info().
    """
    if is_complete_title(book.title):
        return

    # this title is known to be incomplete causing multiple books to be matched incorrectly
    original = book.title

    if original == "The Complete Series" and book.series:
        # the series name becomes the title and the old title becomes the suffix
        suffix, book.title = book.title, book.series
    elif book.series and book.series_index:
        index = book.series_index
        if float(int(index)) == index:
            index = int(index)      # show whole-number indexes without a fraction
        suffix = '%s, Volume %s' % (book.series, str(index))
    elif book.pubdate:
        suffix = book.pubdate.isoformat()[:4]   # published year
    elif book.isbn:
        suffix = book.isbn
    else:
        suffix = book.book_id

    book.title = '%s, %s' % (book.title, suffix)
    log.info('Forced unique title: "%s" to "%s"' % (original, book.title))


def is_complete_title(title):
    """Return True unless the cleaned title is one of the known incomplete titles.

    The module-level incomplete_titles set is populated from INCOMPLETE_TITLES
    (each entry passed through title_clean_str) whenever it is found empty;
    the given title is cleaned the same way before the lookup.
    """
    if len(incomplete_titles) == 0:
        incomplete_titles.update(title_clean_str(entry) for entry in INCOMPLETE_TITLES)

    cleaned = title_clean_str(title)
    return cleaned not in incomplete_titles


def safe_word_from_title(title):
    """Pick the best single word from a title for use in a search.

    Titles containing parentheses, '|' or '::' are rejected outright. Only
    the part before the first colon is considered, with hyphens treated as
    spaces. The result is the longest word made solely of the letters a-z
    (the earliest one on a tie), or '' when that word has three letters or
    fewer or no such word exists.
    """
    if re.search(r'([()|]|::)', title):
        return ''   # reject complex titles

    main_part = title.lower().replace('-', ' ').partition(':')[0]   # text prior to subtitle

    best = ''
    for candidate in main_part.split():
        # only simple words; the strict comparison keeps the earliest of equally long words
        if re.match(r'^[a-z]+$', candidate) and len(candidate) > len(best):
            best = candidate

    return best if len(best) > 3 else ''
