# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__   = 'GPL v3'
__copyright__ = '2011, meme'
__docformat__ = 'restructuredtext en'

#####################################################################
# Sort names Kindle style
#####################################################################

import re
from collections import defaultdict
from functools import cmp_to_key

from calibre_plugins.kindle_collections.utilities import debug_print

# Leading articles that are skipped when ordering titles.  An article only
# matters as a tie-breaker between two otherwise identical names that both
# start with one.
SORT_ARTICLES = ['a', 'an', 'the']

# Characters the Kindle ignores while sorting (it orders them randomly),
# unless a name is made up of nothing but these characters.
# Of the symbols ranked in SORT_CHARACTERS, the ones left visible are
# " _`^~([{$+<=>|", plus 0-9 and a-z.
# NOTE(review): the previous comment also listed the apostrophe as visible,
# but it is part of SORT_INVISIBLE below - confirm against a device.
SORT_INVISIBLE = '-?/]#.\'\\*})&:@%;"!,'

# Rank of every known character: a character sorts before any character that
# appears later in this string.  Invisible characters are ranked as well, in
# case a collection name is a single character long.
SORT_CHARACTERS = (
    '- _,;:!?/.`^~\'"()[]{}@$*&#%+<=>|'
    '0123456789'
    'abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
)

#####################################################################

class KindleSort():
    """Sort names the way a Kindle 3 orders them when sorting 'By Title'.

    Rules applied by the comparison (device behaviour as recorded by the
    original author of this module):

    * Case is ignored: names are lower-cased before they are compared.
    * Characters in SORT_INVISIBLE are ignored, unless a name consists only
      of such characters, in which case the name is compared unchanged.
    * A leading article from SORT_ARTICLES is ignored, except as a
      tie-breaker between two otherwise identical names that both have one.
    * The remaining characters are ranked by their position in
      SORT_CHARACTERS.

    Further notes kept from the original comments: no leading/trailing
    spaces; if you put a space after a symbol you can't type the first letter
    of a title and jump to it; when sorting by Collections/Most Recent the
    Kindle uses the last access time (that ordering is not implemented here).

    Results of the stripping steps are cached per instance; call
    initialize() to drop the caches.
    """

    def __init__(self):
        # Character -> rank lookup, and a set for fast membership tests.
        self.sort_list = {char: rank for rank, char in enumerate(SORT_CHARACTERS)}
        self.invisible_list = set(SORT_INVISIBLE)
        self.initialize()

    def initialize(self):
        """Reset the per-instance lookup caches to empty."""
        debug_print('BEGIN Initializing KindleSort')
        # text -> text without invisible characters
        self.invisible_version = {}
        # text -> (text without leading article, article that was removed or '')
        self.no_article_version = {}
        debug_print('END Initializing KindleSort')

    def sort_names(self, names, reverse_sort=False, sort_text=None):
        """Sort *names* in place, Kindle style, and return the same list.

        names        -- list of names to sort (modified in place).
        reverse_sort -- sort in descending order when true.
        sort_text    -- optional mapping; when given, each name is ordered by
                        sort_text[name] instead of by the name itself.
        """
        debug_print('BEGIN Kindle sorting')
        # list.sort() lost its cmp= argument in Python 3; cmp_to_key wraps the
        # comparison function so it can be used as a key on Python 2.7 and 3.
        kindle_key = cmp_to_key(self.sort_compare_kindle_names)
        if sort_text is not None:
            names.sort(key=lambda name: kindle_key(sort_text[name]), reverse=reverse_sort)
        else:
            names.sort(key=kindle_key, reverse=reverse_sort)

        debug_print('END Kindle sorting')
        return names

    def strip_invisible(self, text):
        """Return *text* without the characters the Kindle ignores.

        If nothing would be left (the text is made up only of invisible
        characters) the text is returned unchanged.  Results are cached.
        """
        if text not in self.invisible_version:
            visible = ''.join(char for char in text if char not in self.invisible_list)
            # Fall back to the full text when every character was invisible.
            self.invisible_version[text] = visible or text

        return self.invisible_version[text]

    def strip_articles(self, text):
        """Split a leading article off *text*.

        Returns a tuple (remaining_text, article).  The article is '' and the
        text is unchanged when the text does not start with an article
        followed by a character other than '_', a digit or a lowercase
        letter.  That following character is removed together with the
        article.  Results are cached.
        """
        if text not in self.no_article_version:
            result = (text, '')
            for article in SORT_ARTICLES:
                remainder = re.sub('^' + article + '[^_0-9a-z]', '', text)
                if remainder != text:
                    result = (remainder, article)
                    break
            self.no_article_version[text] = result
        return self.no_article_version[text]

    def sort_compare_kindle_names(self, a, b):
        """Compare two names Kindle style.

        Returns a negative number, zero or a positive number when *a* sorts
        before, equal to or after *b* (the classic cmp() contract).
        """
        # Ignore case - Kindle doesn't sort Aa and aA consistently!
        # Then strip invisible characters from the entire name (unless the
        # name is just invisible characters) and any leading article.
        a, articlea = self.strip_articles(self.strip_invisible(a.lower()))
        b, articleb = self.strip_articles(self.strip_invisible(b.lower()))

        # Walk the common prefix using the Kindle character order.
        ranks = self.sort_list
        for chara, charb in zip(a, b):
            if chara not in ranks or charb not in ranks:
                # Unknown character: fall back to the default string order.
                # cmp() no longer exists in Python 3, so it is spelled out.
                try:
                    return (a > b) - (a < b)
                except Exception:
                    debug_print('Skipping comparison - invalid character found when comparing "%s" and "%s"' % (a,b) )
                    return 1
            aval = ranks[chara]
            bval = ranks[charb]
            if aval != bval:
                return (aval > bval) - (aval < bval)

        # One name is a prefix of the other, or they are identical.
        alen = len(a)
        blen = len(b)
        if alen == blen and articlea and articleb:
            # Identical apart from their articles: let the articles decide.
            return (articlea > articleb) - (articlea < articleb)
        return (alen > blen) - (alen < blen)
