# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__   = 'GPL v3'
__copyright__ = '2011, meme'
__docformat__ = 'restructuredtext en'

#####################################################################
# Track books on the Kindle
#####################################################################

import os, re, time, sys
from collections import defaultdict
from calibre.constants import iswindows
from operator import itemgetter

import calibre_plugins.kindle_collections.messages as msg
import calibre_plugins.kindle_collections.kindle_device as kindle_device
import calibre_plugins.kindle_collections.calibre_info as calibre_info
import calibre_plugins.kindle_collections.kindle_collections as kindle_collections
from calibre_plugins.kindle_collections.ebook import EBook
from calibre_plugins.kindle_collections.kindle_sort import KindleSort
from calibre_plugins.kindle_collections.utilities import debug_print, array_to_csv, wording


# Mobi types whose older issues are treated as periodical back issues
PERIODICAL_TYPES = {'257', '259'}
# Mobi types for which the date is shown instead of the author
PERIODICAL_DATE_TYPES = {'257', '258', '259'}
# Type suffix of a collection code that marks a book as not collectable
UNCOLLECTABLE_TYPE = 'none'
# File name endings that are examined when scanning the device
VALID_EXTENSIONS = (
    'azw', 'azw1', 'azw2',
    'mobi', 'prc', 'tpz',
    'txt', 'pdf', 'pdr',
)

# Module-wide KindleBooks instance; stays None until init() is called
kbooks = None

#####################################################################

def init(paths):
    """Build a KindleBooks object for ``paths`` and publish it module-wide.

    The new object replaces whatever was previously stored in the
    module-level ``kbooks`` variable.
    """
    global kbooks
    books = KindleBooks(paths)
    kbooks = books

class KindleBooks():

    def __init__(self, paths):
        """Create empty bookkeeping structures and scan ``paths`` if any are given.

        paths -- collection of device paths handed to load_kindle_books();
                 when it is empty or None nothing is scanned.
        """
        # Lookup tables filled in by load_kindle_books()
        self.path_info = defaultdict()      # path -> dict of details for that file
        self.title_info = defaultdict()     # title -> {mobi_type: [paths]}
        self.code_paths = defaultdict()     # collection code -> [paths]
        self.error_paths = defaultdict()    # path -> error text from the book parser

        # Plain lists of paths/codes in particular states
        self.periodical_back_issues = []
        self.uncollectable_codes = []
        self.uncollectable_paths = []
        self.path_invalid_extensions = []
        self.empty_paths = []
        self.sorted_paths = []              # order remembered between get_sorted_paths() calls

        # Per-type tallies for the report
        self.counts = {'books': 0, 'newspapers': 0, 'magazines': 0, 'feeds': 0}

        self.ksort = KindleSort()

        if not paths:
            debug_print('BEGIN KindleBooks, no paths')
            return

        debug_print('BEGIN KindleBooks, %d pathnames' % len(paths))
        self.load_kindle_books(paths)

    def load_kindle_books(self, paths):
        """Scan ``paths`` on the device, fill the lookup tables and report problems.

        The work is done in these stages:

        1. Read the path-info cache from ``kindle_device.kdevice``.
        2. For every path: stat the file, decide whether the cached entry can
           be reused, otherwise parse the file with ``EBook``; supplement
           author/title from ``calibre_info.ci.lpath_info``; then record the
           result in ``self.path_info``, ``self.code_paths``,
           ``self.title_info``, ``self.counts`` and the various problem lists.
        3. Write ``self.path_info`` back to the cache.
        4. Add placeholder entries for codes that appear in collections but
           have no matching file.
        5. For periodical titles with more than one file, register every file
           except the most recently modified one as a back issue.
        6. Emit warnings/reports through ``msg.message``.

        paths -- iterable of device paths; each is resolved to a full path via
                 ``kindle_device.kdevice.get_fullpath``.

        Returns nothing; all results are stored on ``self``.  Only
        ``ValueError`` from ``EBook`` is handled here; any other exception
        raised while parsing a book propagates to the caller.
        """

        # Get cache info if it exists
        cached_path_info = kindle_device.kdevice.read_path_info_cache()
        if cached_path_info:
            debug_print('Cache file exists')
        else:
            debug_print('Cache file does not exist')

        # Read all files and collect titles and codes
        debug_print('Begin reading book information from Kindle')

        for path in paths:
            fullpath = kindle_device.kdevice.get_fullpath(path)
            debug_print('Checking "%s"' % fullpath)

            # The decoded value is never read; the decode is attempted only so
            # that a failure can be reported.  Processing of the path carries
            # on after the warning.
            try:
                test = path.decode('utf-8')
            except:
                msg.message.warning('This pathname could not be converted to utf-8:\n   %s' % path)

            # Get time of file
            path_time = 0
            path_date = ''
            try:
                path_time = os.stat(fullpath).st_mtime
                path_date = time.ctime(path_time)
            except:
                # Cannot stat the file: warn and move on to the next path
                msg.message.warning('    Failed to get time of file "%s"' % fullpath)
                continue

            debug_print('    File time:  %s' % path_date)
            # Get size of file
            path_size = 0
            try:
                path_size = os.stat(fullpath).st_size
            except:
                debug_print('    Failed to get length of file "%s"' % fullpath)
                continue

            # Zero-length files are remembered for the report and never parsed
            if path_size == 0:
                self.empty_paths.append(path) 

            # Check cache
            # The cached entry is reused only when its stored modification time
            # equals the file's current one and it already has an 'encoding' field.
            use_cache = False
            if cached_path_info and path in cached_path_info:
                cached_time = cached_path_info[path]['time']
                # If data is added to cache update the field checked
                use_cache = cached_time == path_time and 'encoding' in cached_path_info[path]
                debug_print('    Cache time: %s' % time.ctime(cached_time))
            if use_cache:
                # Copied before the extension/size checks below, so a cached
                # entry is kept in path_info even if those checks fail
                self.path_info[path] = cached_path_info[path] 

            # Get extension of file
            # NOTE(review): this is a plain suffix test without a leading dot
            # and is case-sensitive - confirm that is what is wanted
            valid_extension = path.endswith(VALID_EXTENSIONS)
            if not valid_extension:
                debug_print('    Invalid extension')
#                self.path_invalid_extensions.append(path)

            # Only process files with a valid extension and with contents
            if valid_extension and path_size > 0:
                # Get Kindle code for book and check if valid Kindle format
                valid = True
                if use_cache:
                    debug_print('    Using cached information')
                    cdetype = self.path_info[path]['cdetype']
                    mobi_type = self.path_info[path]['mobi_type']
                    title = self.path_info[path]['title']
                    code = self.path_info[path]['code']
                    path_author = self.path_info[path]['path_author']
                    kindle_author = self.path_info[path]['author']
                    kindle_date = self.path_info[path]['date']
                    sort_date = self.path_info[path]['sort_date']
                    visible = self.path_info[path]['visible']
                    text_encoding = self.path_info[path]['encoding']
                else:
                    debug_print('    Loading information from file')
                    cdetype = None
                    mobi_type = None
                    title = ''
                    code = ''
                    path_author = ''
                    # kindle_author, kindle_date, sort_date, visible and
                    # text_encoding are only assigned when EBook() succeeds;
                    # a failed book is skipped below before they are read.
                    try:
                        kindle_book = EBook(fullpath)
                    except ValueError as error:
                        # Remember the failure for the report at the end
                        valid = False
                        self.error_paths[path] = str(error)
                        debug_print('    %s' % error)
                    else:
                        mobi_type = str(kindle_book.mobi_type)
                        cdetype = str(kindle_book.type)

                        text_encoding = kindle_book.text_encoding if kindle_book.text_encoding else 0
                        debug_print('    Text Encoding: %d' % text_encoding)

                        title = kindle_book.title if kindle_book.title else ''
                        path_author = kindle_book.author if kindle_book.author else ''

                        # Convert Windows CP1252 text to unicode (otherwise problems with smart quotes 0x92)
                        if text_encoding == 1252:
                            title = title.decode('cp1252')
                            path_author = path_author.decode('cp1252')

                        kindle_author = self.convert_author(path_author)

                        # Mobi date
                        kindle_date = self.convert_date(kindle_book.pubdate, 'kindle')
                        if not kindle_date:
                            kindle_date = '' 
                        sort_date = self.convert_date(kindle_book.pubdate, 'sort')
                        if not sort_date:
                            sort_date = '' 

                        # Mobi code
                        code = kindle_book.collection_code

                        # The Kindle uses the type embedded in the code name to determine collectability, not the book's cdetype
                        visible = not self.is_uncollectable_code(code)

                if not valid:
                    debug_print('    Skipping book')
                    continue

                # Default sort key for the author is the author as read from the file/cache
                author_sort = path_author
                # Check if Calibre information can/should be used - ignores cache 
                lpath = path
                if lpath in calibre_info.ci.lpath_info:
                    in_calibre = True

                    # If author empty, get from Calibre 
                    if not path_author or path_author == '':
                        debug_print('    Using Calibre author for %s' % path)
                        path_author = calibre_info.ci.lpath_info[lpath]['authors']
                        if type(path_author) == list:
                            path_author = ';'.join(path_author) 
                        kindle_author = self.convert_author(path_author)

                    # If author_sort in Calibre, use it
                    if 'author_sort' in calibre_info.ci.lpath_info[lpath]:
                        author_sort = calibre_info.ci.lpath_info[lpath]['author_sort']
                    if type(author_sort) == list:
                         author_sort = ';'.join(author_sort) 
    
                    # If title still empty, try Calibre, then use pathname as title as last resort
                    if not title or title == '':
                        debug_print('    Using Calibre title for %s' % path)
                        title = calibre_info.ci.lpath_info[lpath]['title']
                else:
                    in_calibre = False

                # If still no title, use path name
                if not title or title == '':
                    title = os.path.basename(path) 

                # Count types for report
                if mobi_type == '257':
                    self.counts['newspapers'] += 1
                elif mobi_type == '258':
                    self.counts['feeds'] += 1
                elif mobi_type == '259':
                    self.counts['magazines'] += 1
                else:
                    self.counts['books'] += 1

                # Check if code is uncollectable 
                if not visible:
                    self.uncollectable_codes.append(code)
                    self.uncollectable_paths.append(path)

                # Save info for path.  Include and collection_count are only set and used by Edit
                self.path_info[path] = {'code': code, 'title': title, 'time': path_time, 'date': kindle_date, 'sort_date': sort_date, 'cdetype': cdetype, 'mobi_type': mobi_type, 'author': kindle_author, 'path_author': path_author, 'author_sort': author_sort, 'path_size': path_size, 'in_calibre': in_calibre, 'visible': visible, 'include': False, 'collection_count': 0, 'encoding': text_encoding}

                # Only process if there is a title (e.g. some Mobi files may have errors)
                if title != '':
                    # 'ignore' and 'duplicate_code' are assigned but not read anywhere in this method
                    ignore = False

                    # Check if duplicate code found in order to report errors
                    duplicate_code = False
                    if code in self.code_paths:
                        self.code_paths[code].append(path)
                    else:
                        self.code_paths[code] = [ path ]

                    # Save titles per mobi_type
                    if title in self.title_info:
                        if mobi_type in self.title_info[title]:
                            if path not in self.title_info[title][mobi_type]:
                                self.title_info[title][mobi_type].append(path)
                        else:
                            self.title_info[title][mobi_type] = [ path ]
                    else:
                        self.title_info[title] = { mobi_type: [ path ] }
                else:
                    debug_print('No title')

        debug_print('End reading book information from Kindle')

        # Save path information to cache file
        # (done before the placeholder entries below are added, so those are not cached)
        kindle_device.kdevice.save_path_info_cache(self.path_info)

        debug_print('Checking for internal books')
        # Check for any codes in collections belonging to books not in filesystem
        # Set visible to False even though some internal dictionaries may be visible
        for collection in kindle_collections.kc.get_unsorted_names():
            for code in kindle_collections.kc.get_book_codes(collection):
                if code not in self.code_paths:
                    # The synthetic title doubles as the path key; this entry has no 'encoding' field
                    title = ' Book with no file - ' + code
                    path = title
                    self.code_paths[code] = [ path ]
                    self.path_info[path] = {'code': code, 'title': title, 'time': 0, 'date': '', 'sort_date': '', 'cdetype': '', 'mobi_type': '', 'author': '', 'path_author': '', 'author_sort': '', 'path_size': 0, 'in_calibre': False, 'visible': False, 'include': False, 'collection_count': 0}
                    debug_print('Internal book: %s' % title)

        # Identify Periodicals to move all but 1 to Periodicals: Back Issues
        debug_print('Begin Checking Titles')
        for title in self.title_info:
            for mobi_type in self.title_info[title]:
                num_titles = len(self.title_info[title][mobi_type])
                # Periodical depends on the book's Mobi Type not cdeType and not the code
                if num_titles > 1 and self.is_periodical_type(mobi_type):
                    debug_print('Periodical: %s  Type: %s  Num Titles: %d' % (title, mobi_type, num_titles))
                    # First pass: find the file with the latest modification time
                    most_recent_time = 0
                    most_recent_path = ''
                    for path in self.title_info[title][mobi_type]:
                        ptime = self.path_info[path]['time']
                        if ptime > most_recent_time:
                            most_recent_time = ptime
                            most_recent_path = path
                    # Second pass: everything else is a back issue
                    for path in self.title_info[title][mobi_type]:
                        if path != most_recent_path:
                            self.add_periodical_back_issue(path)
                            debug_print('    Back issue:    %s' % path)
                        else:
                            debug_print('    Current issue: %s' % path)
        debug_print('End Checking Titles')
        
        debug_print('Begin Reporting problems')
        # Report invalid Mobi books
        if self.error_paths:
            msg.message.warning('\nThese files contain invalid Mobi book header information and will be ignored by the Kindle.  Try using Calibre to reconvert the book to Mobi format (even if you need to convert from Mobi format) and resending it to the device.\n', False)
            for path in self.error_paths.keys():
                msg.message.warning('    %s\n' % kindle_device.kdevice.get_fullpath(path))

#        # Report invalid extensions
#        if self.path_invalid_extensions:
#            msg.message.report('\nThese files have invalid extensions and will be ignored by the Kindle:\n')
#            for path in self.path_invalid_extensions:
#                msg.message.report('    %s\n' % kindle_device.kdevice.get_fullpath(path))

        # Report 0 length files
        if self.empty_paths:
            msg.message.report('\nThese files are empty and will be ignored by the Kindle:\n')
            for path in self.empty_paths:
                msg.message.report('    %s\n' % kindle_device.kdevice.get_fullpath(path))

        # Report files with the same code
        for code in self.code_paths:
            if len(self.code_paths[code]) > 1:
                msg.message.report('\nThese files all have the same Kindle code - you should remove the extra copies:\n')
                for path in self.code_paths[code]:
                    msg.message.report('    Path:  %s\n    Title: %s\n    Code:  %s\n' % (kindle_device.kdevice.get_fullpath(path), self.path_info[path]['title'], code))

        # Report files as the same book if they match title, author, and date
        for title in self.title_info:
            # num_titles is reset here but not used in this loop
            num_titles = 0
            # Gather every path with this title regardless of mobi_type
            tpaths = []
            for mobi_type in self.title_info[title].keys():
                tpaths += self.title_info[title][mobi_type]

            if len(tpaths) > 1:
                # Group the paths by author ...
                authors = defaultdict()
                for p in tpaths:
                    author = self.path_info[p]['path_author']
                    if author in authors:
                        authors[author].append(p)
                    else:
                        authors[author] = [ p ]

                for a in authors:
                    if len(authors[a]) > 1:
                        # ... then by date within each author
                        dates = defaultdict()
                        for p2 in authors[a]:
                            date = self.path_info[p2]['date']
                            if date in dates:
                                dates[date].append(p2)
                            else:
                                dates[date] = [ p2 ]
                        for d in dates:
                            if len(dates[d]) > 1:
                                msg.message.report('\nThese files all have the same title, author and date - you may want to remove the extra copies:\n')
                                for path in dates[d]:
                                    msg.message.report('    Path:   %s\n    Title:  %s\n    Author: %s\n    Date:   %s\n' % (kindle_device.kdevice.get_fullpath(path), title, a, d))

        # Report files that cannot be put into collections:
        if self.uncollectable_paths:
            msg.message.report('\nThese files will not be visible in any collections on the Kindle because the Mobi book type is not set correctly.  If the book is in Calibre, run View Report and check the book\'s details for a WARNING message. Also see Troubleshooting under this plugin\'s Help page:\n')
            for path in self.uncollectable_paths:
                msg.message.report('    Path:  %s\n    Title: %s\n    Code:  %s\n    Type:  %s\n' % (kindle_device.kdevice.get_fullpath(path), self.path_info[path]['title'], self.path_info[path]['code'], self.get_type(self.path_info[path]['code'])))

        debug_print('End Reporting problems')

        debug_print('END KindleBooks')

    # Kindle uses the code in the collection to get the cdetype when checking if the code is visible in the collection, not the cdetype in the file
    def is_uncollectable_code(self, code):
        """Return True when the type suffix of ``code`` is the uncollectable type.

        The suffix is extracted with get_type() and compared
        case-insensitively with UNCOLLECTABLE_TYPE.
        """
        code_type = self.get_type(code)
        return UNCOLLECTABLE_TYPE == code_type.lower()

    def add_periodical_back_issue(self, path):
        """Append ``path`` to the list of periodical back issues."""
        back_issues = self.periodical_back_issues
        back_issues.append(path)

    def get_type(self, code):
        """Return the last four characters of a '#'-prefixed collection code.

        An empty string is returned when ``code`` is empty/None, is six
        characters or shorter, or does not start with '#'.
        """
        if not code:
            return ''
        length = len(code)
        if length <= 6 or code[0] != '#':
            return ''
        return code[length - 4:]

    def convert_date(self, pub_date, date_type):
        """Reformat a publication date string for display or for sorting.

        Two layouts are recognised at the start of ``pub_date`` (any trailing
        text is ignored, and each separator is one non-digit character):

        * ``YYYY-MM-DD``   e.g. ``2011-03-05T00:00:00``
        * ``DD Mon YYYY``  e.g. ``05 Mar 2011`` (month name in any letter case)

        pub_date  -- date string taken from the book, may be empty or None
        date_type -- 'kindle' gives ``Sat, Mar 05, 2011``; any other value
                     gives the sortable form ``2011-03-05``

        Returns the reformatted string.  ``pub_date`` is returned unchanged
        when it is empty, matches neither layout, names an unknown month or
        is not a real calendar date, so a bad date in one book cannot abort
        the scan of the whole device.
        """
        from datetime import date
        WEEKDAYS = [ 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun' ]
        MONTHS = [ 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' ]

        if not pub_date:
            return pub_date

        numeric = re.match(r'^([0-9]{4})[^0-9]([0-9]{2})[^0-9]([0-9]{2}).*$', pub_date)
        if numeric:
            year, month, day = numeric.groups()
            month_number = int(month)
        else:
            named = re.match(r'^([0-9]{2})[^0-9]([a-zA-Z]{3})[^0-9]([0-9]{4}).*$', pub_date)
            if not named:
                return pub_date
            day, month_name, year = named.groups()
            # The pattern accepts any letter case, the lookup table does not
            month_name = month_name.capitalize()
            if month_name not in MONTHS:
                return pub_date
            month_number = MONTHS.index(month_name) + 1

        try:
            date_instance = date(int(year), month_number, int(day))
        except ValueError:
            # Matches the pattern but is not a real date, e.g. 0000-00-00 or 2011-02-30
            return pub_date

        if date_type == 'kindle':
            weekday_name = WEEKDAYS[date_instance.weekday()]
            return '%s, %s %s, %s' % (weekday_name, MONTHS[month_number - 1], day, year)

        # Zero-pad the month so that both input layouts give the same sortable text
        return '%s-%02d-%s' % (year, month_number, day)

    def is_periodical_type(self, type):
        """Return True when ``type`` is one of the values in PERIODICAL_TYPES."""
        is_periodical = type in PERIODICAL_TYPES
        return is_periodical

    def is_periodical_date_type(self, type):
        """Return True when ``type`` is one of the values in PERIODICAL_DATE_TYPES."""
        shows_date = type in PERIODICAL_DATE_TYPES
        return shows_date

    # Change author field into First Last and First2 Last2 format
    def convert_author(self, author):
        """Turn an author field into ``First Last and First2 Last2`` form.

        author -- either a ';'-separated string or a list/tuple of names;
                  each name may be written ``Last, First`` or ``First Last``.

        Names containing a comma are flipped around the first comma.
        Surrounding whitespace is removed and empty entries (for example
        from a trailing ';') are dropped so that no stray spaces or dangling
        " and " end up in the result.

        Returns the combined string, or '' when ``author`` is empty or None.
        """
        if not author:
            return ''

        names = author if isinstance(author, (list, tuple)) else author.split(';')

        converted = []
        for name in names:
            name = name.strip()
            if ',' in name:
                last, _sep, first = name.partition(',')
                # Either side of the comma may be empty, e.g. "Smith,"
                name = ' '.join(part for part in (first.strip(), last.strip()) if part)
            if name:
                converted.append(name)
        return ' and '.join(converted)

    # Count books in a collection - ignore uncollectable
    def get_visible_book_count(self, collection):
        """Return the number of books counted for ``collection``.

        For an existing collection this is the number of its book codes that
        are not in ``self.uncollectable_codes``.  For a name found in the
        deleted names the value stored there is returned.  Anything else
        gives 0.
        """
        kc = kindle_collections.kc
        if collection in kc.collections:
            hidden_codes = self.uncollectable_codes
            return sum(1 for book_code in kc.get_book_codes(collection)
                       if book_code not in hidden_codes)
        if collection in kc.deleted_names:
            return kc.deleted_names[collection]
        return 0

    # List (title, author_or_date) tuple of all titles for Title view except those in periodical back issues
    def get_visible_unsorted_titles_and_info(self):
        """Return ``(title, author_or_date)`` tuples for all non-back-issue paths.

        Every path in ``self.path_info`` that is not listed in
        ``self.periodical_back_issues`` contributes one tuple; the second
        element comes from get_book_author_date().  No sorting is applied.
        """
        current_paths = (candidate for candidate in self.path_info
                         if candidate not in self.periodical_back_issues)
        pairs = []
        for current in current_paths:
            author_or_date = self.get_book_author_date(current)
            pairs.append((self.path_info[current]['title'], author_or_date))
        return pairs

    def get_visible_codes(self, collection):
        """Return the book codes of ``collection`` that are not uncollectable.

        An empty list is returned when ``collection`` is not an existing
        collection.
        """
        kc = kindle_collections.kc
        if collection not in kc.collections:
            return []
        return [book_code for book_code in kc.get_book_codes(collection)
                if book_code not in self.uncollectable_codes]

    def get_book_author_date(self, path):
        """Return the date for periodical-date types, otherwise the author.

        The choice is made from the 'mobi_type' stored for ``path`` using
        is_periodical_date_type().
        """
        entry = self.path_info[path]
        field = 'date' if self.is_periodical_date_type(entry['mobi_type']) else 'author'
        return entry[field]

    # Return title with pathname after it for sorting/displaying info
    def get_code_titlepath(self, code):
        """Return ``title + '<<>>PATH<<>>' + path`` for ``code``, or '' if unknown.

        When several paths share the code the first one recorded is used,
        since there is no way to tell which file the code should refer to.
        """
        if code not in self.code_paths:
            return ''
        first_path = self.code_paths[code][0]
        book_title = self.path_info[first_path]['title']
        return book_title + '<<>>PATH<<>>' + first_path

    def get_code_title(self, code):
        """Return the title of the first path recorded for ``code``, or ''."""
        if code not in self.code_paths:
            return ''
        first_path = self.code_paths[code][0]
        return self.path_info[first_path]['title']

    # Since a code may be used by more than one path, guess at which path to return
    def get_paths_from_codes(self):
        """Return one path per known code: the first path recorded for it."""
        return [code_paths[0] for code_paths in self.code_paths.values()]

    # Return a list of paths sorted by selected field (sub sorted based on last sort) given a list of book codes 
    def get_sorted_paths(self, book_paths, sort_field, reverse_sort=False):
        """Return ``book_paths`` ordered by ``sort_field``.

        The order produced by the previous call is remembered in
        ``self.sorted_paths`` and used as the starting point, so sorting on
        one field and then another gives a sub-sort on the earlier field.

        book_paths   -- paths to sort; each must be a key of ``self.path_info``
        sort_field   -- 'title', 'author', 'date', 'include', 'in_calibre',
                        'visible' or 'collections'; any other value sorts on
                        the lower-cased path
        reverse_sort -- True for descending order

        Titles are ordered by ``self.ksort.sort_names``; every other field
        uses a plain key sort.  Collection counts are compared as numbers.
        The result is stored in ``self.sorted_paths`` and also returned.
        """
        debug_print('BEGIN Get sorted paths - %d codes: field %s, reverse=%s, previous paths=%d' % (len(book_paths), sort_field, reverse_sort, len(self.sorted_paths)))

        # Get the text to sort on based on the field and store per pathname
        sort_text = defaultdict()
        for path in book_paths:
            info = self.path_info[path]
            if sort_field == 'title':
                name = (info[sort_field] or '').lower()
            elif sort_field == 'author':
                # author_sort may be missing a value; treat that as an empty name
                name = (info['author_sort'] or '').lower()
            elif sort_field == 'date':
                name = info['sort_date']
            elif sort_field in [ 'include', 'in_calibre', 'visible' ]:
                name = 'a' if info[sort_field] else 'b'
            elif sort_field == 'collections':
                # Compare counts as numbers; as text '10' would sort before '2'
                try:
                    name = int(info['collection_count'])
                except (TypeError, ValueError):
                    name = 0
            else:
                # If no sort field, sort on path name
                name = path.lower()
            sort_text[path] = name

        # Start from the previous order, restricted to the paths requested now,
        # and append any path not seen before.  This always yields a real list
        # (dict.keys() is not sortable on Python 3) and guarantees every entry
        # has a sort key.
        ordered = [path for path in self.sorted_paths if path in sort_text]
        seen = set(ordered)
        for path in book_paths:
            if path not in seen:
                seen.add(path)
                ordered.append(path)
        self.sorted_paths = ordered

        # Sort the names
        if sort_field == 'title':
            # Sort titles using special Kindle sort
            self.sorted_paths = self.ksort.sort_names(self.sorted_paths, reverse_sort, sort_text)
        else:
            self.sorted_paths.sort(key=lambda path: sort_text[path], reverse=reverse_sort)

        debug_print('END Get Sorted Paths - %d paths' % len(self.sorted_paths))
        return self.sorted_paths

    def get_len(self):
        """Return the number of distinct collection codes that are known."""
        code_count = len(self.code_paths)
        return code_count

    def set_path_info(self, path, field, value):
        """Store ``value`` under ``field`` for ``path``; unknown paths are ignored."""
        if path not in self.path_info:
            return
        entry = self.path_info[path]
        entry[field] = value

