#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2011, Grant Drake <grant.drake@gmail.com>'
__docformat__ = 'restructuredtext en'

import os, re, shutil, traceback
from calibre.ebooks.metadata import check_isbn, title_sort
from calibre.gui2 import info_dialog, choose_dir, error_dialog
from calibre.utils.config import prefs
from calibre.utils.titlecase import titlecase

import calibre_plugins.quality_check.config as cfg
from calibre_plugins.quality_check.check_base import BaseCheck


class MetadataCheck(BaseCheck):
    '''
    All checks related to working with book metadata.
    '''
    def perform_check(self, menu_key):
        '''
        Run the metadata check associated with a menu action.

        :param menu_key: Identifier of the menu item that was triggered.
        :return: ``None`` when the key is recognised (each check reports its
                 own results); otherwise the result of the ``error_dialog``
                 call informing the user that the key is unknown.
        '''
        # The search-based entries are wrapped in lambdas so that the cfg
        # lookup only happens for the menu item that was actually chosen.
        handlers = {
            'check_title_sort': self.check_title_sort_valid,
            'check_author_sort': self.check_author_sort_valid,
            'check_isbn': self.check_isbn_valid,
            'check_pubdate': self.check_pubdate_valid,
            'check_dup_isbn': self.check_duplicate_isbn,
            'check_dup_series': self.check_duplicate_series,
            'check_excess_tags': self.check_tags_count,
            'check_html_comments': self.check_html_comments,
            'check_no_html_comments': self.check_no_html_comments,
            'check_authors_commas':
                lambda: self.search_metadata(cfg.KEY_SEARCH_AUTHOR_COMMAS),
            'check_authors_no_commas':
                lambda: self.search_metadata(cfg.KEY_SEARCH_AUTHOR_NOCOMMAS),
            'check_titles_series':
                lambda: self.search_metadata(cfg.KEY_SEARCH_TITLE_SERIES),
            'check_title_case': self.check_titlecase,
            'check_fix_book_size': self.check_and_update_file_sizes,
            'cleanup_opf_files': self.cleanup_opf_folders,
        }
        handler = handlers.get(menu_key)
        if handler is None:
            # Name this class in the message so the user can tell which
            # group of checks failed to recognise the key.
            return error_dialog(self.gui, _('Quality Check failed'),
                                _('Unknown menu key for %s of \'%s\'')%('MetadataCheck', menu_key),
                                show=True, show_copy_button=False)
        handler()

    def search_metadata(self, config_key):
        '''
        Apply a stored search expression to the library search box.

        :param config_key: Key of the search expression in the plugin store;
                           when the store has no entry for it the value from
                           ``cfg.DEFAULT_STORE_VALUES`` is used instead.
        '''
        store = cfg.plugin_prefs[cfg.STORE_NAME]
        fallback = cfg.DEFAULT_STORE_VALUES[config_key]
        expression = store.get(config_key, fallback)
        self.gui.search.set_search_string(expression)


    def check_title_sort_valid(self):
        '''
        Flag books whose stored title sort differs from the value that
        ``title_sort`` computes from the book's title.
        '''

        def evaluate_book(book_id, db):
            # True means "stored value is out of step with the computed one".
            stored_sort = db.title_sort(book_id, index_is_id=True)
            expected_sort = title_sort(db.title(book_id, index_is_id=True))
            return stored_sort != expected_sort

        options = dict(no_match_msg='All searched books have a valid Title Sort',
                       marked_text='invalid_title_sort',
                       status_msg_type='books for invalid title sort')
        self.check_all_files(evaluate_book, **options)


    def check_author_sort_valid(self):
        '''
        Flag books whose stored author sort differs from the value the
        database derives from the book's authors.
        '''

        def evaluate_book(book_id, db):
            stored_sort = db.author_sort(book_id, index_is_id=True)
            raw_authors = db.authors(book_id, index_is_id=True)
            # The author string is comma separated; a '|' inside a name is
            # turned back into a comma before the sort value is derived.
            author_names = []
            for name in raw_authors.split(','):
                author_names.append(name.strip().replace('|', ','))
            expected_sort = db.author_sort_from_authors(author_names)
            return stored_sort != expected_sort

        options = dict(no_match_msg='All searched books have a valid Author Sort',
                       marked_text='invalid_author_sort',
                       status_msg_type='books for invalid author sort')
        self.check_all_files(evaluate_book, **options)


    def check_isbn_valid(self):
        '''
        Flag books that have an ISBN which ``check_isbn`` rejects.
        Books with no ISBN at all are not flagged.
        '''

        def evaluate_book(book_id, db):
            isbn = db.isbn(book_id, index_is_id=True)
            if not isbn:
                return False
            return not check_isbn(isbn)

        options = dict(no_match_msg='All searched books have a valid ISBN',
                       marked_text='invalid_isbn',
                       status_msg_type='books for invalid ISBN')
        self.check_all_files(evaluate_book, **options)


    def check_pubdate_valid(self):
        '''
        Flag books whose published date is identical to their timestamp.
        '''

        def evaluate_book(book_id, db):
            published = db.pubdate(book_id, index_is_id=True)
            added = db.timestamp(book_id, index_is_id=True)
            # NOTE(review): an identical pubdate and timestamp presumably
            # means the pubdate was never set explicitly - confirm.
            is_suspect = published == added
            return is_suspect

        options = dict(no_match_msg='All searched books have a valid pubdate',
                       marked_text='invalid_pubdate',
                       status_msg_type='books for invalid pubdate')
        self.check_all_files(evaluate_book, **options)


    def check_duplicate_isbn(self):
        '''
        Show every book whose ISBN is shared with at least one other book
        in the searched set, and report the totals in the status bar.
        '''
        isbn_to_ids = {}

        def evaluate_book(book_id, db):
            isbn = db.isbn(book_id, index_is_id=True)
            if isbn:
                isbn_to_ids.setdefault(isbn, set()).add(book_id)
            # Duplicates can only be identified once every book has been
            # collected, so no book is reported as a match at this stage.
            return False

        total_count, _scan_ids, cancelled = self.check_all_files(
                evaluate_book, show_matches=False,
                status_msg_type='books for duplicate ISBN')
        if cancelled:
            return

        # Keep only the ids belonging to an ISBN that occurs more than once.
        duplicate_ids = [book_id
                         for id_group in isbn_to_ids.values()
                         if len(id_group) > 1
                         for book_id in id_group]
        if duplicate_ids:
            self.show_invalid_rows(duplicate_ids, 'duplicate_isbn')

        summary = 'Checked %d books, found %d matches' %(total_count, len(duplicate_ids))
        self.gui.status_bar.showMessage(summary)
        if not duplicate_ids:
            info_dialog(self.gui, 'No Matches',
                               'All searched books have unique ISBNs', show=True)


    def check_duplicate_series(self):
        '''
        Show every book that shares both its series name and its series
        index (compared to four decimal places) with another searched book,
        sort the view by series, and report the totals in the status bar.
        '''
        books_by_series = {}

        def evaluate_book(book_id, db):
            series = db.series(book_id, index_is_id=True)
            if series:
                series_index = db.series_index(book_id, index_is_id=True)
                # Key on a (name, index) pair rather than one concatenated
                # string: joined together, series 'Foo1' at index 2 and
                # series 'Foo' at index 12 would both become 'Foo12.0000'
                # and be reported as duplicates of each other.
                series_key = (series, '%0.4f' % series_index)
                books_by_series.setdefault(series_key, set()).add(book_id)
            # We will determine the match as a post step, not in this function
            return False

        total_count, result_ids, cancelled = self.check_all_files(evaluate_book, show_matches=False,
                                                                  status_msg_type='books for duplicate series')
        if not cancelled:
            # Only groups holding more than one book are duplicates.
            result_ids = list()
            for values in books_by_series.values():
                if len(values) > 1:
                    result_ids.extend(values)
            # Time to display the results
            if len(result_ids) > 0:
                self.show_invalid_rows(result_ids, 'duplicate_series')
                self.gui.library_view.sort_by_named_field('series', True)

            msg = 'Checked %d books, found %d matches' %(total_count, len(result_ids))
            self.gui.status_bar.showMessage(msg)
            if len(result_ids) == 0:
                info_dialog(self.gui, 'No Matches',
                                   'All searched books have unique series indexes', show=True)


    def check_tags_count(self):
        '''
        Flag books carrying more tags than the configured maximum.
        Tags listed in the configured exclusions do not count towards
        the total.
        '''
        store = cfg.plugin_prefs[cfg.STORE_NAME]
        max_tags = store[cfg.KEY_MAX_TAGS]
        excluded_tags_set = set(store[cfg.KEY_MAX_TAG_EXCLUSIONS])

        def evaluate_book(book_id, db):
            raw_tags = db.tags(book_id, index_is_id=True)
            if not raw_tags:
                return False
            # Tags arrive as one comma separated string.
            counted_tags = set(t.strip() for t in raw_tags.split(',')) - excluded_tags_set
            return len(counted_tags) > max_tags

        options = dict(no_match_msg='All searched books have a valid tag count',
                       marked_text='excess_tags',
                       status_msg_type='books for invalid tag count')
        self.check_all_files(evaluate_book, **options)


    def check_html_comments(self):
        '''
        Flag books whose comments contain HTML formatting.

        A comment counts as containing HTML when at least one of the closing
        tags listed below occurs in it (matched case-insensitively). Books
        without comments are never flagged.
        '''
        closing_tags = [
            r'</b>',
            r'</i>',
            r'</s>',
            r'</u>',
            r'</a>',
            r'</h\d+>',
            r'</sub>',
            r'</sup>',
            r'</ol>',
            r'</ul>',
            r'</li>',
        ]
        # Compiled once here rather than once per book.
        html_patterns = [re.compile(pat, re.IGNORECASE) for pat in closing_tags]

        def evaluate_book(book_id, db):
            comments = db.comments(book_id, index_is_id=True)
            if not comments:
                return False
            return any(pat.search(comments) for pat in html_patterns)

        # The status text names the condition being searched for, in line
        # with the other checks in this class ("books for invalid ...").
        self.check_all_files(evaluate_book,
                             no_match_msg='All searched books have no HTML in comments',
                             marked_text='html_in_comments',
                             status_msg_type='books for HTML in comments')


    def check_no_html_comments(self):
        '''
        Flag books that have comments containing no HTML formatting.

        A comment counts as plain text when none of the closing tags listed
        below occurs in it (matched case-insensitively). Books without
        comments are never flagged.
        '''
        closing_tags = [
            r'</b>',
            r'</i>',
            r'</s>',
            r'</u>',
            r'</a>',
            r'</h\d+>',
            r'</sub>',
            r'</sup>',
            r'</ol>',
            r'</ul>',
            r'</li>',
            r'</p>',
            r'</div>',
        ]
        # Compiled once here rather than once per book.
        no_html_patterns = [re.compile(pat, re.IGNORECASE) for pat in closing_tags]

        def evaluate_book(book_id, db):
            comments = db.comments(book_id, index_is_id=True)
            if not comments:
                return False
            return not any(pat.search(comments) for pat in no_html_patterns)

        # The status text names the condition being searched for, in line
        # with the other checks in this class ("books for invalid ...").
        self.check_all_files(evaluate_book,
                             no_match_msg='All searched books have HTML in comments',
                             marked_text='no_html_in_comments',
                             status_msg_type='books for no HTML in comments')


    def check_titlecase(self):
        '''
        Flag books whose title is changed by calibre's ``titlecase``
        function, i.e. titles that are not already in title case.
        '''

        def evaluate_book(book_id, db):
            current_title = db.title(book_id, index_is_id=True)
            return current_title != titlecase(current_title)

        options = dict(no_match_msg='All searched books have a valid title casing',
                       marked_text='invalid_title_case',
                       status_msg_type='books for invalid titlecase')
        self.check_all_files(evaluate_book, **options)


    def check_and_update_file_sizes(self):
        '''
        Compare the size recorded in the database for each book format with
        the size of the file on disk, and store the on-disk size whenever
        the two differ. Books with at least one corrected format are marked
        and refreshed in the library view.
        '''
        self.updated_format_count = 0

        def evaluate_book(book_id, db):
            format_list = db.formats(book_id, index_is_id=True, verify_formats=False)
            if not format_list:
                return False
            size_changed = False
            for fmt in format_list.split(','):
                recorded_size = db.sizeof_format(book_id, fmt, index_is_id=True)
                file_path = db.format_abspath(book_id, fmt, index_is_id=True)
                if not file_path:
                    self.log.error('Unable to find path to book id:', book_id, db.title(book_id, index_is_id=True))
                    continue
                if not os.path.exists(file_path):
                    # Nothing on disk to measure for this format.
                    continue
                size_on_disk = os.path.getsize(file_path)
                if size_on_disk == recorded_size:
                    continue
                size_changed = True
                self.updated_format_count += 1
                db.conn.execute('UPDATE data SET uncompressed_size=? WHERE format=? AND book=?',
                      (size_on_disk, fmt, book_id))
            # One commit per book, and only when something was written.
            if size_changed:
                db.conn.commit()
            return size_changed

        total_count, result_ids, cancelled = self.check_all_files(
                evaluate_book,
                marked_text='file_size_updated',
                status_msg_type='books for invalid file sizes')
        if cancelled:
            return
        summary = 'Checked %d books, updated %d format sizes in %d books' % \
                    (total_count, self.updated_format_count, len(result_ids))
        self.gui.status_bar.showMessage(summary)
        if len(result_ids) == 0:
            return info_dialog(self.gui, 'No Matches', 'All book format sizes are correct', show=True)
        # Refresh the corrected books so the view shows the new sizes.
        self.gui.library_view.model().refresh_ids(list(result_ids))


    def cleanup_opf_folders(self):
        '''
        Requested by theducks. Caters for a behaviour in Calibre whereby using the
        "Remove books from device" menu option against a folder will only delete the
        book formats and not any cover.jpg or .opf files, leaving orphaned files.

        Asks the user for a folder, refuses to continue when that folder
        overlaps the current calibre library, then removes orphaned .opf
        files and the folders left empty beneath it. The chosen folder
        itself is never removed. A dialog summarises what was deleted.

        :return: ``None`` if the user cancels the folder chooser, otherwise
                 the result of the dialog that was shown.
        '''
        path = choose_dir(self.gui, 'quality check plugin:clean empty folder dialog',
                'Choose directory to cleanup')
        if not path:
            return

        def canonical(location):
            # Resolve symlinks/relative parts and fold case where the
            # platform does, so equivalent spellings compare equal.
            return os.path.normcase(os.path.realpath(location))

        def is_same_or_inside(candidate, container):
            # Compare whole path components: '/x/Library2' must not be
            # treated as being inside '/x/Library'.
            container_prefix = container.rstrip(os.sep) + os.sep
            return candidate == container or candidate.startswith(container_prefix)

        library_path = prefs['library_path']
        if library_path:
            chosen = canonical(path)
            library = canonical(library_path)
            # Reject the library itself, anything inside it, and any folder
            # that contains it: the cleanup recurses into subfolders, so
            # starting above the library would process the library as well.
            if is_same_or_inside(chosen, library) or is_same_or_inside(library, chosen):
                return error_dialog(self.gui, 'Invalid Folder',
                        'You should not run this feature against a Calibre library folder.<br>' +
                        'If you do you will remove "Empty book" entries and corrupt your database.',
                        show=True)

        messages = []
        errors = []
        # For our very top level folder we will NEVER delete this.
        self._cleanup_directory_if_needed(path, messages, errors, delete_parent=False)

        if not messages and not errors:
            return info_dialog(self.gui, 'No files deleted',
                               'No files/folders were found to be deleted', show=True)

        msg = 'Deleted %d files/folders with %d errors. See details for more info.' % \
                (len(messages), len(errors))
        # Successful removals are listed first, followed by the failures.
        messages.extend(errors)
        det_msg = '\n'.join(messages)
        return info_dialog(self.gui, 'Cleanup completed', msg, det_msg=det_msg, show=True)


    def _cleanup_directory_if_needed(self, dir, messages, errors, delete_parent=True):
        self.log('Analysing folder', dir)
        self._delete_orphaned_opf_files(dir, messages, errors)

        files = os.listdir(dir)
        safe_to_delete_folder = True
        for filename in files:
            full_path = os.path.join(dir, filename)
            if os.path.isdir(full_path):
                if not self._cleanup_directory_if_needed(full_path, messages, errors):
                    # As we still have a subfolder, cannot delete this parent
                    self.log('Non empty subfolder', full_path)
                    safe_to_delete_folder = False
            else:
                # Any other file being present in this folder means we should not delete it
                safe_to_delete_folder = False

        if safe_to_delete_folder and delete_parent:
            self.log('Removing folder', dir)
            try:
                shutil.rmtree(dir)
                messages.append('Removed folder: %s' % dir)
            except:
                self.log.error('Unable to remove folder:', dir)
                self.log(traceback.format_exc())
                errors.append('ERROR removing folder: %s'%dir)
                safe_to_delete_folder = False
        return safe_to_delete_folder


    def _delete_orphaned_opf_files(self, dir, messages, errors):
        all_files = os.listdir(dir)
        all_opf_files = [f for f in all_files if f.lower().endswith('.opf')]
        all_non_opf_files = set(all_files) - set(all_opf_files)
        files_to_delete = []

        for opf_file in all_opf_files:
            base, extension = os.path.splitext(opf_file)
            matching_files = [f for f in all_non_opf_files if f.lower().startswith(base.lower()+'.')]
            self.log('  Analysing opf file: ', opf_file)
            self.log('  Matching files: ', matching_files)
            safe_to_delete = True

            for m in matching_files:
                matching_extension = os.path.splitext(m)[1]
                if matching_extension.lower() != '.jpg':
                    self.log('  Cannot remove .opf because found: ', m)
                    safe_to_delete = False
                    break
            if safe_to_delete:
                self.log('  Safe to delete: ', opf_file)
                files_to_delete.append(os.path.join(dir, opf_file))
                for m in matching_files:
                    files_to_delete.append(os.path.join(dir, m))

        for f in files_to_delete:
            self.log('  Removing file', f)
            try:
                os.remove(f)
                messages.append('Removed file: %s'%f)
            except:
                self.log.error('Unable to remove file:', f)
                self.log(traceback.format_exc())
                errors.append('ERROR removing file: %s'%f)


