#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
from calibre.ebooks.oeb.base import OPF_NSES

__license__   = 'GPL v3'
__copyright__ = '2011, Grant Drake <grant.drake@gmail.com>'
__docformat__ = 'restructuredtext en'

import traceback, os, posixpath, urllib, re
import calibre_plugins.quality_check.config as cfg
from lxml import etree

from calibre import guess_type
from calibre.gui2 import error_dialog
from calibre.ebooks.epub.fix import InvalidEpub
from calibre.ebooks.epub.fix.container import OCF_NS, OPF_NS
from calibre.utils.zipfile import ZipFile

from calibre_plugins.quality_check.check_base import BaseCheck

# File names that may appear inside the META-INF directory of an ePub.
# Within this module only the keys are consulted (a membership test when
# deciding whether a zip entry needs a manifest entry).
# NOTE(review): the boolean presumably flags whether the file is mandatory
# (only container.xml is True) -- confirm against calibre's container code.
META_INF = {
    'container.xml': True,
    'manifest.xml': False,
    'encryption.xml': False,
    'metadata.xml': False,
    'signatures.xml': False,
    'rights.xml': False,
}

class EpubCheck(BaseCheck):
    '''
    All checks related to working with ePub formats.
    '''
    def perform_check(self, menu_key):
        '''
        Run the ePub check selected by menu_key.

        Each known key is routed to one of the check_epub_* methods of this
        class.  For an unrecognised key an error dialog is shown and the
        value returned by error_dialog is handed back to the caller.
        '''
        # Every handler is a lambda so that nothing is looked up or executed
        # until the selected entry is actually invoked.
        handlers = {
            'check_epub_jacket':
                lambda: self.check_epub_jacket(check_has_jacket=True),
            'check_epub_legacy_jacket':
                lambda: self.check_epub_jacket(check_has_jacket=True, check_legacy_only=True),
            'check_epub_multi_jacket':
                lambda: self.check_epub_multiple_jacket(),
            'check_epub_no_jacket':
                lambda: self.check_epub_jacket(check_has_jacket=False),
            'check_epub_xpgt':
                lambda: self.check_epub_xpgt_margins(),
            'check_epub_itunes':
                lambda: self.check_epub_itunes_plist(),
            'check_epub_bookmark':
                lambda: self.check_epub_calibre_bookmarks(),
            'check_epub_cover':
                lambda: self.check_epub_calibre_cover(check_has_cover=True),
            'check_epub_no_cover':
                lambda: self.check_epub_calibre_cover(check_has_cover=False),
            'check_epub_toc_size':
                lambda: self.check_epub_toc_size(),
            'check_epub_fonts':
                lambda: self.check_epub_fonts(),
            'check_epub_drm':
                lambda: self.check_epub_drm(),
            'check_epub_converted':
                lambda: self.check_epub_conversion(check_converted=True),
            'check_epub_not_converted':
                lambda: self.check_epub_conversion(check_converted=False),
            'check_epub_no_container':
                lambda: self.check_epub_no_container(),
            'check_epub_namespaces':
                lambda: self.check_epub_namespaces(),
            'check_epub_files_missing':
                lambda: self.check_epub_opf_files_missing(),
            'check_epub_unman_files':
                lambda: self.check_epub_unmanifested_files(),
            'check_epub_custom_files':
                lambda: self.check_epub_custom_files(),
        }
        handler = handlers.get(menu_key)
        if handler is None:
            return error_dialog(self.gui, _('Quality Check failed'),
                                _('Unknown menu key for %s of \'%s\'')%('EpubCheck', menu_key),
                                show=True, show_copy_button=False)
        handler()


    def check_epub_jacket(self, check_has_jacket, check_legacy_only=False):
        '''
        Search the ePub books for the presence or absence of a jacket page.

        For each book the evaluator looks at every zip entry whose name
        contains 'jacket' and ends with '.xhtml'.  It returns
        check_has_jacket as soon as one of them is recognised as a jacket,
        and the negation of check_has_jacket otherwise (including when the
        book cannot be read).

        check_legacy_only: when true, only legacy jackets are recognised.

        The evaluator is handed to self.check_all_files (presumably a true
        result marks the book as a match -- confirm in BaseCheck).
        '''
        if check_legacy_only:
            marked_text = 'epub_has_legacy_jacket'
            msg = 'No searched ePub books have legacy jackets'
        elif check_has_jacket:
            marked_text = 'epub_has_jacket'
            msg = 'No searched ePub books have jackets'
        else:
            marked_text = 'epub_missing_jacket'
            msg = 'All searched ePub books have jackets'

        def is_jacket(page):
            # The current-style test is skipped for a legacy-only search.
            if not check_legacy_only and self._is_current_jacket(page):
                return True
            return self._is_legacy_jacket(page)

        def evaluate_book(book_id, db):
            path_to_book = db.format_abspath(book_id, 'EPUB', index_is_id=True)
            try:
                with ZipFile(path_to_book, 'r') as zf:
                    for entry in zf.namelist():
                        if 'jacket' not in entry or not entry.endswith('.xhtml'):
                            continue
                        if is_jacket(zf.read(entry)):
                            return check_has_jacket
                return not check_has_jacket
            except InvalidEpub:
                self.log.error('Invalid epub: ', path_to_book)
                return not check_has_jacket
            except:
                self.log.error('ERROR parsing book: ', path_to_book)
                self.log(traceback.format_exc())
                return not check_has_jacket

        self.check_all_files(evaluate_book, initial_search='formats:epub',
                             status_msg_type='ePub books for jackets',
                             no_match_msg=msg, marked_text=marked_text)


    def check_epub_multiple_jacket(self):
        '''
        Search the ePub books for those holding more than one jacket page.

        The evaluator counts the zip entries whose name contains 'jacket'
        and ends with '.xhtml' and whose content is recognised as a current
        or legacy jacket; it returns True when that count exceeds one and
        False when the book cannot be read.
        '''
        def evaluate_book(book_id, db):
            path_to_book = db.format_abspath(book_id, 'EPUB', index_is_id=True)
            try:
                with ZipFile(path_to_book, 'r') as zf:
                    pages = (zf.read(entry) for entry in zf.namelist()
                             if 'jacket' in entry and entry.endswith('.xhtml'))
                    jacket_count = sum(
                        1 for page in pages
                        if self._is_current_jacket(page) or self._is_legacy_jacket(page))
                return jacket_count > 1
            except InvalidEpub:
                self.log.error('Invalid epub: ', path_to_book)
                return False
            except:
                self.log.error('ERROR parsing book: ', path_to_book)
                self.log(traceback.format_exc())
                return False

        self.check_all_files(evaluate_book, initial_search='formats:epub',
                             no_match_msg='No searched ePub books have multiple jackets',
                             marked_text='epub_multiple_jacket',
                             status_msg_type='ePub books for multiple jackets')


    def _is_legacy_jacket(self, html):
        '''
        Return True if html contains the markup of a legacy jacket page.

        A legacy jacket is recognised by an <h1> or <h2> tag whose class
        attribute starts with "calibrerescale".

        html is the raw page content.  Callers in this class pass the bytes
        read from the zip file, so the markers are converted to bytes before
        searching: mixing text markers with byte content fails on Python 3
        and, with unicode_literals on Python 2, raises UnicodeDecodeError as
        soon as the page holds a non-ASCII byte.  Text input is still
        accepted and searched with the text markers.
        '''
        markers = ('<h1 class="calibrerescale', '<h2 class="calibrerescale')
        if isinstance(html, bytes):
            markers = tuple(marker.encode('ascii') for marker in markers)
        return any(marker in html for marker in markers)

    def _is_current_jacket(self, html):
        '''
        Return True if html contains the markup of a current jacket page.

        A current jacket is recognised by a <meta> tag carrying
        name="calibre-content" and content="jacket", in either attribute
        order.

        html is the raw page content.  Callers in this class pass the bytes
        read from the zip file, so the markers are converted to bytes before
        searching: mixing text markers with byte content fails on Python 3
        and, with unicode_literals on Python 2, raises UnicodeDecodeError as
        soon as the page holds a non-ASCII byte.  Text input is still
        accepted and searched with the text markers.
        '''
        markers = ('<meta content="jacket" name="calibre-content"',
                   '<meta name="calibre-content" content="jacket"')
        if isinstance(html, bytes):
            markers = tuple(marker.encode('ascii') for marker in markers)
        return any(marker in html for marker in markers)


    def check_epub_xpgt_margins(self):
        '''
        Search the ePub books for page templates that mention margins.

        The evaluator returns True when any zip entry whose lower-cased name
        ends with 'page-template.xpgt' contains the word 'margin', and False
        otherwise (including when the book cannot be read).
        '''
        def evaluate_book(book_id, db):
            path_to_book = db.format_abspath(book_id, 'EPUB', index_is_id=True)
            try:
                with ZipFile(path_to_book, 'r') as zf:
                    for resource_name in zf.namelist():
                        if not resource_name.lower().endswith('page-template.xpgt'):
                            continue
                        # zf.read() returns bytes, so search with a bytes
                        # marker; a text marker breaks on Python 3 and on
                        # non-ASCII content under Python 2.
                        if b'margin' in zf.read(resource_name):
                            return True
                return False
            except InvalidEpub:
                self.log.error('Invalid epub: ', path_to_book)
                return False
            except Exception:
                self.log.error('ERROR parsing book: ', path_to_book)
                self.log(traceback.format_exc())
                return False

        self.check_all_files(evaluate_book, initial_search='formats:epub',
                             no_match_msg='No searched ePub books have non-zero .xpgt margins',
                             marked_text='epub_xpgt_margins',
                             status_msg_type='ePub books for .xpgt margins')


    def check_epub_custom_files(self):
        '''
        Search the ePub books for files with a user configured extension.

        The extensions are read from the plugin preferences
        (cfg.KEY_CUSTOM_EXTENSION, falling back to the stored default).  The
        evaluator returns True when the lower-cased name of any zip entry
        ends with one of them (compared case-insensitively), and False
        otherwise (including when the book cannot be read).
        '''
        c = cfg.plugin_prefs[cfg.STORE_NAME]
        custom_extensions = c.get(cfg.KEY_CUSTOM_EXTENSION, cfg.DEFAULT_STORE_VALUES[cfg.KEY_CUSTOM_EXTENSION])
        self.log.debug('Testing extensions: ', custom_extensions)

        def evaluate_book(book_id, db):
            path_to_book = db.format_abspath(book_id, 'EPUB', index_is_id=True)
            self.log.info('Testing book: ', path_to_book)
            try:
                # Lower-case each extension once per book instead of once per
                # zip entry.  Blank entries are dropped because every name
                # ends with the empty string, which would flag every book.
                # Kept inside the try so a malformed preference value is
                # logged per book rather than aborting the whole check.
                suffixes = [(extension, extension.lower())
                            for extension in custom_extensions
                            if extension.strip()]
                with ZipFile(path_to_book, 'r') as zf:
                    for resource_name in zf.namelist():
                        lowered_name = resource_name.lower()
                        for custom_extension, suffix in suffixes:
                            if lowered_name.endswith(suffix):
                                self.log.debug('Testing extension: ', custom_extension)
                                self.log.debug('FOUND: ', lowered_name, path_to_book)
                                return True
                        # A non-matching entry is routine, not an error.
                        self.log.debug('skip: ', lowered_name, path_to_book)
                return False
            except InvalidEpub:
                self.log.error('Invalid epub: ', path_to_book)
                return False
            except Exception:
                self.log.error('ERROR parsing book: ', path_to_book)
                self.log(traceback.format_exc())
                return False

        self.check_all_files(evaluate_book, initial_search='formats:epub',
                             no_match_msg='No searched ePub books have custom extensions',
                             marked_text='epub_custom_extensions',
                             status_msg_type='ePub books with custom extensions')


    def check_epub_unmanifested_files(self):
        '''
        Search the ePub books for files that the OPF manifest does not list.

        The evaluator compares the names produced by _manifest_worthy_names
        with the manifest items of the OPF.  It returns True at the first
        name missing from the manifest, except that calibre bookmark files
        and the iTunes metadata plist are ignored.  It returns False when
        nothing is missing or the book cannot be read.
        '''
        def evaluate_book(book_id, db):
            path_to_book = db.format_abspath(book_id, 'EPUB', index_is_id=True)
            try:
                with ZipFile(path_to_book, 'r') as zf:
                    opf_name = self._get_opf_xml(path_to_book, zf)
                    if not opf_name:
                        return False
                    manifest_items_map = self._get_opf_items_map(zf, opf_name)
                    for entry in self._manifest_worthy_names(zf):
                        if entry in manifest_items_map:
                            continue
                        lowered = entry.lower()
                        if lowered.endswith('calibre_bookmarks.txt') or \
                           lowered == 'itunesmetadata.plist':
                            self.log.debug('Ignoring unmanifested itunes/bookmark file:', entry, 'in', path_to_book)
                            continue
                        self.log.info('Unmanifested file: ', entry, 'in', path_to_book)
                        return True
                return False
            except InvalidEpub:
                self.log.error('Invalid epub: ', path_to_book)
                return False
            except:
                self.log.error('ERROR parsing book: ', path_to_book)
                self.log(traceback.format_exc())
                return False

        self.check_all_files(evaluate_book, initial_search='formats:epub',
                             no_match_msg='No searched ePub books have unmanifested files',
                             marked_text='epub_unmanifested_files',
                             status_msg_type='ePub books for unmanifested files')


    def check_epub_itunes_plist(self):
        '''
        Search the ePub books for an iTunes metadata plist file.

        The evaluator returns True when a zip entry is named
        'itunesmetadata.plist' (compared case-insensitively), and False
        otherwise (including when the book cannot be read).
        '''
        def evaluate_book(book_id, db):
            path_to_book = db.format_abspath(book_id, 'EPUB', index_is_id=True)
            try:
                with ZipFile(path_to_book, 'r') as zf:
                    return any(entry.lower() == 'itunesmetadata.plist'
                               for entry in zf.namelist())
            except InvalidEpub:
                self.log.error('Invalid epub: ', path_to_book)
                return False
            except:
                self.log.error('ERROR parsing book: ', path_to_book)
                self.log(traceback.format_exc())
                return False

        self.check_all_files(evaluate_book, initial_search='formats:epub',
                             no_match_msg='No searched ePub books have itunes plist files',
                             marked_text='epub_itunes_plist',
                             status_msg_type='ePub books for iTunes files')


    def check_epub_calibre_bookmarks(self):
        '''
        Search the ePub books for a calibre bookmarks file.

        The evaluator returns True when the lower-cased name of any zip
        entry ends with 'calibre_bookmarks.txt', and False otherwise
        (including when the book cannot be read).
        '''
        def evaluate_book(book_id, db):
            path_to_book = db.format_abspath(book_id, 'EPUB', index_is_id=True)
            try:
                with ZipFile(path_to_book, 'r') as zf:
                    return any(entry.lower().endswith('calibre_bookmarks.txt')
                               for entry in zf.namelist())
            except InvalidEpub:
                self.log.error('Invalid epub: ', path_to_book)
                return False
            except:
                self.log.error('ERROR parsing book: ', path_to_book)
                self.log(traceback.format_exc())
                return False

        self.check_all_files(evaluate_book, initial_search='formats:epub',
                             no_match_msg='No searched ePub books have calibre bookmarks files',
                             marked_text='epub_calibre_bookmarks',
                             status_msg_type='ePub books for calibre bookmarks')


    def check_epub_calibre_cover(self, check_has_cover):
        '''
        Search the ePub books for the presence or absence of a calibre cover.

        The evaluator resolves the guide reference of type "cover" in the
        OPF.  When it points at an .xhtml entry whose content carries the
        calibre:cover meta tag (either attribute order) it returns
        check_has_cover; otherwise, and when the book cannot be read, it
        returns the negation of check_has_cover.
        '''
        # zf.read() returns bytes, so the markers are bytes too; text markers
        # break on Python 3 and on non-ASCII pages under Python 2.
        cover_markers = (b'<meta content="true" name="calibre:cover"',
                         b'<meta name="calibre:cover" content="true"')

        def evaluate_book(book_id, db):
            path_to_book = db.format_abspath(book_id, 'EPUB', index_is_id=True)
            try:
                with ZipFile(path_to_book, 'r') as zf:
                    opf_name = self._get_opf_xml(path_to_book, zf)
                    if opf_name:
                        cover_name = self._get_opf_item(zf, opf_name,
                                    xpath=r'child::opf:guide/opf:reference'
                                           '[@type="cover"and @href]')
                        if cover_name and cover_name.endswith('.xhtml'):
                            html = zf.read(cover_name)
                            if any(marker in html for marker in cover_markers):
                                return check_has_cover
                return not check_has_cover
            except InvalidEpub:
                self.log.error('Invalid epub: ', path_to_book)
                return not check_has_cover
            except Exception:
                self.log.error('ERROR parsing book: ', path_to_book)
                self.log(traceback.format_exc())
                return not check_has_cover

        if check_has_cover:
            msg = 'No searched ePub books have Calibre covers embedded'
            marked_text = 'epub_has_calibre_cover'
        else:
            # Previously this reused the jacket check's wording by mistake.
            msg = 'All searched ePub books have Calibre covers embedded'
            marked_text = 'epub_missing_calibre_cover'
        self.check_all_files(evaluate_book, initial_search='formats:epub',
                             no_match_msg=msg, marked_text=marked_text,
                             status_msg_type='ePub books for Calibre covers')


    def check_epub_conversion(self, check_converted):
        '''
        Search the ePub books by whether their OPF carries calibre markers.

        The evaluator returns check_converted when the OPF contains either a
        calibre:timestamp meta name or a "bkp" contributor starting with
        'calibre '; otherwise, and when the book cannot be read, it returns
        the negation of check_converted.
        '''
        # zf.read() returns bytes, so the markers are bytes too; text markers
        # break on Python 3 and on non-ASCII metadata under Python 2.
        calibre_markers = (b'name="calibre:timestamp"',
                           b'<dc:contributor opf:role="bkp">calibre ')

        def evaluate_book(book_id, db):
            path_to_book = db.format_abspath(book_id, 'EPUB', index_is_id=True)
            try:
                with ZipFile(path_to_book, 'r') as zf:
                    opf_name = self._get_opf_xml(path_to_book, zf)
                    if opf_name:
                        opf_xml = zf.read(opf_name)
                        if any(marker in opf_xml for marker in calibre_markers):
                            return check_converted
                return not check_converted
            except InvalidEpub:
                self.log.error('Invalid epub: ', path_to_book)
                return not check_converted
            except Exception:
                self.log.error('ERROR parsing book: ', path_to_book)
                self.log(traceback.format_exc())
                return not check_converted

        if check_converted:
            msg = 'No searched ePub books have been converted by Calibre'
            marked_text = 'epub_calibre_converted'
        else:
            msg = 'All searched ePub books have been converted by Calibre'
            marked_text = 'epub_not_calibre_converted'
        self.check_all_files(evaluate_book, initial_search='formats:epub',
                             no_match_msg=msg, marked_text=marked_text,
                             status_msg_type='ePub books for Calibre conversions')


    def check_epub_no_container(self):
        '''
        Search the ePub books for those without a usable container.xml.

        The evaluator returns True when _get_opf_xml raises InvalidEpub or
        yields no OPF name, and False when an OPF name is found.  Any other
        error is logged and also gives False.
        '''
        def evaluate_book(book_id, db):
            path_to_book = db.format_abspath(book_id, 'EPUB', index_is_id=True)
            try:
                with ZipFile(path_to_book, 'r') as zf:
                    return not self._get_opf_xml(path_to_book, zf)
            except InvalidEpub:
                return True
            except:
                self.log.error('ERROR parsing book: ', path_to_book)
                self.log(traceback.format_exc())
                return False

        self.check_all_files(evaluate_book, initial_search='formats:epub',
                             no_match_msg='All searched ePub books have a valid container.xml file',
                             marked_text='epub_missing_container_xml',
                             status_msg_type='ePub books for missing container.xml')


    def check_epub_namespaces(self):
        '''
        Search the ePub books for missing container/OPF namespaces.

        The evaluator returns True when META-INF/container.xml is absent,
        when container.xml does not contain OCF_NS, or when the OPF does not
        contain OPF_NS.  It returns False otherwise, and also when any error
        occurs while reading the book.
        '''
        def as_bytes(value):
            # zf.read() returns bytes while the namespace constants may be
            # text; compare like with like so the membership test neither
            # raises on Python 3 nor trips over non-ASCII data on Python 2.
            if isinstance(value, bytes):
                return value
            return value.encode('utf-8')

        def evaluate_book(book_id, db):
            path_to_book = db.format_abspath(book_id, 'EPUB', index_is_id=True)
            try:
                with ZipFile(path_to_book, 'r') as zf:
                    contents = zf.namelist()
                    if 'META-INF/container.xml' not in contents:
                        # We have no container xml so file is completely knackered
                        self.log.error('Missing container.xml file in', path_to_book)
                        return True
                    data = zf.read('META-INF/container.xml')
                    if as_bytes(OCF_NS) not in data:
                        self.log('Incorrect container.xml namespace in', path_to_book)
                        return True
                    opf_name = self._get_opf_xml(path_to_book, zf)
                    if opf_name:
                        data = zf.read(opf_name)
                        if as_bytes(OPF_NS) not in data:
                            self.log('Incorrect .opf manifest namespace in', path_to_book)
                            return True
                return False
            except Exception:
                self.log.error('ERROR parsing book: ', path_to_book)
                self.log(traceback.format_exc())
                return False

        self.check_all_files(evaluate_book, initial_search='formats:epub',
                             no_match_msg='All searched ePub books have valid namespaces',
                             marked_text='epub_namespace_invalid',
                             status_msg_type='ePub books for namespaces check')


    def check_epub_opf_files_missing(self):
        '''
        Search the ePub books for manifest items that are not in the zip.

        The evaluator returns True at the first OPF manifest item whose
        resolved name is not a zip entry, and False when every item is
        present, no OPF name is found, or the book cannot be read.
        '''
        def evaluate_book(book_id, db):
            path_to_book = db.format_abspath(book_id, 'EPUB', index_is_id=True)
            try:
                with ZipFile(path_to_book, 'r') as zf:
                    opf_name = self._get_opf_xml(path_to_book, zf)
                    if not opf_name:
                        return False
                    manifest_items_map = self._get_opf_items_map(zf, opf_name)
                    present = frozenset(zf.namelist())
                    for item_name in manifest_items_map:
                        if item_name in present:
                            continue
                        self.log.info('Missing file: ', item_name, 'in', path_to_book)
                        return True
                return False
            except InvalidEpub:
                self.log.error('Invalid epub: ', path_to_book)
                return False
            except:
                self.log.error('ERROR parsing book: ', path_to_book)
                self.log(traceback.format_exc())
                return False

        self.check_all_files(evaluate_book, initial_search='formats:epub',
                             no_match_msg='All searched ePub books have a valid opf manifest',
                             marked_text='epub_manifest_files_missing',
                             status_msg_type='ePub books for missing files in opf')


    def check_epub_toc_size(self):
        '''
        Search the ePub books for a table of contents with under 3 entries.

        The evaluator locates the NCX through the OPF manifest and counts
        the '<navLabel>' tags in it.  It returns False when there are at
        least three, and True when there are fewer or when no OPF/NCX is
        found.  Books that cannot be read give False.
        '''
        def evaluate_book(book_id, db):
            path_to_book = db.format_abspath(book_id, 'EPUB', index_is_id=True)
            try:
                with ZipFile(path_to_book, 'r') as zf:
                    opf_name = self._get_opf_xml(path_to_book, zf)
                    if opf_name:
                        ncx_name = self._get_opf_item(zf, opf_name,
                                    xpath=r'child::opf:manifest/opf:item'
                                           '[@media-type="%s" and @href]'%guess_type('a.ncx')[0])
                        if ncx_name:
                            # zf.read() returns bytes, so count a bytes
                            # marker; a text marker breaks on Python 3 and on
                            # non-ASCII titles under Python 2.
                            count = zf.read(ncx_name).count(b'<navLabel>')
                            if count >= 3:
                                return False
                return True
            except InvalidEpub:
                self.log.error('Invalid epub: ', path_to_book)
                return False
            except Exception:
                self.log.error('ERROR parsing book: ', path_to_book)
                self.log(traceback.format_exc())
                return False

        self.check_all_files(evaluate_book, initial_search='formats:epub',
                             no_match_msg='All searched ePub books have a TOC with at least 3 items',
                             marked_text='epub_toc_too_small',
                             status_msg_type='ePub books for TOC count')


    def check_epub_fonts(self):
        '''
        Search the ePub books for embedded font files.

        The evaluator returns True when the lower-cased name of any zip
        entry ends with '.ttf' or '.otf', and False otherwise (including
        when the book cannot be read).
        '''
        font_suffixes = ('.ttf', '.otf')

        def evaluate_book(book_id, db):
            path_to_book = db.format_abspath(book_id, 'EPUB', index_is_id=True)
            try:
                with ZipFile(path_to_book, 'r') as zf:
                    return any(entry.lower().endswith(font_suffixes)
                               for entry in zf.namelist())
            except InvalidEpub:
                self.log.error('Invalid epub: ', path_to_book)
                return False
            except:
                self.log.error('ERROR parsing book: ', path_to_book)
                self.log(traceback.format_exc())
                return False

        self.check_all_files(evaluate_book, initial_search='formats:epub',
                             no_match_msg='No searched ePub books have embedded fonts',
                             marked_text='epub_embedded_fonts',
                             status_msg_type='ePub books for embedded fonts')


    def check_epub_drm(self):
        '''
        Search the ePub books for DRM encryption.

        The evaluator parses the first zip entry whose lower-cased name ends
        with 'encryption.xml' and inspects the Algorithm attribute of every
        EncryptionMethod element.  It returns True as soon as one algorithm
        is something other than font obfuscation, and False when there is no
        such entry, when only font obfuscation is declared, or when the book
        cannot be read.
        '''
        # Both identifiers only obfuscate embedded fonts and are not DRM: the
        # first is Adobe's scheme, the second the IDPF scheme from the ePub
        # Open Container Format specification.
        font_obfuscation = frozenset((
            'http://ns.adobe.com/pdf/enc#RC',
            'http://www.idpf.org/2008/embedding',
        ))

        def evaluate_book(book_id, db):
            path_to_book = db.format_abspath(book_id, 'EPUB', index_is_id=True)
            try:
                with ZipFile(path_to_book, 'r') as zf:
                    contents = zf.namelist()
                    for resource_name in contents:
                        if resource_name.lower().endswith('encryption.xml'):
                            root = etree.fromstring(zf.read(resource_name))
                            for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
                                algorithm = em.get('Algorithm', '')
                                if algorithm not in font_obfuscation:
                                    return True
                            # Only the first encryption.xml is considered.
                            return False
                return False
            except InvalidEpub:
                self.log.error('Invalid epub: ', path_to_book)
                return False
            except Exception:
                self.log.error('ERROR parsing book: ', path_to_book)
                self.log(traceback.format_exc())
                return False

        self.check_all_files(evaluate_book, initial_search='formats:epub',
                             no_match_msg='No searched ePub books have DRM',
                             marked_text='epub_drm',
                             status_msg_type='ePub books for DRM')


    def _get_opf_xml(self, path_to_book, zf):
        '''
        Return the zip entry name of the OPF package file of an ePub.

        The name is the full-path attribute of the first rootfile element in
        META-INF/container.xml whose media-type equals guess_type('a.opf').

        path_to_book is only used in the error messages; zf is the open zip
        file of the book.

        Raises InvalidEpub when container.xml is missing, when it lists no
        matching rootfile, or when the named OPF is not an entry of the zip.
        '''
        contents = zf.namelist()
        if 'META-INF/container.xml' not in contents:
            raise InvalidEpub('Missing container.xml from:%s'%path_to_book)
        container = etree.fromstring(zf.read('META-INF/container.xml'))
        # (A second, un-namespaced rootfile query used to run here; its
        # result was never used, so it has been dropped.)
        opf_files = container.xpath((
            r'child::ocf:rootfiles/ocf:rootfile'
            '[@media-type="%s" and @full-path]'%guess_type('a.opf')[0]
            ), namespaces={'ocf':OCF_NS}
        )
        if not opf_files:
            raise InvalidEpub('Could not find OPF in:%s'%path_to_book)
        opf_name = opf_files[0].attrib['full-path']
        if opf_name not in contents:
            raise InvalidEpub('OPF file in container.xml not found in:%s'%path_to_book)
        return opf_name

    def _get_opf_item(self, zf, opf_name, xpath):
        '''
        Return the zip entry name of the first OPF element matching xpath.

        The xpath is evaluated against the parsed OPF with the 'opf' prefix
        bound to OPF_NS.  The href of the first match is resolved relative
        to the directory of the OPF.  None is returned when nothing matches
        or when the resolved name is not an entry of the zip.
        '''
        matches = self._get_opf_tree(zf, opf_name).xpath(
            xpath, namespaces={'opf':OPF_NS})
        if not matches:
            return None
        href = matches[0].attrib['href']
        item_name = self._href_to_name(href, posixpath.dirname(opf_name))
        return item_name if item_name in zf.namelist() else None

    def _get_opf_items_map(self, zf, opf_name):
        '''
        Return a dict of the OPF manifest items keyed by resolved name.

        Every manifest item with an href is included.  The key is the href
        resolved relative to the directory of the OPF and the value is the
        item element itself; a later item replaces an earlier one that
        resolves to the same name.
        '''
        opf_dir = posixpath.dirname(opf_name)
        manifest_items = self._get_opf_tree(zf, opf_name).xpath(
            r'child::opf:manifest/opf:item[@href]', namespaces={'opf':OPF_NS})
        return dict(
            (self._href_to_name(element.attrib['href'], opf_dir), element)
            for element in manifest_items)

    def _get_opf_tree(self, zf, opf_name):
        '''
        Return the parsed XML root of the OPF entry opf_name of zip zf.

        Before parsing, the legacy OEB 1.0 package namespace is replaced by
        OPF_NS so that both kinds of file can be queried with one prefix.
        '''
        data = zf.read(opf_name)
        legacy_ns = 'http://openebook.org/namespaces/oeb-package/1.0/'
        current_ns = OPF_NS
        # zf.read() returns bytes, so substitute bytes for bytes: a text
        # pattern cannot be applied to bytes on Python 3, and on Python 2 it
        # forces an implicit ASCII decode that fails on non-ASCII metadata.
        # A literal replace is also stricter than the former regex, whose
        # unescaped dots matched any character.
        if isinstance(data, bytes):
            legacy_ns = legacy_ns.encode('ascii')
            if not isinstance(current_ns, bytes):
                current_ns = current_ns.encode('utf-8')
        return etree.fromstring(data.replace(legacy_ns, current_ns))

    def _href_to_name(self, href, base=''):
        '''
        Convert an OPF href into the zip entry name it refers to.

        The fragment (everything from '#') is dropped, percent escapes are
        decoded, the result is joined onto base (the directory of the OPF)
        when base is given, and '.'/'..' segments are collapsed so that an
        href such as '../Images/c.jpg' can match the zip entry name.

        An href that is empty once the fragment is removed is returned
        joined to base without normalisation, as before.
        '''
        # urllib.unquote only exists on Python 2; on Python 3 the function
        # lives in urllib.parse.
        try:
            from urllib.parse import unquote
        except ImportError:
            from urllib import unquote
        href = unquote(href.partition('#')[0])
        name = posixpath.join(base, href) if base else href
        if not href:
            return name
        return posixpath.normpath(name)

    def _manifest_worthy_names(self, zf):
        '''
        Yield the zip entry names that are expected in the OPF manifest.

        Skipped are the 'mimetype' entry, directory entries (names ending
        with '/'), .opf files, and entries under META-INF whose base name is
        a key of META_INF.
        '''
        for entry in zf.namelist():
            if entry == 'mimetype' or entry.endswith(('/', '.opf')):
                continue
            if entry.startswith('META-INF') and \
                    posixpath.basename(entry) in META_INF:
                continue
            yield entry

