#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2011, Grant Drake <grant.drake@gmail.com>'
__docformat__ = 'restructuredtext en'

import os, posixpath, urllib, sys, re

from lxml import etree
from lxml.etree import XMLSyntaxError

from calibre import guess_type, prepare_string_for_xml
from calibre.constants import iswindows
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.epub.fix import InvalidEpub, ParseError
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.oeb.base import urlnormalize
from calibre.utils.zipfile import ZipFile, ZIP_DEFLATED, ZIP_STORED

# Short aliases for the two os.path helpers used by Container
exists, join = os.path.exists, os.path.join

# XML namespace URIs used as XPath prefixes in this module:
# 'ocf' for META-INF/container.xml, 'opf' for the OPF package file
# and 'ncx' for the NCX table of contents
OCF_NS = 'urn:oasis:names:tc:opendocument:xmlns:container'
OPF_NS = 'http://www.idpf.org/2007/opf'
NCX_NS = 'http://www.daisy.org/z3986/2005/ncx/'

class Container(object):

    META_INF = {
            'container.xml' : True,
            'manifest.xml' : False,
            'encryption.xml' : False,
            'metadata.xml' : False,
            'signatures.xml' : False,
            'rights.xml' : False,
    }

    def __init__(self, path, log):
        '''
        Open the directory of an extracted epub for editing.

        Builds ``name_map`` (epub relative name -> absolute filesystem
        path), locates the OPF file via META-INF/container.xml and fills
        ``mime_map`` from the media types declared in the OPF manifest.
        Any ``mimetype`` file in the directory is deleted; write() emits
        a fresh one.

        :param path: Directory containing the extracted epub contents
        :param log: Object stored as ``self.log`` for logging
        :raises InvalidEpub: If META-INF/container.xml is missing, lists
            no OPF rootfile, or points to an OPF file that does not exist
        '''
        self.root = os.path.abspath(path)
        self.log = log
        self.dirtied = set([])
        self.cache = {}
        self.mime_map = {}

        mimetype_path = join(self.root, 'mimetype')
        if exists(mimetype_path):
            os.remove(mimetype_path)

        container_path = join(self.root, 'META-INF', 'container.xml')
        if not exists(container_path):
            raise InvalidEpub('No META-INF/container.xml in epub')
        # Read through a context manager so the handle is closed promptly
        # instead of waiting for garbage collection
        with open(container_path, 'rb') as f:
            self.container = etree.fromstring(f.read())
        opf_mime = guess_type('a.opf')[0]
        opf_files = self.container.xpath((
            r'child::ocf:rootfiles/ocf:rootfile'
            '[@media-type="%s" and @full-path]'%opf_mime
            ), namespaces={'ocf':OCF_NS}
        )
        if not opf_files:
            raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
        # Normalise so that a full-path such as "OEBPS/./content.opf" still
        # compares equal to the path produced by os.walk below; otherwise
        # opf_name would never be assigned
        opf_path = os.path.normpath(os.path.join(self.root,
                *opf_files[0].get('full-path').split('/')))
        if not exists(opf_path):
            raise InvalidEpub('OPF file does not exist at location pointed to'
                    ' by META-INF/container.xml')

        # Map of relative paths with / separators to absolute
        # paths on filesystem with os separators
        self.name_map = {}
        for dirpath, dirnames, filenames in os.walk(self.root):
            for f in filenames:
                path = join(dirpath, f)
                name = os.path.relpath(path, self.root).replace(os.sep, '/')
                self.name_map[name] = path
                if path == opf_path:
                    self.opf_name = name
                    self.mime_map[name] = opf_mime

        # Record the declared media type of every manifest item, keyed by
        # its name relative to the epub root
        opf_dir = posixpath.dirname(self.opf_name)
        for item in self.opf.xpath(
                '//opf:manifest/opf:item[@href and @media-type]',
                namespaces={'opf':OPF_NS}):
            href = item.get('href')
            self.mime_map[self.href_to_name(href, opf_dir)] = \
                    item.get('media-type')

    def manifest_worthy_names(self):
        '''
        Yield each name in ``name_map`` except those ending in ``.opf``
        and those starting with ``META-INF`` whose base name is one of
        the keys of ``META_INF``
        '''
        for name in self.name_map:
            is_opf = name.endswith('.opf')
            is_known_meta_inf = (name.startswith('META-INF') and
                    posixpath.basename(name) in self.META_INF)
            if not (is_opf or is_known_meta_inf):
                yield name

    def delete_name(self, name):
        '''
        Overridden to ensure that it will not blow up if called with
        a name that is not in the map
        '''
        if name in self.mime_map:
            self.mime_map.pop(name, None)
        if name in self.name_map:
            path = self.name_map[name]
            os.remove(path)
            self.name_map.pop(name)

    def manifest_item_for_name(self, name):
        '''
        Return the first manifest <item> whose href equals the href of
        ``name`` relative to the OPF directory, or None if there is none
        '''
        href = self.name_to_href(name,
            posixpath.dirname(self.opf_name))
        # Pass the href as an XPath variable rather than formatting it into
        # the expression. XPath string literals do not use XML escaping, so
        # an XML-escaped href (e.g. "a&amp;b.html") would never equal the
        # parsed attribute value ("a&b.html"), and a raw quote character
        # would break the expression.
        existing = self.opf.xpath('//opf:manifest/opf:item[@href=$href]',
                namespaces={'opf':OPF_NS}, href=href)
        if not existing:
            return None
        return existing[0]

    def add_name_to_manifest(self, name, mt=None):
        '''
        Append a manifest <item> for ``name`` unless one already exists.

        :param name: File name relative to the epub root
        :param mt: Media type for the item. When empty it is guessed from
            the file name, falling back to ``application/octet-stream``
        '''
        # NOTE(review): this does not mark the OPF as dirtied; presumably
        # callers invoke set() themselves - confirm.
        if self.manifest_item_for_name(name) is not None:
            return
        manifest = self.opf.xpath('//opf:manifest', namespaces={'opf':OPF_NS})[0]
        item = manifest.makeelement('{%s}item'%OPF_NS, nsmap={'opf':OPF_NS},
                href=self.name_to_href(name, posixpath.dirname(self.opf_name)),
                id=self.generate_manifest_id())
        if not mt:
            mt = guess_type(posixpath.basename(name))[0]
        if not mt:
            # Generic binary type (previously misspelled "octest-stream")
            mt = 'application/octet-stream'
        item.set('media-type', mt)
        manifest.append(item)
        self.fix_tail(item)

    def fix_tail(self, item):
        '''
        Copy surrounding whitespace onto a freshly inserted element.

        Designed only to work with self closing elements after item has
        just been inserted/appended
        '''
        parent = item.getparent()
        position = parent.index(item)
        if position == 0:
            # First child: reuse the text that follows the parent's open tag
            item.tail = parent.text
            return
        previous = parent[position - 1]
        item.tail = previous.tail
        if position == len(parent) - 1:
            # The item is now the last child, so the previous sibling is
            # given the parent's leading text as its tail
            previous.tail = parent.text

    def generate_manifest_id(self):
        '''
        Return the first id of the form ``idN`` (N = 0, 1, 2, ...) that is
        not used by any manifest item carrying an id attribute
        '''
        items = self.opf.xpath('//opf:manifest/opf:item[@id]',
                namespaces={'opf':OPF_NS})
        ids = set([x.get('id') for x in items])
        # A plain counter replaces xrange(sys.maxint), which only exists on
        # Python 2, and guarantees that the method always returns an id
        x = 0
        while True:
            c = 'id%d'%x
            if c not in ids:
                return c
            x += 1

    @property
    def opf(self):
        '''
        The OPF package document as returned by get() for ``opf_name``
        '''
        package = self.get(self.opf_name)
        return package

    def href_to_name(self, href, base=''):
        '''
        Convert an href into a file name relative to the epub root.

        Overridden to fix a bug in the Calibre function which incorrectly
        splits the href on # when # is part of the filename

        :param href: The (possibly percent-encoded) href, optionally
            followed by a #fragment
        :param base: Directory the href is relative to; when empty the
            unquoted href is returned as is
        :return: The unquoted href with the fragment removed, joined onto
            ``base`` when one is given
        '''
        # unquote lives in urllib on Python 2 and urllib.parse on Python 3
        try:
            from urllib import unquote
        except ImportError:
            from urllib.parse import unquote
        hash_index = href.find('#')
        period_index = href.find('.')
        # Only treat # as a fragment separator when it comes after the
        # first period, i.e. not when it is part of the file name itself
        if hash_index > 0 and hash_index > period_index:
            href = href.partition('#')[0]
        href = unquote(href)
        if base:
            return posixpath.join(base, href)
        return href

    def name_to_href(self, name, base):
        '''
        Express ``name`` as an href relative to the directory ``base``.

        Overridden to ensure that blank href names are correctly
        referenced as "" rather than "."
        '''
        if base:
            relative = posixpath.relpath(name, base)
            return '' if relative == '.' else relative
        # No base directory: the name already is the href
        return name

    def get_raw(self, name):
        path = self.name_map[name]
        return open(path, 'rb').read()

    def get(self, name):
        if name in self.cache:
            return self.cache[name]
        raw = self.get_raw(name)
        if name in self.mime_map:
            try:
                raw = self._parse(raw, self.mime_map[name])
            except XMLSyntaxError as err:
                raise ParseError(name, unicode(err))
        self.cache[name] = raw
        return raw

    def set(self, name, val):
        self.cache[name] = val
        self.dirtied.add(name)

    def _parse(self, raw, mimetype):
        '''
        Parse ``raw`` into an lxml tree when ``mimetype`` denotes XML.

        Overridden to add support for 'text/xml' as a mimetype for NCX files

        :param raw: File content as read from disk
        :param mimetype: Media type; matched case insensitively
        :return: The parsed root element for media types ending in
            ``+xml`` or ``/xml``, otherwise ``raw`` unchanged
        '''
        mt = mimetype.lower()
        if not (mt.endswith('+xml') or mt.endswith('/xml')):
            return raw
        parser = etree.XMLParser(no_network=True, huge_tree=not iswindows)
        raw = xml_to_unicode(raw,
            strip_encoding_pats=True, assume_utf8=True,
            resolve_entities=True)[0].strip()
        idx = raw.find('<html')
        if idx == -1:
            idx = raw.find('<HTML')
        if idx > -1:
            # Everything before the root <html> tag is dropped; entities
            # declared in a DOCTYPE there are substituted by hand first
            pre = raw[:idx]
            raw = raw[idx:]
            if '<!DOCTYPE' in pre:
                user_entities = {}
                for match in re.finditer(r'<!ENTITY\s+(\S+)\s+([^>]+)', pre):
                    val = match.group(2)
                    if val.startswith('"') and val.endswith('"'):
                        val = val[1:-1]
                    user_entities[match.group(1)] = val
                if user_entities:
                    # Escape the names: characters such as '.' are legal in
                    # entity names but are regex metacharacters, so an
                    # unescaped name could match a different reference and
                    # then fail the dictionary lookup below
                    names = '|'.join(re.escape(k) for k in user_entities)
                    pat = re.compile(r'&(%s);'%names)
                    raw = pat.sub(lambda m:user_entities[m.group(1)], raw)
        return etree.fromstring(raw, parser=parser)

    def write(self, path):
        '''
        Save all dirtied content to disk and zip the directory to ``path``.

        Overridden to change how the zip file is assembled as found
        issues with the add_dir function as it was written
        '''
        # Flush modified content back to the files under self.root
        for name in self.dirtied:
            payload = self.cache[name]
            if hasattr(payload, 'xpath'):
                # Parsed trees are serialised back to XML
                payload = etree.tostring(payload, encoding='utf-8',
                        xml_declaration=True)
            with open(self.name_map[name], 'wb') as out:
                out.write(payload)
        self.dirtied.clear()

        skipped = ['.DS_Store','mimetype']
        with ZipFile(path, 'w', compression=ZIP_DEFLATED) as archive:
            # The mimetype entry is written first and stored uncompressed
            archive.writestr('mimetype', bytes(guess_type('a.epub')[0]),
                    compression=ZIP_STORED)
            # Then every other file found below the root directory
            for folder, subdirs, filenames in os.walk(self.root):
                for filename in filenames:
                    if filename not in skipped:
                        source = os.path.join(folder, filename)
                        arcname = os.path.relpath(source,
                                self.root).replace(os.sep, '/')
                        archive.write(source, arcname)


class ExtendedContainer(Container):
    '''
    Extend the Calibre epub-fix container with additional functions
    that assist with writing updated manifests and toc
    '''
    def __init__(self, path, log):
        '''
        Initialise the base container, then load the first file whose
        name ends in ``.ncx`` as the table of contents. Both ``ncx`` and
        ``ncx_name`` stay None when there is no such file or when it
        fails to parse.
        '''
        Container.__init__(self, path, log)
        self.ncx_name = None
        self.ncx = None
        ncx_names = (candidate for candidate in self.manifest_worthy_names()
                if candidate.endswith('.ncx'))
        # Only the first NCX found is considered
        first_ncx = next(ncx_names, None)
        if first_ncx is not None:
            self.ncx_name = first_ncx
            try:
                self.ncx = self.get(first_ncx)
            except ParseError:
                # This ePub is probably protected with DRM and the NCX is encrypted
                self.ncx_name = None
                self.ncx = None

    def delete_from_manifest(self, name):
        '''
        Delete the file for ``name`` and, when the manifest has an item
        for it, remove that item from the manifest, spine, guide and
        TOC ncx
        '''
        self.delete_name(name)
        item = self.manifest_item_for_name(name)
        if item is not None:
            opf_ns = {'opf':OPF_NS}
            manifest = self.opf.xpath('//opf:manifest', namespaces=opf_ns)[0]
            details = (item.get('href'), item.get('id'))
            self.log('\t  Manifest item removed: %s (%s)'%details)
            manifest.remove(item)
            self.set(self.opf_name, self.opf)

            # The detached item still carries its id and href, which the
            # spine, guide and TOC clean-ups use for matching
            self.delete_from_spine(item)
            self.delete_from_guide(item)
            self.delete_from_toc(item)

    def delete_from_spine(self, item):
        '''
        Given an item, remove the first spine itemref whose idref equals
        the item's id and mark the OPF as dirtied. Does nothing when the
        item has no id or the spine does not reference it.
        '''
        item_id = item.get('id')
        if item_id is None:
            # Without an id nothing in the spine can refer to this item
            return
        # Bind the id as an XPath variable so that quote characters in it
        # cannot break or alter the expression
        itemref = self.opf.xpath('//opf:spine/opf:itemref[@idref=$idref]',
                namespaces={'opf':OPF_NS}, idref=item_id)
        if itemref:
            self.log('\t  Spine itemref removed:', item_id)
            spine = itemref[0].getparent()
            spine.remove(itemref[0])
            self.set(self.opf_name, self.opf)

    def delete_from_guide(self, item):
        '''
        Given an item, remove the first guide reference whose href equals
        the item's href and mark the OPF as dirtied. Does nothing when the
        item has no href or the guide does not reference it.
        '''
        item_href = item.get('href')
        if item_href is None:
            # Without an href there is nothing to match in the guide
            return
        # Bind the href as an XPath variable so that quote characters in it
        # cannot break or alter the expression
        reference = self.opf.xpath('//opf:guide/opf:reference[@href=$href]',
                namespaces={'opf':OPF_NS}, href=item_href)
        if reference:
            self.log('\t  Guide reference removed: %s'%item_href)
            guide = reference[0].getparent()
            guide.remove(reference[0])
            self.set(self.opf_name, self.opf)

    def delete_from_toc(self, item):
        '''
        Given an item from the manifest, remove any matching entry from
        the TOC ncx file. Child navPoints of a removed entry are moved
        up to take its place.
        '''
        if item is None or self.ncx_name is None:
            return

        ncx_ns = {'ncx':NCX_NS}
        navpoint_tag = '{%s}navPoint'%NCX_NS

        def points_at_item(node):
            # True when the navPoint's content src is the item's href,
            # with or without a fragment (compared case insensitively)
            sources = node.xpath('ncx:content/@src', namespaces=ncx_ns)
            if not sources:
                return False
            src = sources[0].lower()
            href = item.get('href').lower()
            if src != href and not src.startswith(href + '#'):
                return False
            self.log('\t  TOC Navpoint removed of:', src)
            return True

        self.log('NCX NAME:', self.ncx_name)
        self.log('NCX:', self.ncx)
        changed = False
        for node in self.ncx.xpath('//ncx:navPoint', namespaces=ncx_ns):
            if not points_at_item(node):
                continue
            changed = True
            parent = node.getparent()
            position = parent.index(node)
            parent.remove(node)
            # Inserting in reverse at a fixed position keeps the promoted
            # children in their original order
            for sub in reversed(node):
                if sub.tag == navpoint_tag:
                    self.log('\t  TOC Navpoint child promoted')
                    parent.insert(position, sub)
        if changed:
            self._indent(self.ncx)
            self.set(self.ncx_name, self.ncx)

    def _indent(self, elem, level=0):
        i = '\n' + level*'    '
        if len(elem):
            if not elem.text or not elem.text.strip():
                elem.text = i + '    '
            for e in elem:
                self._indent(e, level+1)
                if not e.tail or not e.tail.strip():
                    e.tail = i + '    '
            if not e.tail or not e.tail.strip():
                e.tail = i
        else:
            if level and (not elem.text or not elem.text.strip()):
                elem.text = i
            if level and (not elem.tail or not elem.tail.strip()):
                elem.tail = i

    def generate_unique(self, id=None, href=None):
        '''
        Generate a new unique identifier and/or internal path for use in
        creating a new manifest item, using the provided :param:`id` and/or
        :param:`href` as bases.

        Returns an two-tuple of the new id and path.  If either :param:`id` or
        :param:`href` are `None` then the corresponding item in the return
        tuple will also be `None`.

        Grant: Copied/modified from calibre.ebooks.oeb.base.Manifest
        '''
        if id is not None:
            items = self.opf.xpath('//opf:manifest/opf:item[@id]',
                    namespaces={'opf':OPF_NS})
            ids = set([x.get('id') for x in items])

            base = id
            index = 1
            while id in ids:
                id = base + str(index)
                index += 1
        if href is not None:
            items = self.opf.xpath('//opf:manifest/opf:item[@href]',
                    namespaces={'opf':OPF_NS})
            hrefs = set([x.get('href') for x in items])

            href = urlnormalize(href)
            base, ext = os.path.splitext(href)
            index = 1
            lhrefs = set([x.lower() for x in hrefs])
            while href.lower() in lhrefs:
                href = base + str(index) + ext
                index += 1
        return id, href

    def add_to_manifest(self, id, href, mt=None):
        '''
        Given an id and an href, create an item in the manifest for it

        :param id: Value for the item's id attribute
        :param href: Value for the item's href attribute
        :param mt: Media type for the item. When empty it is guessed from
            the href, falling back to ``application/octet-stream``
        '''
        # NOTE(review): this does not mark the OPF as dirtied; presumably
        # callers invoke set() themselves - confirm.
        manifest = self.opf.xpath('//opf:manifest', namespaces={'opf':OPF_NS})[0]
        item = manifest.makeelement('{%s}item'%OPF_NS, nsmap={'opf':OPF_NS},
                href=href, id=id)
        if not mt:
            mt = guess_type(href)[0]
        if not mt:
            # Generic binary type (previously misspelled "octest-stream")
            mt = 'application/octet-stream'
        item.set('media-type', mt)
        manifest.append(item)
        self.fix_tail(item)
        self.log('\t  Manifest item added: %s (%s)'%(href, id))

    def add_to_spine(self, id, index=-1):
        '''
        Create a spine itemref for ``id``. A non negative ``index``
        inserts it at that position; otherwise it is appended at the end.
        '''
        opf_ns = {'opf':OPF_NS}
        spine = self.opf.xpath('//opf:spine', namespaces=opf_ns)[0]
        itemref = spine.makeelement('{%s}itemref'%OPF_NS, nsmap=opf_ns,
                idref=id)
        if index < 0:
            spine.append(itemref)
        else:
            spine.insert(index, itemref)
        # Give the new element the same surrounding whitespace as its siblings
        self.fix_tail(itemref)
        self.log('\t  Spine item inserted: %s at pos: %d'%(id, index))

    def get_spine_itemref_idref(self, index):
        '''
        Return the idref of the spine child at position ``index``, or
        None when ``index`` is not below the number of spine children
        '''
        spine = self.opf.xpath('//opf:spine', namespaces={'opf':OPF_NS})[0]
        if index >= len(spine):
            return None
        return spine[index].get('idref')

