# This file is part of EbookLib.
# Copyright (c) 2013 Aleksandar Erkalovic <aerkalov@gmail.com>
#
# EbookLib is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# EbookLib is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with EbookLib.  If not, see <http://www.gnu.org/licenses/>.


#~ 12 Dec 2022:  Customized to be used as a single **read-only** module, epub.py, in the Calibre EPOM plugin.  All necessary EbookLib files have been copied into this epub.py.


#~ import zipfile
from calibre.constants import DEBUG
from calibre.utils import zipfile
from calibre.utils.zipfile import BadZipfile, ZipFile
import zlib
import six
import logging
import uuid
import warnings
import posixpath as zip_path
import os.path
from collections import OrderedDict

try:
    from urllib.parse import unquote
except ImportError:
    from urllib import unquote

from lxml import etree

#~ all of the next were hard-copied into epub.py for use in the epom plugin...
#~ import ebooklib
#~ from ebooklib.utils import parse_string, parse_html_string, guess_type, get_pages_for_items

# Version of EPUB library
# Tuple of version components; EpubBook.reset() joins them with '.' to build
# the 'Ebook-lib x.y.z' generator metadata entry.
VERSION = (0, 18, 1)

# Short namespace keys mapped to the full XML namespace URIs. The metadata
# helpers on EpubBook accept either the short key (e.g. 'DC') or the URI itself.
NAMESPACES = {'XML': 'http://www.w3.org/XML/1998/namespace',
              'EPUB': 'http://www.idpf.org/2007/ops',
              'DAISY': 'http://www.daisy.org/z3986/2005/ncx/',
              'OPF': 'http://www.idpf.org/2007/opf',
              'CONTAINERNS': 'urn:oasis:names:tc:opendocument:xmlns:container',
              'DC': 'http://purl.org/dc/elements/1.1/',
              'XHTML': 'http://www.w3.org/1999/xhtml'}

# XML Templates

# Location of the OCF container document inside the EPUB archive.
CONTAINER_PATH = 'META-INF/container.xml'

# Container document template; '%(folder_name)s' is a %-format placeholder
# for the folder that holds content.opf.
CONTAINER_XML = '''<?xml version="1.0" encoding="utf-8"?>
<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
  <rootfiles>
    <rootfile media-type="application/oebps-package+xml" full-path="%(folder_name)s/content.opf"/>
  </rootfiles>
</container>
'''

# The templates below are wrapped in six.b() and registered by
# EpubBook.reset() under the keys 'ncx', 'nav', 'chapter' and 'cover'.

# Empty NCX skeleton (template key 'ncx').
NCX_XML = six.b('''<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" />''')

# Empty navigation document skeleton (template key 'nav').
NAV_XML = six.b('''<?xml version="1.0" encoding="utf-8"?><!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops"/>''')

# Empty chapter skeleton (template key 'chapter'); EpubHtml.get_content()
# fills in <head> and <body> under this root element.
CHAPTER_XML = six.b('''<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops"  epub:prefix="z3998: http://www.daisy.org/z3998/2012/vocab/structure/#"></html>''')

# Cover page skeleton (template key 'cover'); EpubCoverHtml.get_content()
# sets the 'src' and 'alt' attributes of the <img> element.
COVER_XML = six.b('''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" lang="en" xml:lang="en">
 <head>
  <style>
    body { margin: 0em; padding: 0em; }
    img { max-width: 100%; max-height: 100%; }
  </style>
 </head>
 <body>
   <img src="" alt="" />
 </body>
</html>''')

#~ originally in __init__; hard-copied in for epom plugin...
# Media types that are considered images.
IMAGE_MEDIA_TYPES = ['image/jpeg', 'image/jpg', 'image/png', 'image/svg+xml']

# LIST OF POSSIBLE ITEMS
# Integer codes returned by the get_type() methods of the item classes.
ITEM_UNKNOWN = 0
ITEM_IMAGE = 1
ITEM_STYLE = 2
ITEM_SCRIPT = 3
ITEM_NAVIGATION = 4
ITEM_VECTOR = 5
ITEM_FONT = 6
ITEM_VIDEO = 7
ITEM_AUDIO = 8
ITEM_DOCUMENT = 9
ITEM_COVER = 10
ITEM_SMIL = 11

# EXTENSION MAPPER
# Item type -> lower-case file extensions (with the leading dot).
# EpubItem.get_type() returns the first key whose list contains the extension.
# NOTE: '.jpg', '.jpeg' and '.png' are listed under both ITEM_IMAGE and
# ITEM_COVER, so for those extensions the result depends on which key is
# iterated first.
EXTENSIONS = {ITEM_IMAGE: ['.jpg', '.jpeg', '.gif', '.tiff', '.tif', '.png'],
              ITEM_STYLE: ['.css'],
              ITEM_VECTOR: ['.svg'],
              ITEM_FONT: ['.otf', '.woff', '.ttf'],
              ITEM_SCRIPT: ['.js'],
              ITEM_NAVIGATION: ['.ncx'],
              ITEM_VIDEO: ['.mov', '.mp4', '.avi'],
              ITEM_AUDIO: ['.mp3', '.ogg'],
              ITEM_COVER: ['.jpg', '.jpeg', '.png'],
              ITEM_SMIL: ['.smil']
              }


# TOC and navigation elements

class Section(object):

    """
    TOC/navigation element made of a title and an optional href.
    """

    def __init__(self, title, href=''):
        """
        :Args:
          - title: Title of the section
          - href: HREF the section points to (optional, defaults to empty string)
        """
        self.title, self.href = title, href


class Link(object):

    """
    TOC/navigation element made of an href, a title and an optional uid.
    """

    def __init__(self, href, title, uid=None):
        """
        :Args:
          - href: HREF the link points to
          - title: Title of the link
          - uid: Identifier for the link (optional)
        """
        self.href, self.title, self.uid = href, title, uid

# Exceptions


class EpubException(Exception):

    """
    Exception carrying a code and a message.
    """

    def __init__(self, code, msg):
        """
        :Args:
          - code: Code identifying the error
          - msg: Message describing the error
        """
        self.code, self.msg = code, msg

    def __str__(self):
        # String form is the repr of the message (quotes included for strings).
        return '%r' % (self.msg,)

# Items


class EpubItem(object):

    """
    Common base for everything that can be stored in a book.
    """

    def __init__(self, uid=None, file_name='', media_type='', content=six.b(''), manifest=True):
        """
        :Args:
          - uid: Unique identifier for this item (optional)
          - file_name: File name for this item (optional)
          - media_type: Media type for this item (optional)
          - content: Content for this item (optional)
          - manifest: Manifest for this item (optional)
        """
        # back-reference to the owning book; filled in by EpubBook.add_item()
        self.book = None

        self.id = uid
        self.file_name = file_name
        self.media_type = media_type
        self.content = content
        self.manifest = manifest
        self.is_linear = True

    def get_id(self):
        """
        Gives back the unique identifier of the item.

        :Returns:
          The uid this item was created with (or was assigned later).
        """
        return self.id

    def get_name(self):
        """
        Gives back the name of the item, which by default is its file name.

        :Returns:
          File name of the item.
        """
        return self.file_name

    def get_type(self):
        """
        Derive the item type from the extension of the item name.

        Possible results:
          - ITEM_UNKNOWN = 0
          - ITEM_IMAGE = 1
          - ITEM_STYLE = 2
          - ITEM_SCRIPT = 3
          - ITEM_NAVIGATION = 4
          - ITEM_VECTOR = 5
          - ITEM_FONT = 6
          - ITEM_VIDEO = 7
          - ITEM_AUDIO = 8
          - ITEM_DOCUMENT = 9
          - ITEM_COVER = 10
          - ITEM_SMIL = 11

        The lower-cased extension is looked up in EXTENSIONS; the first type whose
        extension list contains it wins.

        :Returns:
          Item type as number, ITEM_UNKNOWN when no extension list matches.
        """
        extension = zip_path.splitext(self.get_name())[1].lower()

        for item_type, known_extensions in six.iteritems(EXTENSIONS):
            if extension in known_extensions:
                return item_type

        return ITEM_UNKNOWN

    def get_content(self, default=six.b('')):
        """
        Gives back the stored content, which should be of type 'str' (Python 2) or 'bytes' (Python 3).

        :Args:
          - default: Value handed back when the stored content is empty or not set.

        :Returns:
          Content of the item, or the default.
        """
        if self.content:
            return self.content

        return default

    def set_content(self, content):
        """
        Replaces the stored content.

        :Args:
          - content: New content value
        """
        self.content = content

    def __str__(self):
        return '<EpubItem:%s>' % self.id


class EpubNcx(EpubItem):

    """
    Navigation Control File (NCX) item of the EPUB.
    """

    def __init__(self, uid='ncx', file_name='toc.ncx'):
        """
        :Args:
          - uid: Unique identifier for this item (optional, defaults to 'ncx')
          - file_name: File name for this item (optional, defaults to 'toc.ncx')
        """
        # media type is fixed for NCX documents
        super(EpubNcx, self).__init__(uid, file_name, 'application/x-dtbncx+xml')

    def __str__(self):
        return '<EpubNcx:%s>' % self.id


class EpubCover(EpubItem):

    """
    Cover image item of the EPUB file.
    """

    def __init__(self, uid='cover-img', file_name=''):
        """
        :Args:
          - uid: Unique identifier for this item (optional, defaults to 'cover-img')
          - file_name: File name of the cover image (optional)
        """
        super(EpubCover, self).__init__(uid, file_name)

    def get_type(self):
        """
        :Returns:
          Always ITEM_COVER, regardless of the file extension.
        """
        return ITEM_COVER

    def __str__(self):
        return '<EpubCover:%s:%s>' % (self.id, self.file_name)


class EpubHtml(EpubItem):

    """
    Represents HTML document in the EPUB file.
    """
    # key of the book template used by get_content() as the document skeleton
    _template_name = 'chapter'

    def __init__(self, uid=None, file_name='', media_type='', content=None, title='',
                 lang=None, direction=None, media_overlay=None, media_duration=None):
        """
        :Args:
          - uid: Unique identifier for this item (optional)
          - file_name: File name for this item (optional)
          - media_type: Media type for this item (optional)
          - content: HTML content for this item (optional)
          - title: Title written into the <title> element by get_content() (optional)
          - lang: Language code; get_content() falls back to the book language when not set (optional)
          - direction: Value for the 'dir' attribute set by get_content() (optional)
          - media_overlay: Stored as given (optional)
          - media_duration: Stored as given (optional)
        """
        super(EpubHtml, self).__init__(uid, file_name, media_type, content)

        self.title = title
        self.lang = lang
        self.direction = direction

        self.media_overlay = media_overlay
        self.media_duration = media_duration

        self.links = []
        self.properties = []
        self.pages = []

    def is_chapter(self):
        """
        Returns if this document is chapter or not.

        :Returns:
          Returns bool value, always True for this class.
        """
        return True

    def get_type(self):
        """
        Always returns ITEM_DOCUMENT as type of this document.

        :Returns:
          Always returns ITEM_DOCUMENT
        """
        return ITEM_DOCUMENT

    def set_language(self, lang):
        """
        Sets language for this book item. By default it will use language of the book but it
        can be overwritten with this call.

        :Args:
          - lang: Language code
        """
        self.lang = lang

    def get_language(self):
        """
        Get language code for this book item. Language of the book item can be different from
        the language settings defined globally for book.

        :Returns:
          As string returns language code.
        """
        return self.lang

    def add_link(self, **kwgs):
        """
        Add additional link to the document. Links will be embedded only inside of this document.
        A link of type 'text/javascript' also adds 'scripted' to the item properties (once).

        >>> add_link(href='styles.css', rel='stylesheet', type='text/css')
        """
        self.links.append(kwgs)
        if kwgs.get('type') == 'text/javascript' and 'scripted' not in self.properties:
            self.properties.append('scripted')

    def get_links(self):
        """
        Returns additional links defined for this document.

        :Returns:
          Generator over the link dictionaries.
        """
        return (link for link in self.links)

    def get_links_of_type(self, link_type):
        """
        Returns additional links of specific type.

        :Args:
          - link_type: Value the 'type' key of the link has to match

        :Returns:
          Generator over the matching link dictionaries.
        """
        return (link for link in self.links if link.get('type', '') == link_type)

    def add_item(self, item):
        """
        Add other item to this document. It will create additional links according to the item type;
        only style and script items produce a link, other types are ignored.

        :Args:
          - item: item we want to add defined as instance of EpubItem
        """
        item_type = item.get_type()

        if item_type == ITEM_STYLE:
            self.add_link(href=item.get_name(), rel='stylesheet', type='text/css')
        elif item_type == ITEM_SCRIPT:
            self.add_link(src=item.get_name(), type='text/javascript')

    def get_body_content(self):
        """
        Returns content of BODY element for this HTML document. Content will be of type 'str' (Python 2)
        or 'bytes' (Python 3).

        :Returns:
          Returns serialized content of the body. An empty string ('') is returned when the
          content can not be parsed, when there is no body element or when the body is empty.
        """

        try:
            html_tree = parse_html_string(self.content)
        except Exception:
            # best effort: unparsable content is reported as empty
            return ''

        body = html_tree.find('body')

        # find() gives None for a missing element, so test for that explicitly
        # instead of calling len() on the result.
        if body is None:
            return ''

        # A body is empty only when it has neither child elements nor text;
        # a body holding just text must not be dropped.
        if len(body) == 0 and not (body.text or '').strip():
            return ''

        tree_str = etree.tostring(body, pretty_print=True, encoding='utf-8', xml_declaration=False)

        # Strip the wrapping <body>...</body>; this only matches a body tag
        # without attributes, otherwise the serialized element is returned whole.
        if tree_str.startswith(six.b('<body>')):
            n = tree_str.rindex(six.b('</body>'))

            return tree_str[6:n]

        return tree_str

    def get_content(self, default=None):
        """
        Returns content for this document as HTML string. Content will be of type 'str' (Python 2)
        or 'bytes' (Python 3).

        The document is rebuilt from the book template named by _template_name: a new head is
        created from the title and the links of this item, and the children of the body of the
        stored content are moved into a new body.

        :Args:
          - default: Accepted for compatibility with EpubItem.get_content(); not used here.

        :Returns:
          Returns content of this document, or an empty string ('') if the stored content
          can not be parsed.
        """

        tree = parse_string(self.book.get_template(self._template_name))
        tree_root = tree.getroot()

        lang = self.lang or self.book.language
        tree_root.set('lang', lang)
        tree_root.attrib['{%s}lang' % NAMESPACES['XML']] = lang

        # add to the head also
        #  <meta charset="utf-8" />

        try:
            html_tree = parse_html_string(self.content)
        except Exception:
            # best effort: unparsable content is reported as empty
            return ''

        # create and populate head
        # NOTE: the <head> of the stored content is not carried over; only the
        # title and the links registered on this item end up in the new head.

        _head = etree.SubElement(tree_root, 'head')

        if self.title != '':
            _title = etree.SubElement(_head, 'title')
            _title.text = self.title

        for lnk in self.links:
            if lnk.get('type') == 'text/javascript':
                _lnk = etree.SubElement(_head, 'script', lnk)
                # force <script></script>
                _lnk.text = ''
            else:
                _lnk = etree.SubElement(_head, 'link', lnk)

        # create and populate body

        _body = etree.SubElement(tree_root, 'body')
        if self.direction:
            _body.set('dir', self.direction)
            tree_root.set('dir', self.direction)

        body = html_tree.find('body')
        if body is not None:
            # Snapshot the children first: append() moves each element out of
            # the old body, which would otherwise disturb the iteration.
            for child in list(body):
                _body.append(child)

        tree_str = etree.tostring(tree, pretty_print=True, encoding='utf-8', xml_declaration=True)

        return tree_str

    def __str__(self):
        return '<EpubHtml:%s:%s>' % (self.id, self.file_name)


class EpubCoverHtml(EpubHtml):

    """
    Represents Cover page in the EPUB file.
    """

    def __init__(self, uid='cover', file_name='cover.xhtml', image_name='', title='Cover'):
        """
        :Args:
          - uid: Unique identifier for this item (optional, defaults to 'cover')
          - file_name: File name of the cover page (optional, defaults to 'cover.xhtml')
          - image_name: Value written into the 'src' attribute of the cover image (optional)
          - title: Title of the page, also used as 'alt' text of the image (optional)
        """
        super(EpubCoverHtml, self).__init__(uid=uid, file_name=file_name, title=title)

        self.image_name = image_name
        self.is_linear = False

    def is_chapter(self):
        """
        Returns if this document is chapter or not.

        :Returns:
          Returns bool value, always False for the cover page.
        """

        return False

    def get_content(self, default=None):
        """
        Returns content for cover page as HTML string. Content will be of type 'str' (Python 2) or 'bytes' (Python 3).

        As a side effect the stored content of this item is replaced with the 'cover' template
        of the book.

        :Args:
          - default: Accepted so the signature matches EpubHtml.get_content(); passed on to it.

        :Returns:
          Returns content of this document.
        """

        self.content = self.book.get_template('cover')

        tree = parse_string(super(EpubCoverHtml, self).get_content(default))
        tree_root = tree.getroot()

        images = tree_root.xpath('//xhtml:img', namespaces={'xhtml': NAMESPACES['XHTML']})

        # A custom 'cover' template may not contain an <img>; in that case
        # there is nothing to fill in, so do not fail on images[0].
        if images:
            images[0].set('src', self.image_name)
            images[0].set('alt', self.title)

        tree_str = etree.tostring(tree, pretty_print=True, encoding='utf-8', xml_declaration=True)

        return tree_str

    def __str__(self):
        return '<EpubCoverHtml:%s:%s>' % (self.id, self.file_name)


class EpubNav(EpubHtml):

    """
    Navigation Document item of the EPUB file.
    """

    def __init__(self, uid='nav', file_name='nav.xhtml', media_type='application/xhtml+xml', title=''):
        """
        :Args:
          - uid: Unique identifier for this item (optional, defaults to 'nav')
          - file_name: File name for this item (optional, defaults to 'nav.xhtml')
          - media_type: Media type for this item (optional, defaults to 'application/xhtml+xml')
          - title: Title of the document (optional)
        """
        super(EpubNav, self).__init__(
            uid=uid,
            file_name=file_name,
            media_type=media_type,
            title=title,
        )

    def is_chapter(self):
        """
        Tells whether this document counts as a chapter.

        :Returns:
          Always False for the navigation document.
        """
        return False

    def __str__(self):
        return '<EpubNav:%s:%s>' % (self.id, self.file_name)


class EpubImage(EpubItem):

    """
    Image item of the EPUB file.
    """

    def __init__(self, *args, **kwargs):
        """
        Takes the same arguments as EpubItem and hands them over unchanged.
        """
        super(EpubImage, self).__init__(*args, **kwargs)

    def get_type(self):
        """
        :Returns:
          Always ITEM_IMAGE, regardless of the file extension.
        """
        return ITEM_IMAGE

    def __str__(self):
        return '<EpubImage:%s:%s>' % (self.id, self.file_name)


class EpubSMIL(EpubItem):

    """
    SMIL item of the EPUB file; its media type is always 'application/smil+xml'.
    """

    def __init__(self, uid=None, file_name='', content=None):
        """
        :Args:
          - uid: Unique identifier for this item (optional)
          - file_name: File name for this item (optional)
          - content: Content for this item (optional)
        """
        super(EpubSMIL, self).__init__(
            uid=uid,
            file_name=file_name,
            media_type='application/smil+xml',
            content=content,
        )

    def get_type(self):
        """
        :Returns:
          Always ITEM_SMIL, regardless of the file extension.
        """
        return ITEM_SMIL

    def __str__(self):
        return '<EpubSMIL:%s:%s>' % (self.id, self.file_name)

# EpubBook

class EpubBook(object):

    """
    Holds everything that belongs to one book: metadata, items, spine, guide, pages,
    table of contents, bindings and the templates used to generate pages.
    """

    def __init__(self):
        self.EPUB_VERSION = None

        self.reset()

        # we should have options here

    def reset(self):
        "Initialises all needed variables to default values"

        self.metadata = {}
        self.items = []
        self.spine = []
        self.guide = []
        self.pages = []
        self.toc = []
        self.bindings = []

        self.IDENTIFIER_ID = 'id'
        self.FOLDER_NAME = 'EPUB'

        # counters used by add_item() to generate ids for items without one
        self._id_html = 0
        self._id_image = 0
        self._id_static = 0

        self.title = ''
        self.language = 'en'
        self.direction = None

        self.templates = {
            'ncx': NCX_XML,
            'nav': NAV_XML,
            'chapter': CHAPTER_XML,
            'cover': COVER_XML
        }

        self.add_metadata('OPF', 'generator', '', {
            'name': 'generator', 'content': 'Ebook-lib %s' % '.'.join([str(s) for s in VERSION])
        })

        # default to using a randomly-unique identifier if one is not specified manually
        self.set_identifier(str(uuid.uuid4()))

        # custom prefixes and namespaces to be set to the content.opf doc
        self.prefixes = []
        self.namespaces = {}

    def set_identifier(self, uid):
        """
        Sets unique id for this epub

        :Args:
          - uid: Value of unique identifier for this book
        """

        self.uid = uid

        self.set_unique_metadata('DC', 'identifier', self.uid, {'id': self.IDENTIFIER_ID})

    def set_title(self, title):
        """
        Set title. You can set multiple titles; every call adds one more DC 'title' entry
        and the 'title' attribute keeps the most recent one.

        :Args:
          - title: Title value
        """

        self.title = title

        self.add_metadata('DC', 'title', self.title)

    def set_language(self, lang):
        """
        Set language for this epub. You can set multiple languages. Specific items in the book can have
        different language settings.

        :Args:
          - lang: Language code
        """

        self.language = lang

        self.add_metadata('DC', 'language', lang)

    def set_direction(self, direction):
        """
        :Args:
          - direction: Options are "ltr", "rtl" and "default"
        """

        self.direction = direction

    def set_cover(self, file_name, content, create_page=True):
        """
        Set cover and create cover document if needed.

        :Args:
          - file_name: file name of the cover image
          - content: Content for the cover image
          - create_page: Should cover page be defined. Defined as bool value (optional). Default value is True.
        """

        # as it is now, it can only be called once
        c0 = EpubCover(file_name=file_name)
        c0.content = content
        self.add_item(c0)

        if create_page:
            c1 = EpubCoverHtml(image_name=file_name)
            self.add_item(c1)

        # 'cover-img' is the default uid of EpubCover created above
        self.add_metadata(None, 'meta', '', OrderedDict([('name', 'cover'), ('content', 'cover-img')]))

    def add_author(self, author, file_as=None, role=None, uid='creator'):
        """
        Add author for this document

        :Args:
          - author: Name of the author, stored as DC 'creator'
          - file_as: If given, added as a 'file-as' meta refining the creator (optional)
          - role: If given, added as a 'role' meta refining the creator (optional)
          - uid: Id of the creator element the refinements point to (optional, defaults to 'creator')
        """

        self.add_metadata('DC', 'creator', author, {'id': uid})

        if file_as:
            self.add_metadata(None, 'meta', file_as, {'refines': '#' + uid,
                                                      'property': 'file-as',
                                                      'scheme': 'marc:relators'})
        if role:
            self.add_metadata(None, 'meta', role, {'refines': '#' + uid,
                                                   'property': 'role',
                                                   'scheme': 'marc:relators'})

    def add_metadata(self, namespace, name, value, others=None):
        """
        Add metadata

        :Args:
          - namespace: Key of NAMESPACES (e.g. 'DC') or the namespace itself; None is allowed
          - name: Name of the metadata element
          - value: Value of the metadata element
          - others: Additional attributes stored next to the value (optional)
        """

        if namespace in NAMESPACES:
            namespace = NAMESPACES[namespace]

        if namespace not in self.metadata:
            self.metadata[namespace] = {}

        if name not in self.metadata[namespace]:
            self.metadata[namespace][name] = []

        self.metadata[namespace][name].append((value, others))

    def get_metadata(self, namespace, name):
        """
        Retrieve metadata

        :Args:
          - namespace: Key of NAMESPACES (e.g. 'DC') or the namespace itself
          - name: Name of the metadata element

        :Returns:
          List of (value, others) tuples. Empty list if nothing is stored for this
          namespace and name.
        """

        if namespace in NAMESPACES:
            namespace = NAMESPACES[namespace]

        # A namespace without any metadata yet is treated the same way as an
        # unknown name: an empty list instead of a KeyError.
        return self.metadata.get(namespace, {}).get(name, [])

    def set_unique_metadata(self, namespace, name, value, others=None):
        "Add metadata if metadata with this identifier does not already exist, otherwise update existing metadata."

        if namespace in NAMESPACES:
            namespace = NAMESPACES[namespace]

        if namespace in self.metadata and name in self.metadata[namespace]:
            self.metadata[namespace][name] = [(value, others)]
        else:
            self.add_metadata(namespace, name, value, others)

    def add_item(self, item):
        """
        Add additional item to the book. If not defined, media type and chapter id will be defined
        for the item.

        :Args:
          - item: Item instance

        :Returns:
          The item which was added.
        """
        if item.media_type == '':
            # NOTE(review): guess_type is defined outside this part of the file.
            # If it returns a (type, encoding) pair like mimetypes.guess_type,
            # the names below are misleading and the encoding would be taken as
            # media type whenever it is set - confirm against guess_type.
            (has_guessed, media_type) = guess_type(item.get_name().lower())

            if has_guessed:
                if media_type is not None:
                    item.media_type = media_type
                else:
                    item.media_type = has_guessed
            else:
                item.media_type = 'application/octet-stream'

        if not item.get_id():
            # make chapter_, image_ and static_ configurable
            if isinstance(item, EpubHtml):
                item.id = 'chapter_%d' % self._id_html
                self._id_html += 1
                # If there's a page list, append it to the book's page list
                self.pages += item.pages
            elif isinstance(item, EpubImage):
                item.id = 'image_%d' % self._id_image
                self._id_image += 1
            else:
                item.id = 'static_%d' % self._id_static
                self._id_static += 1

        item.book = self
        self.items.append(item)

        return item

    def get_item_with_id(self, uid):
        """
        Returns item for defined UID.

        >>> book.get_item_with_id('image_001')

        :Args:
          - uid: UID for the item

        :Returns:
          Returns item object. Returns None if nothing was found.
        """
        for item in self.get_items():
            if item.id == uid:
                return item

        return None

    def get_item_with_href(self, href):
        """
        Returns item for defined HREF.

        >>> book.get_item_with_href('EPUB/document.xhtml')

        :Args:
          - href: HREF for the item we are searching for

        :Returns:
          Returns item object. Returns None if nothing was found.
        """
        for item in self.get_items():
            if item.get_name() == href:
                return item

        return None

    def get_items(self):
        """
        Returns all items attached to this book.

        :Returns:
          Generator over all items.
        """
        return (item for item in self.items)

    def get_items_of_type(self, item_type):
        """
        Returns all items of specified type.

        >>> book.get_items_of_type(epub.ITEM_IMAGE)

        :Args:
          - item_type: Type for items we are searching for

        :Returns:
          Generator over the found items.
        """
        return (item for item in self.items if item.get_type() == item_type)

    def get_items_of_media_type(self, media_type):
        """
        Returns all items of specified media type.

        :Args:
          - media_type: Media type for items we are searching for

        :Returns:
          Generator over the found items.
        """
        return (item for item in self.items if item.media_type == media_type)

    def set_template(self, name, value):
        """
        Defines templates which are used to generate certain types of pages. When defining new value for the template
        we have to use content of type 'str' (Python 2) or 'bytes' (Python 3).

        At the moment we use these templates:
          - ncx
          - nav
          - chapter
          - cover

        :Args:
          - name: Name for the template
          - value: Content for the template
        """

        self.templates[name] = value

    def get_template(self, name):
        """
        Returns value for the template.

        :Args:
          - name: template name

        :Returns:
          Value of the template. None if no template with this name is defined.
        """
        return self.templates.get(name)

    def add_prefix(self, name, uri):
        """
        Appends custom prefix to be added to the content.opf document

        >>> epub_book.add_prefix('bkterms', 'http://booktype.org/')

        :Args:
          - name: namespace name
          - uri: URI for the namespace
        """

        self.prefixes.append('%s: %s' % (name, uri))


class EpubReader(object):
    """
    Loads an EPUB, given either as a zip archive or as an unpacked directory,
    into an EpubBook instance.

    Typical use is ``EpubReader(file_name, options).load()``.

    Options (merged over DEFAULT_OPTIONS):
      - ignore_ncx: when true, the NCX file is only parsed if the book has no
        navigation document (EpubNav item)
      - plugins: objects whose ``after_read`` / ``html_after_read`` hooks are
        invoked by process()
    """

    DEFAULT_OPTIONS = {
        'ignore_ncx': True  #changed to True from False by EPOM plugin
    }

    def __init__(self, epub_file_name, options=None):
        """
        :Args:
          - epub_file_name: path to the EPUB file or to an unpacked EPUB directory
          - options: extra options as dictionary (optional)
        """
        self.file_name = epub_file_name
        self.book = EpubBook()
        self.zf = None

        self.opf_file = ''
        self.opf_dir = ''

        self.options = dict(self.DEFAULT_OPTIONS)
        if options:
            self.options.update(options)

        #~ self._check_deprecated()   #EPOM

    def _check_deprecated(self):
        """Warns when the NCX is not being ignored (no longer called from __init__)."""
        if not self.options.get('ignore_ncx'):
            warnings.warn('In the future version we will turn default option ignore_ncx to True.')

    def process(self):
        """
        Runs the reader plugins: ``after_read(book)`` once per plugin, then
        ``html_after_read(book, item)`` for every EpubHtml item.
        """
        # should cache this html parsing so we don't do it for every plugin
        for plg in self.options.get('plugins', []):
            if hasattr(plg, 'after_read'):
                plg.after_read(self.book)

        for item in self.book.get_items():
            if isinstance(item, EpubHtml):
                for plg in self.options.get('plugins', []):
                    if hasattr(plg, 'html_after_read'):
                        plg.html_after_read(self.book, item)

    def load(self):
        """
        Reads the file given to the constructor.

        :Returns:
          The populated EpubBook instance.
        """
        self._load()

        return self.book

    def read_file(self, name):
        """
        Returns the raw content of one file inside the opened container.

        :Args:
          - name: path inside the container; it is normalised first

        Raises KeyError when a zip archive has no such member.
        """
        name = zip_path.normpath(name)
        return self.zf.read(name)

    def _load_container(self):
        """Reads META-INF/container.xml and records the OPF path and its directory."""
        meta_inf = self.read_file('META-INF/container.xml')
        tree = parse_string(meta_inf)

        for root_file in tree.findall('//xmlns:rootfile[@media-type]', namespaces={'xmlns': NAMESPACES['CONTAINERNS']}):
            if root_file.get('media-type') == 'application/oebps-package+xml':
                self.opf_file = root_file.get('full-path')
                self.opf_dir = zip_path.dirname(self.opf_file)

    def _load_metadata(self):
        """
        Reads version, unique identifier and the <metadata> element of the OPF
        (self.container holds the parsed OPF tree) into the book.

        book.metadata becomes a dict: namespace -> tag/name -> list of
        (text, attributes) tuples.
        """
        container_root = self.container.getroot()

        # get epub version
        self.book.version = container_root.get('version', None)

        # get unique-identifier
        if container_root.get('unique-identifier', None):
            self.book.IDENTIFIER_ID = container_root.get('unique-identifier')

        # get metadata (xml:lang is not read here)
        metadata = self.container.find('{%s}%s' % (NAMESPACES['OPF'], 'metadata'))

        nsmap = metadata.nsmap
        nstags = dict((k, '{%s}' % v) for k, v in six.iteritems(nsmap))
        default_ns = nstags.get(None, '')

        nsdict = dict((v, {}) for v in nsmap.values())

        def add_item(ns, tag, value, extra):
            if ns not in nsdict:
                nsdict[ns] = {}

            values = nsdict[ns].setdefault(tag, [])
            values.append((value, extra))

        for t in metadata:
            # comments and processing instructions have a non-string tag;
            # they carry no metadata and would break the string handling below
            if not etree.iselement(t) or not isinstance(t.tag, six.string_types):
                continue
            if t.tag == default_ns + 'meta':
                name = t.get('name')
                others = dict((k, v) for k, v in t.items())

                if name and ':' in name:
                    prefix, name = name.split(':', 1)
                else:
                    prefix = None

                add_item(t.nsmap.get(prefix, prefix), name, t.text, others)
            else:
                tag = t.tag[t.tag.rfind('}') + 1:]

                if (t.prefix and t.prefix.lower() == 'dc') and tag == 'identifier':
                    _id = t.get('id', None)

                    if _id:
                        self.book.IDENTIFIER_ID = _id

                others = dict((k, v) for k, v in t.items())
                add_item(t.nsmap[t.prefix], tag, t.text, others)

        self.book.metadata = nsdict

        titles = self.book.get_metadata('DC', 'title')
        if len(titles) > 0:
            self.book.title = titles[0][0]

        for value, others in self.book.get_metadata('DC', 'identifier'):
            if others.get('id') == self.book.IDENTIFIER_ID:
                self.book.uid = value

    def _load_manifest(self):
        """
        Creates one item object per <item> of the OPF manifest, reads its
        content from the container and adds it to the book.
        """
        item_tag = '{%s}item' % NAMESPACES['OPF']

        for r in self.container.find('{%s}%s' % (NAMESPACES['OPF'], 'manifest')):
            # skip comments and anything that is not an <item>
            if r.tag != item_tag:
                continue

            media_type = r.get('media-type')
            _properties = r.get('properties', '')
            # hrefs are URL-encoded in the OPF; names in the container are not
            href = unquote(r.get('href'))

            if _properties:
                properties = _properties.split(' ')
            else:
                properties = []

            # people use wrong content types
            if media_type == 'image/jpg':
                media_type = 'image/jpeg'

            if media_type == 'application/x-dtbncx+xml':
                ei = EpubNcx(uid=r.get('id'), file_name=href)

                ei.content = self.read_file(zip_path.join(self.opf_dir, ei.file_name))
            elif media_type == 'application/smil+xml':
                ei = EpubSMIL(uid=r.get('id'), file_name=href)

                ei.content = self.read_file(zip_path.join(self.opf_dir, ei.file_name))
            elif media_type == 'application/xhtml+xml':
                if 'nav' in properties:
                    ei = EpubNav(uid=r.get('id'), file_name=href)

                    # read with the unquoted href, like every other branch;
                    # the raw href fails for percent-encoded file names
                    ei.content = self.read_file(zip_path.join(self.opf_dir, href))
                elif 'cover' in properties:
                    # NOTE(review): the manifest id/href are not passed on, so the
                    # item keeps whatever defaults EpubCoverHtml() defines - confirm
                    ei = EpubCoverHtml()

                    ei.content = self.read_file(zip_path.join(self.opf_dir, href))
                else:
                    ei = EpubHtml()

                    ei.id = r.get('id')
                    ei.file_name = href
                    ei.media_type = media_type
                    ei.media_overlay = r.get('media-overlay', None)
                    ei.media_duration = r.get('duration', None)
                    ei.content = self.read_file(zip_path.join(self.opf_dir, ei.get_name()))
                    ei.properties = properties
            elif media_type in IMAGE_MEDIA_TYPES:
                if 'cover-image' in properties:
                    ei = EpubCover(uid=r.get('id'), file_name=href)

                    ei.media_type = media_type
                    ei.content = self.read_file(zip_path.join(self.opf_dir, ei.get_name()))
                else:
                    ei = EpubImage()

                    ei.id = r.get('id')
                    ei.file_name = href
                    ei.media_type = media_type
                    ei.content = self.read_file(zip_path.join(self.opf_dir, ei.get_name()))
            else:
                # different types
                ei = EpubItem()

                ei.id = r.get('id')
                ei.file_name = href
                ei.media_type = media_type

                ei.content = self.read_file(zip_path.join(self.opf_dir, ei.get_name()))

            self.book.add_item(ei)

    def _parse_ncx(self, data):
        """
        Builds book.toc from the content of an NCX file.

        The toc is a list whose entries are either Link objects (navPoint
        without nested navPoints) or (Section, children) tuples. A missing or
        empty navMap gives an empty list.

        :Args:
          - data: content of the NCX file
        """
        tree = parse_string(data)
        tree_root = tree.getroot()

        label_tag = '{%s}navLabel' % NAMESPACES['DAISY']
        content_tag = '{%s}content' % NAMESPACES['DAISY']
        point_tag = '{%s}navPoint' % NAMESPACES['DAISY']

        nav_map = tree_root.find('{%s}navMap' % NAMESPACES['DAISY'])
        if nav_map is None:
            self.book.toc = []
            return

        def _get_children(elems, n, nid):
            label, content = '', ''
            children = []

            # iterate the element directly; getchildren() is deprecated
            for a in elems:
                if a.tag == label_tag:
                    # a navLabel without a child element keeps the empty label
                    if len(a) > 0:
                        label = a[0].text
                elif a.tag == content_tag:
                    content = a.get('src', '')
                elif a.tag == point_tag:
                    children.append(_get_children(a, n + 1, a.get('id', '')))

            # the navMap itself (n == 0) always yields the list of its entries,
            # also when that list is empty
            if n == 0:
                return children

            if len(children) > 0:
                return (Section(label, href=content),
                        children)

            return Link(content, label, nid)

        self.book.toc = _get_children(nav_map, 0, '')

    def _parse_nav(self, data, base_path, navtype='toc'):
        """
        Reads either the table of contents or the page list from the
        navigation document. Nothing is changed when the document has no
        matching <nav> element.

        :Args:
          - data: content of the navigation document
          - base_path: directory of the navigation document, used to resolve hrefs
          - navtype: 'toc' fills book.toc, any other value fills book.pages
        """
        html_node = parse_html_string(data)
        if navtype == 'toc':
            # parsing the table of contents
            nav_nodes = html_node.xpath("//nav[@*='toc']")
        else:
            # parsing the list of pages
            nav_nodes = html_node.xpath("//nav[@*='page-list']")

        if len(nav_nodes) == 0:
            return
        nav_node = nav_nodes[0]

        def parse_list(list_node):
            items = []

            # a <nav> without an <ol> simply has no entries
            if list_node is None:
                return items

            for item_node in list_node.findall('li'):

                sublist_node = item_node.find('ol')
                link_node = item_node.find('a')

                if sublist_node is not None:
                    title = item_node[0].text
                    children = parse_list(sublist_node)

                    if link_node is not None:
                        href = zip_path.normpath(zip_path.join(base_path, link_node.get('href')))
                        items.append((Section(title, href=href), children))
                    else:
                        items.append((Section(title), children))
                elif link_node is not None:
                    title = link_node.text
                    href = zip_path.normpath(zip_path.join(base_path, link_node.get('href')))

                    items.append(Link(href, title))

            return items

        if navtype == 'toc':
            self.book.toc = parse_list(nav_node.find('ol'))
        else:
            # generate the pages list if there is one
            self.book.pages = parse_list(nav_node.find('ol'))

            # generate the per-file pages lists
            # because of the order of parsing the files, this can't be done
            # when building the EpubHtml objects
            htmlfiles = dict()
            for htmlfile in self.book.items:
                if isinstance(htmlfile, EpubHtml):
                    htmlfiles[htmlfile.file_name] = htmlfile
            for page in self.book.pages:
                # the file name is everything before the first '#'
                filename = page.href.split('#', 1)[0]
                if filename in htmlfiles:
                    htmlfiles[filename].pages.append(page)

    def _load_spine(self):
        """
        Reads the OPF spine (reading order and page direction) and parses the
        NCX it references, unless the NCX is ignored and a navigation document
        exists.

        Raises EpubException when the referenced NCX can not be found.
        """
        spine = self.container.find('{%s}%s' % (NAMESPACES['OPF'], 'spine'))

        self.book.spine = [(t.get('idref'), t.get('linear', 'yes')) for t in spine]

        toc = spine.get('toc', '')
        self.book.set_direction(spine.get('page-progression-direction', None))

        # should read ncx or nav file
        nav_item = next((item for item in self.book.items if isinstance(item, EpubNav)), None)
        if toc:
            if not self.options.get('ignore_ncx') or not nav_item:
                # the spine may name an id that is not in the manifest
                ncx_item = self.book.get_item_with_id(toc)
                if ncx_item is None:
                    raise EpubException(-1, 'Can not find ncx file.')

                try:
                    ncxFile = self.read_file(zip_path.join(self.opf_dir, ncx_item.get_name()))
                except KeyError:
                    raise EpubException(-1, 'Can not find ncx file.')

                self._parse_ncx(ncxFile)

    def _load_guide(self):
        """Copies the references of the optional OPF <guide> element into book.guide."""
        guide = self.container.find('{%s}%s' % (NAMESPACES['OPF'], 'guide'))
        if guide is not None:
            self.book.guide = [{'href': t.get('href'), 'title': t.get('title'), 'type': t.get('type')} for t in guide]

    def _load_opf_file(self):
        """
        Parses the OPF file found by _load_container() and loads metadata,
        manifest, spine, guide and finally the navigation document.

        Raises EpubException when the OPF file is not in the container.
        """
        try:
            s = self.read_file(self.opf_file)
        except KeyError:
            raise EpubException(-1, 'Can not find container file')

        self.container = parse_string(s)

        self._load_metadata()
        self._load_manifest()
        self._load_spine()
        self._load_guide()

        # read nav file if found
        #
        nav_item = next((item for item in self.book.items if isinstance(item, EpubNav)), None)
        if nav_item:
            if self.options.get('ignore_ncx') or not self.book.toc:
                self._parse_nav(
                    nav_item.content,
                    zip_path.dirname(nav_item.file_name),
                    navtype='toc'
                )
            self._parse_nav(
                nav_item.content,
                zip_path.dirname(nav_item.file_name),
                navtype='pages'
            )

    def _load(self):
        """
        Opens the container (zip archive or directory), loads the book from it
        and closes the container again, also when loading fails.

        Raises EpubException when the file is not a valid or is a too large zip file.
        """

        self.file_name = str(self.file_name)  #EPOM

        if os.path.isdir(self.file_name):
            file_name = self.file_name

            # minimal stand-in for ZipFile that serves files of an unpacked EPUB
            class Directory:
                def read(self, subname):
                    with open(os.path.join(file_name, subname), 'rb') as fp:
                        return fp.read()

                def close(self):
                    pass

            self.zf = Directory()
        else:
            try:
                self.zf = zipfile.ZipFile(self.file_name, 'r', compression=zipfile.ZIP_DEFLATED, allowZip64=True)
            except zipfile.BadZipfile:
                raise EpubException(0, 'Bad Zip file')
            except zipfile.LargeZipFile:
                raise EpubException(1, 'Large Zip file')

        try:
            # 1st check metadata
            self._load_container()
            self._load_opf_file()
        finally:
            # never leave the archive open, even if parsing raised
            self.zf.close()

# READ

def read_epub(name, options=None):
    """
    Loads the input file and returns its content as a new EpubBook instance.

    >>> book = ebooklib.read_epub('book.epub')

    :Args:
      - name: full path to the input file
      - options: extra options as dictionary (optional)

    :Returns:
      Instance of EpubBook.
    """

    if DEBUG:
        print("read_epub: ", name)

    #~ reader.process()  EPOM
    return EpubReader(name, options).load()

#~ =========== utils.py ====================================

import io
import mimetypes

from lxml import etree


# Set to True by guess_type() once it has initialised the mimetypes module and
# registered the '.xhtml' mapping, so that this setup runs only once.
mimetype_initialised = False


def debug(obj):
    """Pretty-prints obj to standard output using an indent of 4."""
    import pprint

    pprint.pprint(obj, indent=4)


def parse_string(s):
    """
    Parses an XML document into an lxml element tree.

    The parser recovers from malformed markup where it can and does not
    resolve entities.

    :Args:
      - s: XML document as text or as bytes

    :Returns:
      The parsed element tree. Errors raised by the parser itself are
      propagated unchanged.
    """
    parser = etree.XMLParser(recover=True, resolve_entities=False)

    # Only the encoding step is guarded, so a real parse error is no longer
    # hidden behind a second, unrelated failure of the fallback.
    try:
        data = s.encode('utf-8')
    except (AttributeError, UnicodeError):
        # already a byte string (no usable encode()): parse it as it is
        data = s

    return etree.parse(io.BytesIO(data), parser=parser)


def parse_html_string(s):
    """
    Parses an HTML document with an lxml HTML parser set to UTF-8 encoding.

    :Args:
      - s: HTML document

    :Returns:
      The document as returned by lxml.html.document_fromstring.
    """
    from lxml import html as lxml_html

    return lxml_html.document_fromstring(s, parser=lxml_html.HTMLParser(encoding='utf-8'))


def guess_type(extenstion):
    """
    Guesses the media type with mimetypes.guess_type.

    On the first call the mimetypes module is initialised and '.xhtml' is
    registered as 'application/xhtml+xml'.

    :Args:
      - extenstion: value handed to mimetypes.guess_type

    :Returns:
      The (type, encoding) tuple returned by mimetypes.guess_type.
    """
    global mimetype_initialised

    needs_setup = not mimetype_initialised
    if needs_setup:
        # one-time setup, remembered in the module level flag
        mimetypes.init()
        mimetypes.add_type('application/xhtml+xml', '.xhtml')
        mimetype_initialised = True

    guessed = mimetypes.guess_type(extenstion)
    return guessed


def create_pagebreak(pageref, label=None, html=True):
    """
    Builds a <span> page break marker with epub:type="pagebreak".

    :Args:
      - pageref: page reference, used for both the title and the id attribute
      - label: text to put inside the span (optional)
      - html: when true the serialised markup is returned, otherwise the element

    :Returns:
      The span as unicode string, or the lxml element when html is false.
    """
    #~ from ebooklib.epub import NAMESPACES

    reference = u'{}'.format(pageref)
    span_attributes = {
        '{%s}type' % NAMESPACES['EPUB']: 'pagebreak',
        'title': reference,
        'id': reference,
    }

    span = etree.Element('span', span_attributes, nsmap={'epub': NAMESPACES['EPUB']})

    if label:
        span.text = label

    if not html:
        return span

    return etree.tostring(span, encoding='unicode')


def get_headers(elem):
    """
    Looks for a heading directly below elem, trying h1 first and h6 last.

    :Args:
      - elem: element providing xpath()

    :Returns:
      Stripped text of the first heading of the first level that is present
      and not empty, or None when there is none.
    """
    for level in range(1, 7):
        found = elem.xpath('./h{}'.format(level))
        if len(found) == 0:
            continue

        heading_text = found[0].text_content().strip()
        if len(heading_text) > 0:
            return heading_text

    return None


def get_pages(item):
    """
    Collects the page markers of one document: every element that has both
    an 'epub:type' and an 'id' attribute.

    The label of a marker is, in this order: its own stripped text, its
    'aria-label' attribute, the text of a heading below it, its id.

    :Args:
      - item: document providing get_body_content() and get_name()

    :Returns:
      List of (file name, id, label) tuples.
    """
    body = parse_html_string(item.get_body_content())
    found_pages = []

    for node in body.iter():
        if 'epub:type' not in node.attrib:
            continue

        anchor = node.get('id')
        if anchor is None:
            continue

        label = None

        own_text = node.text
        if own_text is not None:
            own_text = own_text.strip()
            if own_text != '':
                label = own_text

        # only a missing value (None) falls through to the next source
        if label is None:
            label = node.get('aria-label')

        if label is None:
            label = get_headers(node)

        found_pages.append((item.get_name(), anchor, label or anchor))

    return found_pages


def get_pages_for_items(items):
    """
    Collects the page markers of several documents into one flat list.

    :Args:
      - items: documents to pass to get_pages()

    :Returns:
      List with the entries of all documents, in document order.
    """
    all_pages = []

    for document in items:
        all_pages.extend(get_pages(document))

    return all_pages




