# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

from __future__ import absolute_import
from six.moves import range
from polyglot.builtins import is_py3

__license__   = 'GPL v3'
__copyright__ = '2011-2018, meme'
__docformat__ = 'restructuredtext en'

#####################################################################
# Kindle book parsing code
#####################################################################

import os, re, struct, hashlib, sys, traceback
from calibre import force_unicode
from calibre_plugins.kindle_collections.utilities import debug_print
import six

# Prefix of the on-device paths built by EBook.get_internal_kindle_path
# (e.g. /mnt/us/documents/...), which are then hashed into collection codes.
KINDLE_INTERNAL_ROOT = '/mnt/us'

#####################################################################

# c.f., https://github.com/kevinhendricks/KindleUnpack/blob/master/lib/compatibility_utils.py
if not is_py3:
    def bstr(s):
        """Return *s* as a byte string (Python 2: plain ``str``)."""
        return str(s)

    def bord(s):
        """Return the integer value of a one-character byte string."""
        return ord(s)
else:
    def bstr(s):
        """Return *s* as ``bytes``; text is encoded as latin-1."""
        return bytes(s, 'latin-1') if isinstance(s, str) else bytes(s)

    def bord(s):
        """Return *s* unchanged (indexing ``bytes`` already yields ints)."""
        return s

# Based on Mobipocket code
class EBook():
    """Describe one Kindle book file and compute its collection code.

    The file extension selects a metadata parser (Mobi, Topaz, Kindlet or
    Kfx).  After construction the instance exposes ``title``, ``author``,
    ``asin``, ``type``, ``mobi_type``, ``text_encoding``, ``pubdate``,
    ``meta`` (the parser object, if any) and ``collection_code``.

    ``collection_code`` is ``#<asin>^<type>`` when both an ASIN and a title
    were found, otherwise ``*<sha1 of the internal Kindle path>``.

    Raises:
        ValueError: if a Mobi/Topaz file could not be parsed, or if no ASIN
            was found and ``path`` is not an existing file.
    """

    # EXTH record id -> attribute it populates (applied in this order, so a
    # 503 record overrides the title taken from the Mobi header).
    _EXTH_FIELDS = (
        (100, 'author'),
        (106, 'pubdate'),
        (113, 'asin'),
        (501, 'type'),
        (503, 'title'),
    )

    def __init__(self, path):
        self.path = path
        self.title = None
        self.meta = None
        self.author = None
        self.asin = None
        self.type = None
        self.mobi_type = None
        self.text_encoding = None
        self.pubdate = None
        self.collection_code = None

        ext = os.path.splitext(self.path)[1][1:].lower()
        self.error = None
        if ext in ('mobi', 'pobi', 'azw', 'azw3', 'prc'):
            self._read_mobi()
        elif ext in ('tpz', 'azw1'):
            self._read_topaz()
        elif ext == 'azw2':
            self._read_kindlet()
        elif ext == 'kfx':
            self._read_kfx()

        # Change Kindle code to Amazon ASIN format if found
        if self.error:
            raise ValueError(self.error)
        elif self.asin and self.title:
            self.collection_code = "#{!s}^{!s}".format(force_unicode(self.asin, 'utf-8'), force_unicode(self.type, 'utf-8'))
        elif os.path.isfile(self.path):
            self.collection_code = "*{!s}".format(self.get_hash(self.get_internal_kindle_path(self.path)))
        else:
            raise ValueError('Unable to open file %s' % self.path)

    def _read_mobi(self):
        """Fill attributes from a Mobi file; a parse failure is kept in ``self.error``."""
        try:
            self.meta = Mobi(self.path)
        except ValueError as e:
            self.error = e
            return
        # Without a title the rest of the metadata is ignored as well.
        if not self.meta.title:
            return
        self.title = self.meta.title
        self.mobi_type = self.meta.mobi_type
        self.text_encoding = self.meta.text_encoding
        exth = self.meta.exth
        for exth_id, attr in self._EXTH_FIELDS:
            if exth_id in exth:
                setattr(self, attr, exth[exth_id])

    def _read_topaz(self):
        """Fill attributes from a Topaz file; a parse failure is kept in ``self.error``."""
        try:
            self.meta = Topaz(self.path)
        except ValueError as e:
            self.error = e
            return
        if not self.meta.title:
            return
        self.title = self.meta.title
        if self.meta.asin:
            self.asin = self.meta.asin
        if self.meta.type:
            self.type = self.meta.type

    def _read_kindlet(self):
        """Fill attributes from a Kindlet (azw2) archive."""
        try:
            self.meta = Kindlet(self.path)
        except ValueError:
            # Deliberately not recorded in self.error: an unreadable Kindlet
            # falls back to the path-hash collection code.
            return
        if self.meta.title:
            self.title = self.meta.title
        if self.meta.asin:
            self.asin = self.meta.asin
            self.type = 'AZW2'

    def _read_kfx(self):
        """Fill attributes from a KFX book."""
        try:
            self.meta = Kfx(self.path)
        except ValueError:
            # Deliberately not recorded in self.error: an unreadable KFX book
            # falls back to the path-hash collection code.
            return
        if self.meta.title:
            self.title = self.meta.title
        if self.meta.asin:
            self.asin = self.meta.asin
        if self.meta.authors:
            self.author = self.meta.authors
        if self.meta.pubdate:
            self.pubdate = self.meta.pubdate
        self.type = 'EBOK'
        # i.e., UTF-8 in Mobi-speak ;).
        self.text_encoding = 65001

    # Returns SHA-1 hash
    def get_hash(self, path):
        """Return the hex SHA-1 digest of *path*.

        Text is encoded as UTF-8 first; a byte string is hashed as-is, so
        calling ``.encode`` on bytes (an error on Python 3, an implicit ASCII
        decode on Python 2) is avoided.
        """
        if not isinstance(path, bytes):
            path = path.encode('utf-8')
        return hashlib.sha1(path).hexdigest()

    # Returns the internal path (e.g. /mnt/us/somepath) for an absolute path to a Kindle file (converts the current OS separator to '/')
    def get_internal_kindle_path(self, path):
        """Map an absolute host path of a file on the Kindle to its on-device path.

        Everything in the folder part up to the last occurrence of
        ``documents``, ``pictures``, ``audible`` or ``music`` is dropped, the
        remainder is joined under ``KINDLE_INTERNAL_ROOT`` and backslashes
        are turned into forward slashes.
        """
        path = os.path.normpath(path)
        folder = os.path.dirname(path)
        filename = os.path.basename(path)
        return '/'.join([ KINDLE_INTERNAL_ROOT, re.sub(r'.*(documents|pictures|audible|music)', r'\1', folder), filename ]).replace('\\', '/')

# Based on MobiUnpack
class Sectionizer:
    """Read a Palm-database style book file into memory and index its sections.

    Attributes:
        data: the complete file contents (bytes).
        header: the first 78 bytes of ``data``.
        ident: the 8-byte identifier at offset 0x3C of the header.
        sectionoffsets: for ``BOOKMOBI`` files, the start offset of every
            section followed by the file length; empty for ``TEXtREAd`` files.
    """

    def __init__(self, filename):
        """Load *filename* and parse its section table.

        Raises:
            ValueError: if the file cannot be read, if its identifier is
                neither ``BOOKMOBI`` nor ``TEXtREAd``, or if the section
                table cannot be unpacked.
        """
        try:
            # Context manager so the handle is released even when read() fails.
            with open(filename, 'rb') as f:
                self.data = f.read()
        except Exception:
            raise ValueError('Unable to open file %s' % filename)

        self.header = self.data[:78]
        self.ident = self.header[0x3C:0x3C+8]
        # Always defined, so loadSection never hits a missing attribute.
        self.sectionoffsets = ()
        if self.ident == b'BOOKMOBI':
            try:
                num_sections, = struct.unpack_from('>H', self.header, 76)
                filelength = len(self.data)
                # The table holds two 32-bit values per section; only the
                # first of each pair (the offset) is kept.  The file length
                # is appended so the last section has an end offset too.
                sectionsdata = struct.unpack_from('>%dL' % (num_sections*2), self.data, 78) + (filelength, 0)
                self.sectionoffsets = sectionsdata[::2]
            except Exception:
                debug_print("Unexpected error in reading Mobi book header information - unable to unpack: {}".format(sys.exc_info()[0]))
                traceback.print_exc()
                raise ValueError('Unexpected error in reading Mobi book header information - unable to unpack.  Try using Calibre to reconvert the book to Mobi format (even if you need to convert from Mobi format) and resending to device')
        elif self.ident != b'TEXtREAd':
            raise ValueError('This book contains invalid Mobi book header information and cannot be read.  Try using Calibre to reconvert the book to Mobi format (even if you need to convert from Mobi format) and resending it to the device')

    def loadSection(self, section):
        """Return the bytes of section number *section* (0-based)."""
        before, after = self.sectionoffsets[section:section+2]
        return self.data[before:after]

# Mobi metadata parsing
class Mobi:
    """Extract the title and selected EXTH records from a Mobi/PalmDOC file.

    Attributes:
        title: raw title bytes (first 32 bytes of the file for PalmDOC files).
        mobi_type, text_encoding: integers read from the Mobi header, or
            ``None`` for PalmDOC files.
        exth: dict mapping EXTH record id to raw bytes.  Only the ids in
            ``_EXTH_IDS`` are kept.  When an EXTH block was parsed, key 100
            is always present and holds all author records joined with
            ``b';'``.  Empty for PalmDOC files and books without EXTH.
    """

    # We only care about a few fields...
    _EXTH_IDS = (100, 106, 113, 501, 503)

    def __init__(self, filename):
        """Parse *filename*.

        Raises:
            ValueError: if the file cannot be read or its header is malformed.
        """
        self.title = None
        self.mobi_type = None
        self.text_encoding = None
        try:
            sections = Sectionizer(filename)
            if sections.ident == b'TEXtREAd':
                # Old Palm Doc format
                self.title = sections.data[:32]
                # A dict, like in the Mobi case, so callers see one type.
                self.exth = {}
            else:
                self._parse_header(sections.loadSection(0))
        except ValueError:
            raise
        except Exception:
            debug_print("Unexpected error in reading Mobi book header information: {}".format(sys.exc_info()[0]))
            traceback.print_exc()
            raise ValueError('Unexpected error in reading Mobi book header information.  Try using Calibre to reconvert the book to Mobi format (even if you need to convert from Mobi format) and resending to device')

    def _parse_header(self, header):
        """Populate title, mobi_type, text_encoding and exth from section 0."""
        length, self.mobi_type, self.text_encoding = struct.unpack(b'>LLL', header[20:32])
        toff, tlen = struct.unpack(b'>II', header[0x54:0x5c])
        self.title = header[toff:toff + tlen]

        self.exth = {}
        exth_flag, = struct.unpack(b'>L', header[0x80:0x84])
        if not exth_flag & 0x40:
            return

        exth_offset = length + 16
        exth_length, = struct.unpack_from(b'>L', header, exth_offset + 4)
        exth_length = ((exth_length + 3) >> 2) << 2  # round to next 4 byte boundary
        exth_rec = header[exth_offset:exth_offset + exth_length]
        # Truthiness works for both str (Python 2) and bytes (Python 3);
        # comparing bytes against '' is always unequal on Python 3.
        if not exth_rec:
            return

        num_items, = struct.unpack(b'>L', exth_rec[8:12])
        # For author, build a list, to support the way we now handle multiple
        # authors (via multiple exth 100 fields)
        authors = []
        pos = 12
        for _ in range(num_items):
            exth_id, size = struct.unpack(b'>LL', exth_rec[pos:pos + 8])
            if size < 8:
                # A record cannot be smaller than its own 8-byte header; stop
                # rather than re-read the same bytes num_items times.
                break
            if exth_id in self._EXTH_IDS:
                content = exth_rec[pos + 8:pos + size]
                if exth_id == 100:
                    authors.append(content)
                else:
                    self.exth[exth_id] = content
            pos += size
        # Join the list in a string to let force_unicode do its job properly later...
        # NOTE: Bytes all the way down ;).
        self.exth[100] = b';'.join(authors)

    def zbyte(self, text):
        """Return the index of the first NUL in *text*, or ``len(text)`` if none.

        Accepts both byte strings and text, so ``text[:zbyte(text)]`` yields
        the content before the terminator (or all of it when unterminated).
        """
        nul = b'\0' if isinstance(text, (bytes, bytearray)) else u'\0'
        pos = text.find(nul)
        return len(text) if pos < 0 else pos

# Kindlet metadata parsing
class Kindlet:
    """Read the title and ASIN of a Kindlet (azw2) archive.

    For official apps, ASIN is stored in the Amazon-ASIN field of
    META-INF/MANIFEST.MF, and title in the Implementation-Title field.

    Attributes:
        title: stripped bytes of the Implementation-Title field, or ``None``.
        asin: stripped bytes of the Amazon-ASIN field, or ``None``.
    """

    _TITLE_RE = re.compile(b'^Implementation-Title: (.*?)$', re.MULTILINE)
    _ASIN_RE = re.compile(b'^Amazon-ASIN: (.*?)$', re.MULTILINE)

    def __init__(self, filename):
        """Parse *filename*.

        Raises:
            ValueError: if the archive cannot be opened or its manifest
                cannot be read.
        """
        import zipfile
        try:
            kindlet = zipfile.ZipFile(filename, 'r')
        except Exception:
            raise ValueError('Unable to open file %s' % filename)
        try:
            kdkmanifest = kindlet.read('META-INF/MANIFEST.MF')
        except (KeyError, zipfile.BadZipfile):
            # KeyError: no such member.  Reported as ValueError like every
            # other parse failure, so callers need a single except clause.
            raise ValueError('Unable to read manifest from file %s' % filename)
        finally:
            # Release the archive on success and on failure alike.
            kindlet.close()

        self.title = self._manifest_value(self._TITLE_RE, kdkmanifest)
        self.asin = self._manifest_value(self._ASIN_RE, kdkmanifest)

    @staticmethod
    def _manifest_value(pattern, manifest):
        """Return the stripped value matched by *pattern*, or None if absent/blank."""
        found = pattern.search(manifest)
        if not found:
            return None
        # strip() also drops the '\r' left by CRLF line endings.
        return found.group(1).strip() or None

# Topaz metadata parsing. Almost verbatim code by Greg Riker from Calibre
class StreamSlicer(object):
    """Index and slice a window of a seekable binary stream like a byte string.

    The window covers stream positions ``start`` (inclusive) to ``stop``
    (exclusive); ``stop`` defaults to the end of the stream.  Data is read
    from the stream on every access, nothing is cached.
    """

    def __init__(self, stream, start=0, stop=None):
        self._stream = stream
        self.start = start
        if stop is None:
            # Seek to the end to learn the stream size.
            stream.seek(0, 2)
            stop = stream.tell()
        self.stop = stop
        self._len = stop - start

    def __len__(self):
        return self._len

    def __getitem__(self, key):
        """Return the byte(s) selected by an integer index or a slice.

        An integer yields whatever ``read(1)`` returns at that position (an
        empty result at end of stream, not an IndexError).  Negative
        integers count from the end of the window.  Slices support any
        stride, including negative ones.

        Raises:
            IndexError: for a negative index before the start of the window.
            TypeError: if *key* is neither an integer nor a slice.
        """
        import numbers
        stream = self._stream
        base = self.start
        if isinstance(key, numbers.Integral):
            if key < 0:
                key += self._len
                if key < 0:
                    raise IndexError('stream index out of range')
            stream.seek(base + key)
            return stream.read(1)
        if isinstance(key, slice):
            start, stop, stride = key.indices(self._len)
            count = len(range(start, stop, stride))
            if count == 0:
                return b""
            # Position of the last selected element; with a negative stride
            # it is the lowest position, so read from there up to `start`.
            last = start + (count - 1) * stride
            low, high = (start, last) if stride > 0 else (last, start)
            stream.seek(base + low)
            data = stream.read(high - low + 1)
            if stride != 1:
                data = data[::stride]
            return data
        raise TypeError('stream indices must be integers')

class Topaz(object):
    """Read title, author, ASIN and type from the metadata record of a Topaz file.

    After construction ``title``, ``author``, ``asin`` and ``type`` hold the
    decoded values of the ``Title``, ``Authors``, ``ASIN`` and ``CDEType``
    metadata entries.  The file is closed again before ``__init__`` returns,
    on success and on failure alike.
    """

    def __init__(self, filename):
        """Parse *filename*.

        Raises:
            ValueError: if the file cannot be opened, is not a valid Topaz
                file, or its metadata cannot be read.
        """
        try:
            self.stream = open(filename, 'rb')
        except Exception:
            raise ValueError('Unable to open file %s' % filename)
        try:
            self._load(filename)
        finally:
            # Also release the handle when validation fails.
            self.stream.close()

    def _load(self, filename):
        """Validate the already opened stream and extract its metadata."""
        self.data = StreamSlicer(self.stream)

        sig = self.data[:4]
        if not sig.startswith(b'TPZ'):
            raise ValueError('Not a valid Topaz file')
        offset = 4

        self.header_records, offset = self._read_vwi(offset)
        self.topaz_headers = self.get_headers(offset)

        # First integrity test - metadata header
        if b'metadata' not in self.topaz_headers:
            raise ValueError('Not a valid Topaz file, no metadata record')
        md_blocks = self.topaz_headers[b'metadata']['blocks']
        if 0 not in md_blocks:
            # A header without any block would otherwise surface as KeyError.
            raise ValueError('Not a valid Topaz file, no metadata record')

        # Second integrity test - metadata body
        md_offset = md_blocks[0]['offset'] + self.base
        if self.data[md_offset+1:md_offset+9] != b'metadata':
            raise ValueError('Not a valid Topaz file, damaged metadata record')

        # Get metadata, and store what we need
        try:
            self.title, self.author, self.asin, self.type = self.get_metadata()
        except Exception:
            debug_print("Unable to read metadata: {}".format(sys.exc_info()[0]))
            traceback.print_exc()
            raise ValueError('Unable to read metadata from file %s' % filename)

    def decode_vwi(self, byts):
        """Decode a variable-width integer from the start of *byts*.

        Each byte contributes its low 7 bits, most significant group first;
        a byte with the high bit clear ends the number.

        Returns:
            ``(value, number_of_bytes_consumed)``; ``(0, 0)`` for empty input.
        """
        pos, val = 0, 0
        for b in bytearray(byts):
            pos += 1
            val = (val << 7) | (b & 0x7F)
            if not b & 0x80:
                break
        return val, pos

    def _read_vwi(self, offset):
        """Decode the variable-width integer at *offset*; return (value, next offset)."""
        value, consumed = self.decode_vwi(self.data[offset:offset+4])
        return value, offset + consumed

    def get_headers(self, offset):
        """Parse ``self.header_records`` header records starting at *offset*.

        Returns a dict mapping each record tag to ``{'blocks': {index:
        {'offset', 'len_uncomp', 'len_comp'}}}``.  Also sets ``self.eoth``
        (the element following the last record) and ``self.base`` (the
        position right after it), to which block offsets are relative.
        """
        topaz_headers = {}
        for _ in range(self.header_records):
            # Every record starts with one byte that is skipped unread.
            offset += 1
            taglen, offset = self._read_vwi(offset)
            tag = self.data[offset:offset+taglen]
            offset += taglen
            num_vals, offset = self._read_vwi(offset)
            blocks = {}
            for val in range(num_vals):
                hdr_offset, offset = self._read_vwi(offset)
                len_uncomp, offset = self._read_vwi(offset)
                len_comp, offset = self._read_vwi(offset)
                blocks[val] = dict(offset=hdr_offset, len_uncomp=len_uncomp, len_comp=len_comp)
            topaz_headers[tag] = dict(blocks=blocks)
        self.eoth = self.data[offset]
        offset += 1
        self.base = offset
        return topaz_headers

    def get_metadata(self):
        """Return ``(title, authors, asin, cdetype)`` decoded as UTF-8.

        Raises KeyError if any of the four metadata entries is missing.
        """
        self.get_original_metadata()
        return tuple(
            force_unicode(self.metadata[key], 'utf-8')
            for key in (b'Title', b'Authors', b'ASIN', b'CDEType')
        )

    def get_original_metadata(self):
        """Parse the metadata record into ``self.md_header`` and ``self.metadata``.

        ``self.md_header`` receives ``tag``, ``flags`` and ``num_recs``;
        ``self.metadata`` maps each entry tag (bytes) to its raw value (bytes).
        """
        offset = self.base + self.topaz_headers[b'metadata']['blocks'][0]['offset']
        self.md_header = {}
        taglen, offset = self._read_vwi(offset)
        self.md_header['tag'] = self.data[offset:offset+taglen]
        offset += taglen
        self.md_header['flags'] = ord(self.data[offset:offset+1])
        offset += 1
        self.md_header['num_recs'] = ord(self.data[offset:offset+1])
        offset += 1

        self.metadata = {}
        for _ in range(self.md_header['num_recs']):
            taglen, offset = self._read_vwi(offset)
            tag = self.data[offset:offset+taglen]
            offset += taglen
            md_len, offset = self._read_vwi(offset)
            self.metadata[tag] = self.data[offset:offset + md_len]
            offset += md_len

# KFX metadata parsing, c.f., Calibre's metadata_from_path @ devices/kindle/driver.py
# Originally implemented in KCP by stefano.sb (https://www.mobileread.com/forums/showpost.php?p=3731470&postcount=256)
class Kfx:
    """Read ASIN, title, authors and publication date of a KFX book.

    Attributes:
        asin: value of the ``mobi-asin`` identifier, if any.
        title: ``mi.title`` of the parsed metadata.
        authors: ``mi.authors`` of the parsed metadata.
        pubdate: ``str()`` of the publication date, or ``None`` if unset.
    """

    def __init__(self, filename):
        """Parse *filename* (or its sidecar metadata file).

        Raises:
            ValueError: if the metadata cannot be read for any reason.
        """
        from calibre.ebooks.metadata.kfx import read_metadata_kfx
        mi = None
        self.asin = None
        self.title = None
        self.authors = None
        self.pubdate = None
        try:
            # Try the book itself first, for standalone, generated KFX files
            kfx_path = filename
            with lopen(kfx_path, 'rb') as f:
                if f.read(8) != b'\xeaDRMION\xee':
                    f.seek(0)
                    mi = read_metadata_kfx(f)
                else:
                    # Otherwise, look for the sidecar metadata file, for shipped KFX files
                    kfx_path = os.path.join(filename.rpartition('.')[0] + '.sdr', 'assets', 'metadata.kfx')
                    with lopen(kfx_path, 'rb') as mf:
                        mi = read_metadata_kfx(mf)

            self.asin = mi.get_identifiers().get('mobi-asin')
            self.title = mi.title
            self.authors = mi.authors
            # Date is a proper datetime object, while we only handle parsing string ourselves... Dumb it down.
            # Guarded in case no date is set: str(None) would otherwise
            # produce the truthy string 'None'.
            self.pubdate = str(mi.pubdate) if mi.pubdate else None

        except Exception:
            debug_print("Unable to parse KFX metadata: {}".format(sys.exc_info()[0]))
            traceback.print_exc()
            raise ValueError('Unable to parse KFX metadata from file %s' % filename)
