# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__   = 'GPL v3'
__copyright__ = '2011, meme'
__docformat__ = 'restructuredtext en'

#####################################################################
# Kindle book parsing code
#####################################################################

import os, re, struct, hashlib

KINDLE_INTERNAL_ROOT = '/mnt/us'

#####################################################################

# Based on Mobipocket code
class EBook(object):
    """Metadata for one Kindle book file plus its Kindle collection code.

    The parser is chosen from the file extension (Mobi, Topaz or Kindlet).
    After construction ``collection_code`` is either ``'#<asin>^<type>'``
    (when both an ASIN and a title were found) or ``'*'`` followed by the
    SHA-1 hex digest of the book's internal Kindle path.

    Raises:
        ValueError: if the Mobi/Topaz parser reported an error, or if no
            ASIN/title was found and ``path`` is not an existing file.
    """

    # EXTH record type -> attribute it fills in.  Order matters: record 503
    # is applied last so that it overrides the title read from the header.
    _EXTH_ATTRIBUTES = (
        (100, 'author'),
        (106, 'pubdate'),
        (113, 'asin'),
        (501, 'type'),
        (503, 'title'),
    )

    def __init__(self, path):
        self.path = path
        self.title = None
        self.meta = None
        self.author = None
        self.asin = None
        self.type = None
        self.mobi_type = None
        self.text_encoding = None
        self.pubdate = None
        self.collection_code = None

        ext = os.path.splitext(self.path)[1][1:].lower()
        # The caught exception is copied to its own name because the name
        # bound by "except ... as" is not reliably available afterwards.
        error = None
        if ext in ('mobi', 'azw', 'prc'):
            try:
                self.meta = Mobi(self.path)
            except ValueError as exc:
                error = exc
            else:
                if self.meta.title:
                    self.title = self.meta.title
                    self.mobi_type = self.meta.mobi_type
                    self.text_encoding = self.meta.text_encoding
                    for code, attribute in self._EXTH_ATTRIBUTES:
                        if code in self.meta.exth:
                            setattr(self, attribute, self.meta.exth[code])
        elif ext in ('tpz', 'azw1'):
            try:
                self.meta = Topaz(self.path)
            except ValueError as exc:
                error = exc
            else:
                if self.meta.title:
                    self.title = self.meta.title
                    if self.meta.asin:
                        self.asin = self.meta.asin
                    if self.meta.type:
                        self.type = self.meta.type
        elif ext in ('azw2',):
            try:
                self.meta = Kindlet(self.path)
            except ValueError:
                # NOTE(review): unlike the Mobi/Topaz branches this failure is
                # not recorded, so an unreadable Kindlet falls through to the
                # hash-based code below.  Behaviour kept as is - confirm that
                # this is intentional.
                pass
            else:
                if self.meta.title:
                    self.title = self.meta.title
                if self.meta.asin:
                    self.asin = self.meta.asin
                    self.type = 'AZW2'

        # Change Kindle code to Amazon ASIN format if found
        if error is not None:
            raise ValueError(error)
        if self.asin and self.title:
            self.collection_code = '#%s^%s' % (self.asin, self.type)
        elif os.path.isfile(self.path):
            self.collection_code = '*' + self.get_hash(self.get_internal_kindle_path(self.path))
        else:
            raise ValueError('Unable to open file %s' % self.path)

    def get_hash(self, path):
        """Return the SHA-1 hex digest of ``path``.

        Text (unicode) input is encoded as UTF-8 first; hashing it directly
        fails for non-ASCII characters.  Byte strings are hashed unchanged.
        """
        if not isinstance(path, bytes):
            path = path.encode('utf-8')
        return hashlib.sha1(path).hexdigest()

    def get_internal_kindle_path(self, path):
        """Return the internal Kindle path (e.g. /mnt/us/somepath) for a file.

        Everything in the folder part up to the last occurrence of
        documents/pictures/audible/music is dropped, the result is placed
        under KINDLE_INTERNAL_ROOT, and OS-specific '\\' separators are
        converted to '/'.
        """
        path = os.path.normpath(path)
        folder = os.path.dirname(path)
        filename = os.path.basename(path)
        # Greedy '.*' means the LAST matching folder name wins.
        relative_folder = re.sub(r'.*(documents|pictures|audible|music)', r'\1', folder)
        return '/'.join([KINDLE_INTERNAL_ROOT, relative_folder, filename]).replace('\\', '/')

# Based on code by Mobileread's teegee543
class Sectionizer(object):
    """Reads the 78-byte header of a Palm database file and its section table.

    Attributes:
        f: the open file object (the caller should call ``close()`` when done).
        header: the first 78 bytes of the file.
        ident: the 8 bytes at offset 0x3C of the header.
        sections: for 'BOOKMOBI' files only, a tuple of section start offsets
            followed by the sentinel 0xfffffff, so that the last section can
            be read "to end of file".  Not set for 'TEXtREAd' files.

    Raises:
        ValueError: if the file cannot be opened, the identifier is neither
            'BOOKMOBI' nor 'TEXtREAd', or the section table cannot be unpacked.
    """

    def __init__(self, filename, perm):
        try:
            self.f = open(filename, perm)
        except Exception:
            raise ValueError('Unable to open file %s' % filename)

        try:
            self.header = self.f.read(78)
            self.ident = self.header[0x3C:0x3C+8]
            if self.ident == b'BOOKMOBI':
                try:
                    num_sections, = struct.unpack_from('>H', self.header, 76)
                    # Each table entry is 8 bytes: a 4-byte offset followed by
                    # 4 more bytes that are skipped via the [::2] stride.
                    table = self.f.read(num_sections*8)
                    self.sections = struct.unpack_from('>%dL' % (num_sections*2), table, 0)[::2] + (0xfffffff, )
                except Exception:
                    raise ValueError('Unexpected error in reading Mobi book header information - unable to unpack.  Try using Calibre to reconvert the book to Mobi format (even if you need to convert from Mobi format) and resending to device')
            elif self.ident != b'TEXtREAd':
                # Old PalmDOC files ('TEXtREAd') are accepted without a table.
                raise ValueError('This book contains invalid Mobi book header information and cannot be read.  Try using Calibre to reconvert the book to Mobi format (even if you need to convert from Mobi format) and resending it to the device')
        except Exception:
            # Do not leak the file handle when the header is rejected.
            self.f.close()
            raise

    def close(self):
        """Close the underlying file."""
        self.f.close()

    def loadSection(self, section):
        """Return the raw bytes of section number ``section``."""
        before, after = self.sections[section:section+2]
        self.f.seek(before)
        return self.f.read(after - before)

# Mobi metadata parsing
class Mobi(object):
    """Title, type, text encoding and EXTH records of a Mobi/PalmDOC file.

    Attributes:
        title: title bytes from the header (None if not read).
        mobi_type, text_encoding: 32-bit values read from offsets 24 and 28
            of section 0 (None for old PalmDOC files).
        exth: dict mapping EXTH record type to the record's raw data; empty
            when the file has no EXTH block.

    Raises:
        ValueError: if the file cannot be opened or its header cannot be parsed.
    """

    def __init__(self, filename):
        self.title = None
        self.mobi_type = None
        self.text_encoding = None
        self.exth = {}
        try:
            sections = Sectionizer(filename, 'rb')
            try:
                if sections.ident == b'TEXtREAd':
                    # Old Palm Doc format: the title is the NUL-terminated
                    # name at the start of the database header.
                    titlelen = self.zbyte(sections.header[0:31])
                    self.title = sections.header[0:titlelen]
                else:
                    self._parse_mobi_header(sections.loadSection(0))
            finally:
                # The Sectionizer keeps its file open; release it here.
                sections.f.close()
        except ValueError:
            raise
        except Exception:
            raise ValueError('Unexpected error in reading Mobi book header information.  Try using Calibre to reconvert the book to Mobi format (even if you need to convert from Mobi format) and resending to device')

    def _parse_mobi_header(self, header):
        """Fill in type, encoding, title and EXTH records from section 0."""
        # The length stored at offset 20 does not include the first 16 bytes.
        len_mobi = struct.unpack_from('>L', header, 20)[0] + 16
        mobi_raw = header[:len_mobi]
        self.mobi_type, = struct.unpack_from('>L', header, 24)
        self.text_encoding, = struct.unpack_from('>L', header, 28)
        titleoffset, titlelen = struct.unpack_from('>LL', mobi_raw, 84)
        self.title = header[titleoffset:titleoffset+titlelen]
        self.exth = self._parse_exth(header, len_mobi)

    def _parse_exth(self, header, offset):
        """Return {record type: data} for the EXTH block at ``offset``."""
        # Without the 'EXTH' identifier there is no EXTH block; parsing the
        # bytes anyway would yield garbage records.
        if header[offset:offset+4] != b'EXTH':
            return {}
        len_exth, = struct.unpack_from('>L', header, offset+4)
        # Skip the 12-byte EXTH header (identifier, length, record count).
        records = header[offset:offset+len_exth][12:]
        exth = {}
        pos = 0
        while len(records) - pos > 8:
            rectype, reclen = struct.unpack_from('>LL', records, pos)
            if reclen < 8:
                # A record is at least its own 8-byte header; a smaller length
                # is corrupt and would otherwise never advance the loop.
                break
            exth[rectype] = records[pos+8:pos+reclen]
            pos += reclen
        return exth

    def zbyte(self, text):
        """Return the index of the first NUL byte in ``text``.

        Returns ``len(text)`` when there is no NUL, so slicing with the
        result keeps the whole string.
        """
        end = text.find(b'\0')
        if end < 0:
            return len(text)
        return end

# Kindlet metadata parsing
class Kindlet(object):
    """Title and ASIN of a Kindlet (.azw2) archive.

    For official apps, the ASIN is stored in the Amazon-ASIN field of
    META-INF/MANIFEST.MF, and the title in the Implementation-Title field.
    ``title`` / ``asin`` are None when the corresponding field is absent
    or empty.

    Raises:
        ValueError: if the archive cannot be opened or has no manifest.
    """

    def __init__(self, filename):
        import zipfile
        try:
            kindlet = zipfile.ZipFile(filename, 'r')
        except Exception:
            raise ValueError('Unable to open file %s' % filename)
        try:
            try:
                kdkmanifest = kindlet.read('META-INF/MANIFEST.MF')
            except KeyError:
                # ZipFile.read raises KeyError for a missing member; report it
                # as the ValueError callers already handle.
                raise ValueError('Unable to read manifest from file %s' % filename)
        finally:
            kindlet.close()
        # The patterns below are text patterns; decode when read() gave bytes
        # that are not already the native str type.
        if not isinstance(kdkmanifest, str):
            kdkmanifest = kdkmanifest.decode('utf-8', 'replace')
        self.title = self._manifest_value(kdkmanifest, 'Implementation-Title')
        self.asin = self._manifest_value(kdkmanifest, 'Amazon-ASIN')

    @staticmethod
    def _manifest_value(manifest, field):
        """Return the stripped value of ``field`` in ``manifest``, or None."""
        match = re.search('^' + field + ': (.*?)$', manifest, re.MULTILINE)
        if match and match.group(1):
            return match.group(1).strip()
        return None

# Topaz metadata parsing. Almost verbatim code by Greg Riker from Calibre
class StreamSlicer(object):
    """Index and slice a window of a seekable binary stream like a byte string.

    Args:
        stream: seekable file-like object opened in binary mode.
        start: offset in the stream where the window begins.
        stop: offset where the window ends; defaults to the end of the stream.
    """

    def __init__(self, stream, start=0, stop=None):
        self._stream = stream
        self.start = start
        if stop is None:
            # Seek to the end of the stream to find its length.
            stream.seek(0, 2)
            stop = stream.tell()
        self.stop = stop
        self._len = stop - start

    def __getitem__(self, key):
        """Return the byte(s) selected by an integer index or a slice.

        Raises:
            IndexError: for a negative index before the start of the window.
            TypeError: if ``key`` is neither an integer nor a slice.
        """
        import numbers

        stream = self._stream
        base = self.start
        if isinstance(key, slice):
            start, stop, stride = key.indices(self._len)
            if stride < 0:
                # A backwards slice covers indices stop+1 .. start inclusive;
                # read that span forwards and let the stride reverse it.
                start, stop = stop + 1, start + 1
            size = stop - start
            if size <= 0:
                return b''
            stream.seek(base + start)
            data = stream.read(size)
            if stride != 1:
                data = data[::stride]
            return data
        if isinstance(key, numbers.Integral):
            if key < 0:
                # Negative indices count back from the end of the window.
                key += self._len
                if key < 0:
                    raise IndexError('stream index out of range')
            stream.seek(base + key)
            return stream.read(1)
        raise TypeError('stream indices must be integers')

class Topaz(object):
    """Title, ASIN and CDEType read from the metadata record of a Topaz file.

    After construction ``title``, ``asin`` and ``type`` hold the values of
    the 'Title', 'ASIN' and 'CDEType' metadata entries.  The file is closed
    before the constructor returns, whether or not parsing succeeded.

    Raises:
        ValueError: if the file cannot be opened, is not a Topaz file, or its
            metadata record is missing, damaged or lacks a required entry.
    """

    def __init__(self, filename):
        try:
            self.stream = open(filename, 'rb')
        except Exception:
            raise ValueError('Unable to open file %s' % filename)
        try:
            self.data = StreamSlicer(self.stream)

            sig = self.data[:4]
            if not sig.startswith(b'TPZ'):
                raise ValueError('Not a valid Topaz file')
            offset = 4

            self.header_records, consumed = self.decode_vwi(self.data[offset:offset+4])
            offset += consumed
            self.topaz_headers = self.get_headers(offset)

            # First integrity test - metadata header
            if b'metadata' not in self.topaz_headers:
                raise ValueError('Not a valid Topaz file, no metadata record')

            # Second integrity test - metadata body
            try:
                md_offset = self.topaz_headers[b'metadata']['blocks'][0]['offset']
            except KeyError:
                # A metadata header that lists no blocks has no block 0.
                raise ValueError('Not a valid Topaz file, damaged metadata record')
            md_offset += self.base
            # The byte at md_offset is skipped; the tag name follows it.
            if self.data[md_offset+1:md_offset+9] != b'metadata':
                raise ValueError('Not a valid Topaz file, damaged metadata record')

            # Get metadata, and store what we need
            try:
                self.title, self.asin, self.type = self.get_metadata()
            except Exception:
                raise ValueError('Unable to read metadata from file %s' % filename)
        finally:
            # Release the file on every path, not only on success.
            self.stream.close()

    def decode_vwi(self, bytes):
        """Decode one variable-width integer from the start of ``bytes``.

        Each byte contributes its low 7 bits, most significant group first;
        a byte with the high bit clear ends the number.

        Returns:
            (value, number of bytes consumed).  ``(0, 0)`` for empty input.
        """
        val = 0
        pos = 0
        # bytearray yields integers regardless of how byte strings index.
        for byte in bytearray(bytes):
            pos += 1
            val = (val << 7) | (byte & 0x7F)
            if not byte & 0x80:
                break
        return val, pos

    def get_headers(self, offset):
        """Read ``self.header_records`` header records starting at ``offset``.

        Returns a dict mapping each record's tag to ``{'blocks': {...}}``,
        where blocks maps the block index to its offset, uncompressed length
        and compressed length.  Also sets ``self.eoth`` (the byte following
        the records) and ``self.base`` (the offset just past that byte).
        """
        topaz_headers = {}
        for _ in range(self.header_records):
            # Skip the single byte that precedes every record.
            offset += 1
            taglen, consumed = self.decode_vwi(self.data[offset:offset+4])
            offset += consumed
            tag = self.data[offset:offset+taglen]
            offset += taglen
            num_vals, consumed = self.decode_vwi(self.data[offset:offset+4])
            offset += consumed
            blocks = {}
            for val in range(num_vals):
                hdr_offset, consumed = self.decode_vwi(self.data[offset:offset+4])
                offset += consumed
                len_uncomp, consumed = self.decode_vwi(self.data[offset:offset+4])
                offset += consumed
                len_comp, consumed = self.decode_vwi(self.data[offset:offset+4])
                offset += consumed
                blocks[val] = dict(offset=hdr_offset, len_uncomp=len_uncomp, len_comp=len_comp)
            topaz_headers[tag] = dict(blocks=blocks)
        self.eoth = self.data[offset]
        offset += 1
        self.base = offset
        return topaz_headers

    def get_metadata(self):
        """Return the (Title, ASIN, CDEType) metadata values.

        Raises KeyError if any of the three entries is missing.
        """
        self.get_original_metadata()
        return self.metadata[b'Title'], self.metadata[b'ASIN'], self.metadata[b'CDEType']

    def get_original_metadata(self):
        """Parse the metadata record into ``self.md_header`` and ``self.metadata``.

        ``self.metadata`` maps each entry's tag to its raw value.
        """
        offset = self.base + self.topaz_headers[b'metadata']['blocks'][0]['offset']
        self.md_header = {}
        taglen, consumed = self.decode_vwi(self.data[offset:offset+4])
        offset += consumed
        self.md_header['tag'] = self.data[offset:offset+taglen]
        offset += taglen
        self.md_header['flags'] = ord(self.data[offset])
        offset += 1
        self.md_header['num_recs'] = ord(self.data[offset])
        offset += 1

        self.metadata = {}
        for _ in range(self.md_header['num_recs']):
            taglen, consumed = self.decode_vwi(self.data[offset:offset+4])
            offset += consumed
            tag = self.data[offset:offset+taglen]
            offset += taglen
            md_len, consumed = self.decode_vwi(self.data[offset:offset+4])
            offset += consumed
            self.metadata[tag] = self.data[offset:offset+md_len]
            offset += md_len
