#!/usr/bin/env python
# -*- coding: utf-8 -*-

# This is a python script. You need a Python interpreter to run it.
# For example, ActiveState Python, which exists for windows.
#
# This script copies the file to a new file named with the book title


# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# For more information, please refer to <http://unlicense.org/>
#
# Written by Paul Durrant, 2010-2012, paul@durrant.co.uk, pdurrant on mobileread.com
# With enhancements by Kevin Hendricks, KevinH on mobileread.com
#
# Changelog
#  1.00 - Initial version
#  1.01 - Improved reporting of errors in files
#  1.02 - Unicode support under Windows and removed unnecessary code

__version__ = '1.02'

import sys
import struct
import binascii
import os
import htmlentitydefs
import re

class Unbuffered:
    """Proxy around a file-like object that flushes after every write.

    Anything other than `write` is looked up on the wrapped stream, so the
    proxy can stand in for the stream it wraps (for example sys.stdout).
    """

    def __init__(self, stream):
        # The wrapped file-like object; every write is forwarded to it.
        self.stream = stream

    def write(self, data):
        """Write `data` to the wrapped stream, then flush it straight away."""
        target = self.stream
        target.write(data)
        target.flush()

    def __getattr__(self, attr):
        """Delegate attributes not defined on the proxy to the wrapped stream."""
        wrapped = self.stream
        return getattr(wrapped, attr)

def utf8_argv():
    """Return the command-line arguments as a list of UTF-8 byte strings.

    On Windows the arguments are fetched with GetCommandLineW and
    CommandLineToArgvW, because versions 2.x of Python don't support
    Unicode in sys.argv there: the underlying Windows API replaces
    multi-byte characters with '?'.

    On other platforms each sys.argv entry is decoded using the encoding of
    sys.stdin, falling back to the file system encoding and finally to
    UTF-8 when stdin reports no encoding (e.g. when it is redirected).

    Returns:
        A list with one UTF-8 encoded byte string per argument, starting
        with the script name. May be empty on Windows if the command line
        could not be split.
    """
    if sys.platform.startswith('win'):
        from ctypes import POINTER, byref, cdll, c_int, windll
        from ctypes.wintypes import LPCWSTR, LPWSTR

        GetCommandLineW = cdll.kernel32.GetCommandLineW
        GetCommandLineW.argtypes = []
        GetCommandLineW.restype = LPCWSTR

        CommandLineToArgvW = windll.shell32.CommandLineToArgvW
        CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)]
        CommandLineToArgvW.restype = POINTER(LPWSTR)

        cmd = GetCommandLineW()
        argc = c_int(0)
        argv = CommandLineToArgvW(cmd, byref(argc))
        if argc.value > 0:
            # Remove Python executable and commands if present
            start = argc.value - len(sys.argv)
            return [argv[i].encode('utf-8') for i in range(start, argc.value)]
        return []

    # sys.stdin may be missing or may have no encoding (redirected input);
    # decoding with None would raise a TypeError, so pick a usable fallback.
    encoding = (getattr(sys.stdin, 'encoding', None)
                or sys.getfilesystemencoding()
                or 'utf-8')
    converted = []
    for arg in sys.argv:
        # Only byte strings need decoding; text arguments are encoded as is.
        if isinstance(arg, bytes):
            arg = arg.decode(encoding)
        converted.append(arg.encode('utf-8'))
    return converted


def unescape(text):
    """Replace HTML character references and named entities in `text`.

    Handles decimal references (&#65;), hexadecimal references (&#x41; or
    &#X41;) and named entities known to htmlentitydefs (&amp;). Anything
    that cannot be converted is left in the text exactly as it was.

    Args:
        text: unicode string that may contain references/entities.

    Returns:
        The unicode string with every recognised reference replaced by the
        character it stands for.
    """
    def fixup(match):
        entity = match.group(0)
        if entity[:2] == u"&#":
            # character reference
            try:
                if entity[2:3] in (u"x", u"X"):
                    return unichr(int(entity[3:-1], 16))
                return unichr(int(entity[2:-1]))
            except (ValueError, OverflowError):
                # Not a valid number, or a code point unichr() cannot
                # represent (huge values raise OverflowError rather than
                # ValueError).
                pass
        else:
            # named entity
            try:
                return unichr(htmlentitydefs.name2codepoint[entity[1:-1]])
            except KeyError:
                pass
        return entity  # leave as is
    return re.sub(u"&#?\\w+;", fixup, text)

def safername(utext):
    """Return `utext` with a fixed set of characters substituted.

    Angle brackets become square brackets, colons become dashes (the forms
    with surrounding spaces are handled separately), slashes, backslashes
    and vertical bars become underscores, and double quotes become single
    quotes.
    """
    # Applied strictly in this order: the spaced colon forms have to be
    # consumed before the bare colon rule sees them.
    substitutions = (
        (u'<', u'['),
        (u'>', u']'),
        (u' : ', u' – '),
        (u': ', u' – '),
        (u':', u'—'),
        (u'/', u'_'),
        (u'\\', u'_'),
        (u'|', u'_'),
        (u'\"', u'\''),
    )
    cleaned = utext
    for unwanted, replacement in substitutions:
        cleaned = cleaned.replace(unwanted, replacement)
    return cleaned

class NameException(Exception):
    """Raised by MobiBook when the input file is not in a recognised format."""


class MobiBook:
    """Reads the header and metadata of a 'BOOKMOBI' or 'TEXtREAd' file.

    Attributes set by the constructor:
        data_file      complete contents of the file
        header         first 78 bytes of the file
        magic          the 8 bytes at offset 0x3C ('BOOKMOBI' or 'TEXtREAd')
        num_sections   number of sections listed in the header
        sections       list of (offset, flags, unique id), one per section
        sect           raw bytes of section 0
        records, compression
                       16-bit fields read from section 0
        mobi_length, mobi_codepage, mobi_version
                       32-bit fields read from section 0 (fixed defaults
                       for 'TEXtREAd' files)
        meta_array     dict mapping EXTH record type to raw record content
    """

    def loadSection(self, section):
        """Return the raw bytes of section number `section` (0-based).

        A section runs from its own offset up to the offset of the next
        section, or to the end of the file for the last one.
        """
        if (section + 1 == self.num_sections):
            endoff = len(self.data_file)
        else:
            endoff = self.sections[section + 1][0]
        off = self.sections[section][0]
        return self.data_file[off:endoff]

    def __init__(self, infile):
        """Read `infile` and parse its headers.

        Args:
            infile: path of the book file.

        Raises:
            NameException: after printing a message, if the file does not
                carry a 'BOOKMOBI' or 'TEXtREAd' signature or is too short
                to hold the headers.
        """
        # initial sanity check on file
        # Close the handle promptly rather than leaving it to the collector.
        with open(infile, 'rb') as book:
            self.data_file = book.read()
        self.mobi_data = b''
        self.header = self.data_file[0:78]
        self.magic = self.header[0x3C:0x3C+8]
        self.crypto_type = -1
        if self.magic not in (b'BOOKMOBI', b'TEXtREAd'):
            self._rejectFile(infile)

        try:
            # get section zero
            self._readSectionTable()
            self.sect = self.loadSection(0)

            # parse information from section 0
            self.records, = struct.unpack('>H', self.sect[0x8:0x8+2])
            self.compression, = struct.unpack('>H', self.sect[0x0:0x0+2])
            if self.magic == b'BOOKMOBI':
                self.mobi_length, = struct.unpack('>L', self.sect[0x14:0x18])
                self.mobi_codepage, = struct.unpack('>L', self.sect[0x1c:0x20])
                self.mobi_version, = struct.unpack('>L', self.sect[0x68:0x6C])
        except (struct.error, IndexError):
            # A truncated file yields short slices (struct.error) or an
            # empty section table (IndexError); report it like any other
            # unusable file instead of leaking a low-level error.
            self._rejectFile(infile)

        self.meta_array = {}
        if self.magic == b'TEXtREAd':
            self.extra_data_flags = 0
            self.mobi_length = 0
            self.mobi_codepage = 1252
            self.mobi_version = -1
            return

        # if exth region exists parse it for metadata array
        self.meta_array = self._readExth()

    def _rejectFile(self, infile):
        """Print the 'invalid file format' message and raise NameException."""
        print("invalid file format: {0}".format(os.path.basename(infile).encode('utf-8')))
        raise NameException()

    def _readSectionTable(self):
        """Fill in num_sections and sections from the table after the header."""
        self.num_sections, = struct.unpack('>H', self.header[0x4C:0x4E])
        self.sections = []
        for index in range(self.num_sections):
            start = 78 + index * 8
            # Each 8-byte entry: 4-byte offset, 1-byte flags, 3-byte id.
            offset, packed = struct.unpack('>LL', self.data_file[start:start + 8])
            self.sections.append((offset, packed >> 24, packed & 0xFFFFFF))

    def _readExth(self):
        """Return {record type: content} from the EXTH block.

        Returns an empty dict when the block is absent or is cut short.
        """
        meta = {}
        try:
            exth_flag, = struct.unpack('>L', self.sect[0x80:0x84])
            if not exth_flag & 0x40:
                return meta
            exth = self.sect[16 + self.mobi_length:]
            if exth[:4] != b'EXTH':
                return meta
            remaining, = struct.unpack('>I', exth[8:12])
            pos = 12
            while remaining > 0:
                rec_type, size = struct.unpack('>II', exth[pos:pos + 8])
                if size < 8:
                    # A record cannot be smaller than its own 8-byte header;
                    # stop here, otherwise a bogus item count would make the
                    # loop spin on the same position.
                    break
                meta[rec_type] = exth[pos + 8:pos + size]
                pos += size
                remaining -= 1
        except struct.error:
            # Truncated EXTH data: best effort, carry on without metadata.
            return {}
        return meta

    def getBookTitle(self):
        """Return the book title as a unicode string with entities unescaped.

        For 'BOOKMOBI' files the title comes from EXTH record 503 when
        present, otherwise from the offset/length pair stored at 0x54 in
        section 0. If that gives nothing (or for 'TEXtREAd' files) the
        NUL-terminated name in the first 32 bytes of the file is used.
        Bytes that are invalid in the book's codec are replaced rather than
        raising an error.
        """
        codec_map = {
            1252 : 'windows-1252',
            65001 : 'utf-8',
        }
        title = b''
        codec = 'windows-1252'
        if self.magic == b'BOOKMOBI':
            if 503 in self.meta_array:
                title = self.meta_array[503]
            else:
                toff, tlen = struct.unpack('>II', self.sect[0x54:0x5c])
                title = self.sect[toff:toff + tlen]
            codec = codec_map.get(self.mobi_codepage, codec)
        if title == b'':
            title = self.header[:32].split(b"\0")[0]

        return unescape(title.decode(codec, 'replace'))

if __name__ == "__main__":
    # Command-line entry point: copy the book given as the first argument to
    # a new file in the same folder, named after the book's title, and
    # optionally append the old and new names to a log file.

    # sys.stdout reports no encoding when output is not a terminal.
    printencoding = sys.stdout.encoding
    if printencoding is None:
        printencoding = 'utf-8'
    sys.stdout = Unbuffered(sys.stdout)
    argv = utf8_argv()
    print('KindleNamer v{0:s}.\nWritten 2012 by Paul Durrant.'.format(__version__))
    if len(argv) < 2 or len(argv) > 3:
        # argv entries are UTF-8 byte strings: decode before re-encoding for
        # display, otherwise a non-ASCII script path fails on the implicit
        # ASCII decode.
        progname = os.path.basename(unicode(argv[0], 'utf-8')).encode(printencoding, 'replace')
        print("Copies the book to a new file, in the same folder, named with the book title.")
        print("Optionally adds the old name and the new name to a log file.")
        print("")
        print("Usage:")
        print("    {0:s} bookfile [logfile]".format(progname))
        sys.exit(0)

    infile = unicode(argv[1], 'utf-8')
    try:
        mb = MobiBook(infile)
    except NameException:
        # MobiBook has already printed why the file was rejected; exit with
        # a failure status instead of a traceback.
        sys.exit(1)

    # Work out the destination name once; add a counter while it is taken.
    folder = os.path.dirname(infile)
    newname = safername(mb.getBookTitle())
    outfile = os.path.join(folder, newname + u'.mobi')
    count = 1
    while os.path.lexists(outfile):
        count += 1
        outfile = os.path.join(folder, newname + u'{0:d}.mobi'.format(count))

    # mb.data_file already holds the complete contents of the input file.
    with open(outfile, 'wb') as copy:
        copy.write(mb.data_file)

    if len(argv) == 3:
        logfile = unicode(argv[2], 'utf-8')
        with open(logfile, 'ab') as log:
            log.write(os.path.basename(infile.encode('utf-8')) + '\t' + os.path.basename(outfile.encode('utf-8')) + os.linesep)
    print("Copied \"{0:s}\" to \"{1:s}\".".format(os.path.basename(infile.encode(printencoding, 'replace')), os.path.basename(outfile.encode(printencoding, 'replace'))))
    sys.exit(0)
