#!/usr/bin/env python
#
#  Output ASIN hash for use in collections.json
#  Shamelessly ripped out of mobiunpack 0.23
#  Cf. http://www.mobileread.com/forums/showthread.php?p=774836
#
##

import sys
import os
import struct

class Sectionizer:
	"""Random access to the sections of a sectioned binary file.

	The constructor reads a 78-byte file header, keeps the 8-byte
	identifier found at offset 0x3C and the big-endian 16-bit section
	count found at offset 76, then reads one 8-byte table entry per
	section.  Only the first 32-bit word of each entry (the section's
	file offset) is kept.

	Attributes:
		f: the open file object.
		ident: the 8-byte identifier from the header.
		num_sections: number of sections announced by the header.
		sections: tuple of section start offsets followed by one
			sentinel end offset.

	The object can be used as a context manager, or closed explicitly
	with close().
	"""

	def __init__(self, filename, perm):
		"""Open `filename` with mode `perm` and read its section table.

		Raises:
			IOError/OSError: if the file cannot be opened.
			struct.error: if the file is too short to hold the header
				or the section table.
		"""
		# open() is the documented way to open files; the file() builtin
		# only exists on Python 2.
		self.f = open(filename, perm)
		try:
			header = self.f.read(78)
			self.ident = header[0x3C:0x3C+8]
			self.num_sections, = struct.unpack_from('>H', header, 76)
			sections = self.f.read(self.num_sections*8)
			# Each table entry is two 32-bit words; [::2] keeps the offsets.
			# The trailing sentinel lets loadSection() treat the last
			# section like any other: reading past the end of the file
			# simply returns whatever bytes remain.
			self.sections = struct.unpack_from('>%dL' % (self.num_sections*2), sections, 0)[::2] + (0xfffffff, )
		except Exception:
			# Do not leak the handle when the header is malformed.
			self.f.close()
			raise

	def close(self):
		"""Close the underlying file; safe to call more than once."""
		self.f.close()

	def __enter__(self):
		return self

	def __exit__(self, exc_type, exc_value, traceback):
		self.close()
		return False

	def loadSection(self, section):
		"""Return the raw bytes of section number `section` (0-based).

		The section spans from its own start offset to the start offset
		of the next section (or to the sentinel for the last one).
		"""
		before, after = self.sections[section:section+2]
		self.f.seek(before)
		return self.f.read(after - before)

def getMetaData(extheader):
	"""Collect the ASIN and TYPE records from an EXTH region.

	`extheader` is the raw region: 4 leading bytes (ignored here), a
	big-endian 32-bit length, a big-endian 32-bit record count, then the
	records themselves.  Every record starts with a 32-bit id and a
	32-bit size; the size covers the 8-byte record header as well as
	the payload.

	Returns a dict that maps 'ASIN' and/or 'TYPE' to the payload of the
	matching record (id 113 and id 501 respectively).  Records with any
	other id are skipped.

	Raises struct.error when the data ends before a header that still
	has to be read.
	"""
	wanted = {113: 'ASIN', 501: 'TYPE'}

	# The length word is read along with the count but is not needed.
	_, record_count = struct.unpack_from('>LL', extheader, 4)
	records = extheader[12:]

	found = {}
	offset = 0
	seen = 0
	while seen < record_count:
		tag, record_size = struct.unpack_from('>LL', records, offset)
		label = wanted.get(tag)
		if label is not None:
			# Payload sits between the record header and the record end.
			found[label] = records[offset + 8:offset + record_size]
		offset += record_size
		seen += 1
	return found

def parseBook(infile):
	"""Print the "#ASIN^TYPE" hash of the Mobipocket file `infile`.

	Section 0 of the file is loaded and, when its flag word at offset
	0x80 has bit 0x40 set, the EXTH region that follows the header is
	parsed for the ASIN and TYPE records.

	Raises:
		ValueError: 'invalid file format' when the file identifier is
			neither 'BOOKMOBI' nor 'TEXtREAd'; 'failed to parse metadata'
			when the ASIN or TYPE record is missing or the EXTH region
			is unreadable.
	"""
	sect = Sectionizer(infile, 'rb')
	try:
		if sect.ident != 'BOOKMOBI' and sect.ident != 'TEXtREAd':
			raise ValueError('invalid file format')
		header = sect.loadSection(0)
	finally:
		# Section 0 is all that is needed; release the file handle that
		# Sectionizer would otherwise keep open.
		sect.f.close()

	metadata = {}
	# A section 0 shorter than 0x84 bytes cannot hold the flag word read
	# below, so treat it as "no EXTH" instead of letting struct.error
	# escape past the caller's ValueError handling.
	if len(header) >= 0x84:
		# get length of this header
		length, = struct.unpack_from('>L', header, 20)

		# if exth region exists then parse it for the metadata
		exth_flag, = struct.unpack_from('>L', header, 0x80)
		if exth_flag & 0x40:
			try:
				metadata = getMetaData(header[16 + length:])
			except struct.error:
				# Truncated or corrupt EXTH records.
				raise ValueError('failed to parse metadata')

	# print formatted hash
	if 'ASIN' in metadata and 'TYPE' in metadata:
		print("#%s^%s" % (metadata['ASIN'], metadata['TYPE']))
	else:
		raise ValueError('failed to parse metadata')

# Command-line entry: with no argument print the banner/usage and exit 1;
# otherwise check the extension, print the hash and exit 0 (1 on error).
if len(sys.argv) < 2:
	for usage_line in (
		"JSON ASIN Hash Gen 0.1",
		"  Shamelessly ripped out of MobiUnpack 0.23,",
		"  Copyright (c) 2009 Charles M. Hannum <root@ihack.net>",
		"  With Images Support and Other Additions by P. Durrant and K. Hendricks",
		"",
		"Description:",
		"  Outputs the ASIN Hash of an Amazon MobiPocket file (encrypted or not)",
		"  to use in a Kindle json collections db",
		"Usage:",
		"  mobiunpack.py infile.azw",
	):
		print(usage_line)
	sys.exit(1)

infile = sys.argv[1]

# Only the file extension is checked here (case-insensitively); the
# content itself is validated by parseBook().
infileext = os.path.splitext(infile)[1].upper()
if infileext not in ('.MOBI', '.PRC', '.AZW'):
	print("Error: first parameter must be a mobipocket file.")
	sys.exit(1)

try:
	parseBook(infile)
except ValueError as e:
	print("Error: %s" % e)
	sys.exit(1)
sys.exit(0)