#! /usr/bin/python

# Script identification: file name and release number of this tool.
this_program = 'exthtitle.py'
this_version = '1.00'

# Changelog
#  1.00 - 20120616 written by dilo_sec

'''
Usage: exthtitle.py

Run exthtitle.py to print the book title of every .azw and .mobi book found
in the current folder and its subfolders.

'''

class Unbuffered:
   """File-like wrapper that flushes the wrapped stream after every write."""

   def __init__(self, stream):
      # Keep a reference to the wrapped stream; write() and __getattr__ use it.
      self.stream = stream

   def write(self, data):
      """Write data to the wrapped stream, then flush it immediately."""
      target = self.stream
      target.write(data)
      target.flush()

   def __getattr__(self, attr):
      """Forward attributes the wrapper does not define to the wrapped stream."""
      # Only called when normal lookup on the wrapper itself fails.
      wrapped = self.stream
      return getattr(wrapped, attr)

import sys, os, struct
# Replace stdout with a wrapper that flushes after every write, so output
# appears immediately even when it is piped or redirected.
sys.stdout = Unbuffered( sys.stdout)

def dump( root, infile):
   """Print the title stored in the MOBI header of one book file.

   Writes 'book:<infile>' first, then either 'booktitle:<title>' followed by
   an empty line, or a one-line diagnostic when the file is not a usable
   MOBI container (bad identifier, truncated data, missing MOBI/EXTH id).

   root   -- directory that contains the book
   infile -- file name of the book inside root

   Returns None.  Errors raised by open()/read() (e.g. IOError) propagate.
   """
   print('book:' + infile)
   # os.path.join instead of a hard-coded backslash so the path is valid on
   # every platform, not only on Windows.
   book = os.path.join(root, infile)

   # 'with' closes the file on every exit path, including the early returns.
   with open(book, 'rb') as f:
      # get header (the Palm database header is 78 bytes long)
      raw_header = f.read(78)
      identifier = raw_header[0x3C:0x3C + 8]
      if identifier not in (b'BOOKMOBI', b'TEXtREAd') or len(raw_header) < 78:
         print('----- invalid file format')
         return

      # The record count is the 16-bit field at offset 76.  Reading 32 bits
      # from offset 74 would mix in the tail of the preceding header field.
      num_sections, = struct.unpack_from('>H', raw_header, 76)
      raw_sections = f.read(num_sections * 8)
      if num_sections == 0 or len(raw_sections) < num_sections * 8:
         # no section 0 to read, or the record table is cut short
         print('----- invalid file format')
         return

      # Every record entry is (offset, attributes/id); keep the offsets and
      # append a sentinel so the last section extends to the end of the file.
      table = struct.unpack_from('>%dL' % (num_sections * 2), raw_sections, 0)
      sections = table[::2] + (0xfffffff,)

      # get section_0
      before, after = sections[0:2]
      f.seek(before)
      size = after - before
      # A negative size (corrupt table) means "read to end of file", which is
      # what Python 2's file.read() did with a negative argument.
      section = f.read(size if size >= 0 else -1)

   # check MOBI id
   if section[16:20] != b'MOBI':
      print('----- MOBI identifier not found')
      return

   # The fixed offsets used below reach up to 0x84; report a shorter section
   # instead of letting struct raise and abort the whole directory walk.
   if len(section) < 0x84:
      print('----- invalid file format')
      return

   # get MOBI header: length at +20, (type at +24 skipped), codepage at +28
   m_length, codepage = struct.unpack_from('>L4xL', section, 20)

   # if exth region exists then make sure it really follows the MOBI header
   exth_flag, = struct.unpack_from('>L', section, 0x80)
   if exth_flag & 0x40:
      # check EXTH id
      exth_start = 16 + m_length
      if section[exth_start:exth_start + 4] != b'EXTH':
         print('EXTH identifier not found')
         return

   # get book title: offset (relative to section 0) and length at 0x54
   toff, tlen = struct.unpack_from('>II', section, 0x54)
   title = section[toff:toff + tlen]
   if not isinstance(title, str):
      # Python 3 only: the title is bytes and must be decoded before it can
      # be concatenated.  Codepage 65001 is UTF-8; anything else is treated
      # as cp1252.  On Python 2 the raw byte string is printed unchanged.
      title = title.decode('utf-8' if codepage == 65001 else 'cp1252', 'replace')
   print('booktitle:' + title)

   print('')

   return


def main():
   """Walk the current directory tree and dump the title of every book found.

   A book is any file whose extension, compared case-insensitively, is .azw
   or .mobi.  Each one is passed to dump() together with its directory.
   Afterwards a summary line is printed with the number of books and the
   number of folders that held at least one book.

   Returns 0, which the caller uses as the process exit status.
   """
   wanted = ('.azw', '.mobi')

   numfiles = 0
   numdirs = 0
   for root, dirs, files in os.walk(os.getcwd()):
      # set once the first book in this folder has been handled
      folder_counted = False
      for infile in files:
         suffix = os.path.splitext(infile)[1].lower()
         if suffix not in wanted:
            continue
         numfiles += 1
         dump(root, infile)
         if not folder_counted:
            folder_counted = True
            numdirs += 1

   if numfiles:
      print('found ' + str(numfiles) + ' files in ' + str(numdirs) + ' folders')
   else:
      print('no files found')

   return 0

# Run main() only when the file is executed as a script; its return value
# is handed to sys.exit() as the process exit status.
if __name__ == "__main__":
   sys.exit( main())

