# BBeB dictionary dumper
# Version 0.1
# 2011-04-07
#
# Copyright (c) 2011 Igor Skochinsky
#
# This software is provided 'as-is', without any express or implied
# warranty. In no event will the authors be held liable for any damages
# arising from the use of this software.
#
# Permission is granted to anyone to use this software for any purpose,
# including commercial applications, and to alter it and redistribute it
# freely, subject to the following restrictions:
#
#    1. The origin of this software must not be misrepresented; you must not
#    claim that you wrote the original software. If you use this software
#    in a product, an acknowledgment in the product documentation would be
#    appreciated but is not required.
#
#    2. Altered source versions must be plainly marked as such, and must not be
#    misrepresented as being the original software.
#
#    3. This notice may not be removed or altered from any source
#    distribution.


from construct import *
import struct, re, unicodedata, bisect

# Fixed-size header that opens the dictionary file (it is the first member of
# the top-level Dic struct). Integer fields are little-endian. Fields named
# Unk* are parsed but not interpreted anywhere in this file.
DicHeader = Struct("DicHeader",
    String("Signature", 2),
    ULInt16("Version"),
    ULInt16("Flags"),
    # Encoding selects the Label codec below; ItemAdrMap.extractItem also uses
    # it (1 -> decode item text as UTF-16LE, anything else -> Shift_JIS X 0213).
    ULInt16("Encoding"),
    ULInt32("Unk08"),
    ULInt32("Unk0C"),
    ULInt32("Icon"),
    ULInt32("Unk14"),
    ULInt32("Unk18"),
    ULInt32("Unk1C"),
    # 16-byte label, decoded according to Encoding:
    #   0 -> shift_jisx0213, 1 -> utf_16le.
    # No default case is given, so other Encoding values are not handled here.
    Switch("Label", lambda ctx: ctx["Encoding"],
        {
            0: String("Label", 16, "shift_jisx0213"),
            1: String("Label", 16, "utf_16le")
        },
    )

)

# Symbolic names for the little-endian 16-bit section id ("id") found in each
# data map table entry. The rest of this file refers to sections by these
# names as strings (e.g. dataMapDict["idItemAdrMap"] and the Switch keys in
# DataMapTableEntry).
DataMapId = Enum(ULInt16("id"),
    idSearchInfoMap = 0x1,
    idIndexPartMap = 0x2,
    idMainTextMap = 0x1000,
    idItemAdrMap = 0x2000,
    idAltMap = 0x2002,
    idGlobalReferenceMap = 0x2003,
    idGaiji12Map = 0x3000,
    idGaiji16Map = 0x3001,
    idGaiji24Map = 0x3002,
    idGaijiSVG   = 0x3010,
    idGaijiInfoMap = 0x4000,
    idPictMap = 0x5000,
    idDecodeDataMap = 0x6000,
)

# 6-byte "extra" payload of an idItemAdrMap table entry.
ItemAdrMapExtra = Struct("ItemAdrMapExtra",
    ULInt32("Amount"),       # number of item address records (ItemAdrMap.itemcount)
    Byte("itemRecSize"),     # size in bytes of one record; DataSize must equal Amount * itemRecSize
    Byte("itemInfoWidth"),   # not used anywhere in this file
)

# 6-byte "extra" payload of an idDecodeDataMap table entry; consumed by
# HuffTree.reset().
DecodeDataMapExtra = Struct("DecodeDataMapExtra",
    ULInt32("dw0"),     # HuffTree.initialOffset: offset of the starting-code table inside the tree data
    Byte("codeLen"),    # HuffTree.codeLen: size in bytes of one code/delta entry
    Byte("b5"),         # HuffTree.extra_b5: number of bits collected before entering the tree
)

# 6-byte "extra" payload of an idGaijiInfoMap table entry. The fields are only
# parsed; nothing in this file interprets them.
GaijiInfoMapExtra = Struct("GaijiInfoMapExtra",
    ULInt16("w00"),
    ULInt16("w02"),
    Bytes("pad", 2),
)

# 6-byte "extra" payload of an idAltMap table entry. The fields are only
# parsed; nothing in this file interprets them.
AltMapExtra = Struct("AltMapExtra",
    ULInt16("w00"),
    Bytes("pad", 4),
)

# 6-byte "extra" payload of an idPictMap table entry. The fields are only
# parsed; nothing in this file interprets them.
ImageMapExtra = Struct("ImageMapExtra",
    ULInt16("w00"),
    Byte("imageInfoWidth"),
    Bytes("pad", 3),
)

# One entry of the data map table: section id, obfuscated offset, size and a
# 6-byte section-specific payload. The struct is named "DataMapTable", so the
# parsed array is reachable as <Dic>.DataMapTable.
DataMapTableEntry = Struct("DataMapTable",
    DataMapId,
    # Read big-endian (unlike the other fields); BBeBDict.DecodeKey turns it
    # into the real section offset.
    UBInt32("OffsetKey"),
    ULInt32("DataSize"),
    # The payload layout depends on the section id; unknown ids keep the raw
    # 6 bytes.
    Switch("extra", lambda ctx: ctx["id"],
        {
            "idItemAdrMap"    : ItemAdrMapExtra,
            "idDecodeDataMap" : DecodeDataMapExtra,
            "idGaijiInfoMap"  : GaijiInfoMapExtra,
            "idAltMap"        : AltMapExtra,
            "idPictMap"       : ImageMapExtra
        },
        default = Bytes("extra", 6)
    )
)

def VarString(name):
    """Build a construct field called *name* holding a length-prefixed string.

    The prefix is a little-endian 16-bit field named "length"; the string
    itself is decoded as shift_jisx0213.
    """
    length_prefix = ULInt16("length")
    return PascalString(name, length_prefix, "shift_jisx0213")

# Dictionary information record that follows the data map table. The struct
# is named "Info", so it is reachable as <Dic>.Info.
DicInfo = Struct("Info",
    ULInt16("InfoType"),
    ULInt16("InfoSize"),
    ULInt16("MemberDicNo"),
    ULInt16("Unk6"),
    Bytes("Unk8", 8),
    String("CreateDate", 10),
    Bytes("Unk1A", 6),
    # The last character (CID[15]) seeds the offset de-obfuscation key, see
    # BBeBDict._parseMapTable.
    String("CID", 16),
    # The remaining fields are 16-bit length-prefixed Shift_JIS strings.
    VarString("Title"),
    VarString("TitleReading"),
    VarString("Publisher"),
    VarString("PublisherReading"),
)

# Top-level layout parsed from the start of the file by BBeBDict: header,
# data map count, 14 bytes of padding, the data map table itself, then the
# dictionary information record.
Dic = Struct("BBeBDic",
    DicHeader,
    ULInt16("DataMapCount"),
    Padding(14),
    # One DataMapTableEntry per section, DataMapCount entries in total.
    Array(lambda ctx: ctx["DataMapCount"], DataMapTableEntry),
    DicInfo,
)

class ItemRec:
    """Plain record pairing an item's flag byte with its text offset."""

    def __init__(self, flag, itemOff):
        # Store both values unchanged under the same attribute names.
        self.flag, self.itemOff = flag, itemOff

import sys, struct
import unicodedata

class HuffTree:
    """Bit-driven decoder for the tree stored in the idDecodeDataMap section.

    The tree bytes are loaded lazily by reset(). Decoding is done by feeding
    single bits to feed_bit(), which returns a decoded chunk whenever a leaf
    is reached and None otherwise.

    Node layout, as read by the methods below:
      * first byte with bit 0 set  -> leaf; (b0 ^ 0xFC) >> 2 data bytes follow
        and bit 1 means the decoded chunk starts with a 0x1F marker byte;
      * first byte with bit 0 clear -> inner node holding a little-endian code
        of codeLen bytes; code >> 1 is a backward distance in the tree data.
    """

    def __init__(self, bbeb):
        self.bbeb = bbeb
        self.huffData = None    # raw tree bytes, filled in by reset()

    def huff_byte(self, off = None):
        """Return the tree byte at *off*, or at the current position if omitted.

        Raises Exception if the position lies outside the tree data.
        """
        # Test against None explicitly: 0 is a valid offset and must not be
        # mistaken for "no offset given".
        _off = self.tree_off if off is None else off
        if 0 <= _off < len(self.huffData):
            return ord(self.huffData[_off])
        raise Exception("Huffman tree offset %d out of range (max %d)"
                        % (_off, len(self.huffData)))

    def getHuffCode(self):
        """Read the codeLen-byte little-endian value at the current position."""
        code = 0
        for i in range(self.codeLen):
            code |= self.huff_byte(self.tree_off + i) << (i*8)
        return code

    def huffMoveRight(self):
        """Skip the node at the current position, then descend as for a 0 bit."""
        b0 = self.huff_byte()
        if b0 & 1:
            # leaf: one header byte plus its data bytes
            delta = ((b0 ^ 0xFC) >> 2) + 1
        else:
            # inner node: one code entry
            delta = self.codeLen
        self.tree_off += delta
        return self.huffMoveLeft()

    def huffMoveLeft(self):
        """Inspect the node at the current position.

        Returns the chunk length (non-zero) if it is a leaf. For an inner
        node, moves back by (code >> 1) bytes and returns 0.
        """
        b0 = self.huff_byte()
        if b0 & 1:
            res = ((b0 & 2) >> 1) + ((b0 ^ 0xFC) >> 2)
        else:
            code = self.getHuffCode()
            self.tree_off -= (code >> 1)
            res = 0
        return res

    def reset(self):
        """Load the tree data on first use and restart decoding from scratch."""
        if not self.huffData:
            mHuffMap = self.bbeb.dataMapDict["idDecodeDataMap"]
            self.bbeb.inf.seek(mHuffMap.Offset)
            self.huffData = self.bbeb.inf.read(mHuffMap.DataSize)
            self.extra_b5 = mHuffMap.extra.b5
            self.codeLen  = mHuffMap.extra.codeLen
            self.initialOffset = mHuffMap.extra.dw0
        self.tree_off = 0
        self.initialBits = self.extra_b5
        self.in_tree = False
        self.initialCodeNo = 0

    def get_chunk(self, res):
        """Decode the *res*-character chunk of the leaf at the current position.

        Each output character is the XOR of two consecutive tree bytes,
        starting with the leaf header byte. If header bit 1 is set, the chunk
        starts with '\\x1F', which counts towards *res*. On return the current
        position is on the last byte consumed.
        """
        b0 = self.huff_byte()
        chunk = ""
        if b0 & 2:
            chunk = '\x1F'
            res -= 1
        while res:
            self.tree_off += 1
            b1 = self.huff_byte()
            chunk += chr(b0^b1)
            b0 = b1
            res -= 1
        return chunk

    def feed_bit(self, bit):
        """Advance the decoder by one input bit.

        Outside the tree, bits are accumulated until extra_b5 of them select a
        starting code; inside the tree each bit picks the left (0) or right
        (1) branch. Returns the decoded chunk when a leaf is reached,
        otherwise None.
        """
        if self.in_tree:
            if bit:
                res = self.huffMoveRight()
            else:
                res = self.huffMoveLeft()
            if res:
                self.in_tree = False
                return self.get_chunk(res)
        else:
            self.initialCodeNo = (self.initialCodeNo << 1) | bit
            self.initialBits -= 1
            if self.initialBits == 0:
                # Look up the starting code and jump back to the subtree root.
                self.tree_off = self.initialOffset + self.codeLen * self.initialCodeNo
                delta = self.getHuffCode()
                self.tree_off -= (delta >> 1)
                self.initialBits = self.extra_b5
                self.initialCodeNo = 0
                self.in_tree = True
        return None

    def print_map(self):
        """Debug helper: print the tree nodes and then the starting codes.

        NOTE(review): this walk advances past the leaf header *before* calling
        get_chunk(), whereas feed_bit() calls get_chunk() with the position on
        the header byte, and it does not step past the last chunk byte
        afterwards. The traversal is kept as it was; verify against real data
        before relying on the output.
        """
        self.reset()
        print("Dumping huffman tree")
        while self.tree_off < self.initialOffset:
            b0 = self.huff_byte()
            if b0 & 1:
                off = self.tree_off
                self.tree_off += 1
                chunklen = ((b0 & 2) >> 1) + ((b0 ^ 0xFC) >> 2)
                print("0x%06X: chunk(%d) %r" % (off, chunklen, self.get_chunk(chunklen)))
            else:
                code = self.getHuffCode()
                print("0x%06X: delta 0x%06X" % (self.tree_off, code>>1))
                self.tree_off += self.codeLen
        print("starting codes:")
        for i in range(self.extra_b5):
            code = self.getHuffCode()
            print("%d: delta 0x%06X" % (i, code>>1))
            self.tree_off += self.codeLen

# Header of the idIndexPartMap section: number of indexes, three type words,
# 8 bytes of padding, then one 32-bit offset per index. IndexPartMap adds each
# offset to the section offset to locate the index.
IndexPartMapHdr = Struct("IndexPartMapHdr",
    ULInt16("IndexCount"),
    ULInt16("IdxType1_2"),
    ULInt16("IdxType4_8"),
    ULInt16("IdxType16"),
    Padding(8),
    Array(lambda ctx: ctx["IndexCount"], ULInt32("IndexOffsets"))
)

# Header found at the start of every index inside the index part map.
IndexHdr = Struct("IndexHdr",
    ULInt16("w_00"),
    ULInt16("w_02"),            # low byte is the index code (Index.getIndexCode)
    ULInt32("AmountOfKeys"),    # number of keys in this index
    Byte("b08"),
    Byte("DataSize_b09"),       # byte width of a code in the key->item table (Table_Key2Item.dataSize)
    # When w_02 == 4 the next two bytes are read as a "Language" string;
    # otherwise they are skipped as padding.
    Switch("lang", lambda ctx: ctx["w_02"],
        {
            4 : String("Language", 2),
        },
        default = Padding(2)
    ),
    Padding(4),
    # Offsets of the sub-tables relative to the start of the index. This file
    # uses slots 0 (forward search), 1 (key length), 2 (remainder key),
    # 4 (key attribute) and 5 (key -> item).
    Array(12, ULInt32("TableOffsets"))
)

# One record of the forward-search table. Index.getFirstKey uses record
# number `blockno` for key number blockno * 250.
IndexTableForwardSearchRec = Struct("ForwardSearchRec",
    String("firstKeyData", 8),  # leading bytes of the block's first key
    ULInt32("remKeyOffset"),    # offset of that key's remainder in the remainder-key table
    ULInt32("firstKeyNo"),      # not read anywhere in this file
)

class IndexTable:
    """Common base for the sub-tables of an Index.

    owner -- the Index that contains the table
    idx   -- slot number of the table in owner.hdr.TableOffsets
    """

    def __init__(self, owner, idx):
        self.owner   = owner
        self.inf     = owner.bbeb.inf
        # absolute file offset of the table
        self.offset  = owner.offset + owner.hdr.TableOffsets[idx]
        self.size    = owner.tblSize(idx)
        self.numkeys = owner.hdr.AmountOfKeys

    def byteAt(self, off, dump = False):
        """Return the byte at table-relative offset *off* as an integer.

        If *dump* is true the value is also printed for debugging.
        Raises EOFError when the position is past the end of the file
        (previously this surfaced as an opaque TypeError from ord('')).
        """
        self.inf.seek(self.offset + off)
        raw = self.inf.read(1)
        if not raw:
            raise EOFError("byteAt: table offset 0x%X (file offset 0x%X) is past end of file"
                           % (off, self.offset + off))
        b = ord(raw)
        if dump:
            print("byteAt %04X (%04X): %02X" % (off, self.offset + off, b))
        return b

class Table_ForwardSearch(IndexTable):
    """Table slot 0: the ForwardSearchRec records, read eagerly on creation."""

    def __init__(self, owner):
        IndexTable.__init__(self, owner, 0)
        # numkeys // 250 + 2 records are read back to back from the table start
        record_count = self.numkeys // 250 + 2
        self.inf.seek(self.offset)
        parsed = []
        for _ in range(record_count):
            parsed.append(IndexTableForwardSearchRec.parse_stream(self.inf))
        self.records = parsed

    def __str__(self):
        # Only the first ten records are shown.
        shown = [str(rec) for rec in self.records[:10]]
        return "ForwardSearch: " + "\n".join(shown)


class Table_KeyLength(IndexTable):
    """Table slot 1: per-key lengths.

    The table starts with one nibble per key (two keys per byte, even key
    numbers in the high nibble), followed at offset (numkeys + 1) >> 1 by one
    byte per key.
    """

    def __init__(self, owner):
        IndexTable.__init__(self, owner, 1)
        self.inf.seek(self.offset)

    def getKeyLength(self, keyNo):
        """Return (sameLen, remainLen) for *keyNo*.

        sameLen is the nibble value, remainLen the per-key byte. Returns the
        plain integer -1 (not a tuple) when keyNo is out of range or the table
        offset is 0.
        """
        if self.offset == 0 or keyNo >= self.numkeys:
            return -1
        packed = self.byteAt(keyNo >> 1)
        # odd keys use the low nibble, even keys the high nibble
        shift = 0 if keyNo & 1 else 4
        sameLen = (packed >> shift) & 0xF
        byte_table_start = (self.numkeys + 1) >> 1
        remainLen = self.byteAt(keyNo + byte_table_start)
        return sameLen, remainLen

    def __str__(self):
        lengths = [str(self.getKeyLength(k)) for k in range(250)]
        return "KeyLength:\n " + ", ".join(lengths)


class Table_RemainderKey(IndexTable):
    """Table slot 2: the key remainders, read sequentially via a cursor."""

    def __init__(self, owner):
        IndexTable.__init__(self, owner, 2)
        self.cur_key_off = 0    # table-relative read cursor

    def getKey(self, off, klen):
        """Move the cursor to *off* and read a remainder of *klen* bytes."""
        self.cur_key_off = off
        return self.getNextKey(klen)

    def getNextKey(self, klen):
        """Read (klen & 0xFF) bytes at the cursor and advance it.

        The returned data always has a "\\xF0" byte appended.
        """
        nbytes = klen & 0xFF
        start = self.cur_key_off
        self.inf.seek(self.offset + start)
        tail = self.inf.read(nbytes)
        self.cur_key_off = start + nbytes
        return tail + "\xF0"

    def __str__(self):
        return "RemainderKey"


class Table_KeyAttribute(IndexTable):
    """Table slot 4: per-key attributes.

    The first byte of the table is a mode flag: 1 means one bit per key,
    0 means one byte per key. The data starts at table offset 1.
    """

    def __init__(self, owner):
        IndexTable.__init__(self, owner, 4)
        self.inf.seek(self.offset)
        mode_byte = self.inf.read(1)
        self.isBitmap = ord(mode_byte)

    def getAtr(self, keyNo):
        """Return the attribute of *keyNo*.

        Bitmap mode yields 0x80 or 0, byte mode yields the stored byte.
        Returns -1 when keyNo is out of range or the table offset is 0, and
        None for any other mode value.
        """
        if keyNo >= self.numkeys or self.offset == 0:
            return -1
        mode = self.isBitmap
        if mode == 1:
            # bits are numbered from the most significant bit of each byte
            packed = self.byteAt(1 + (keyNo >> 3))
            return (packed << (keyNo & 7)) & 0x80
        if mode == 0:
            return self.byteAt(keyNo + 1)
        return None

    def __str__(self):
        first_attrs = ["0x%02X" % self.getAtr(k) for k in range(10)]
        return "KeyAttribute isBitmap: %d; attrs: %s" % (self.isBitmap, ", ".join(first_attrs))


class Table_Key2Item(IndexTable):
    """Table slot 5: maps key numbers to item numbers.

    Layout as read here: a signed 32-bit base value (dw_00), then
    numkeys // 250 + 1 unsigned 32-bit block offsets. Each block is a list of
    (run length byte, dataSize-byte little-endian code) pairs.
    """

    def __init__(self, owner):
        IndexTable.__init__(self, owner, 5)
        self.inf.seek(self.offset)
        self.dataSize = owner.hdr.DataSize_b09
        block_count = self.numkeys // 250 + 1
        (self.dw_00,) = struct.unpack("<i", self.inf.read(4))
        block_offsets = []
        for _ in range(block_count):
            (value,) = struct.unpack("<I", self.inf.read(4))
            block_offsets.append(value)
        # sentinel so that the last block's length can be computed
        block_offsets.append(self.size)
        self.offsets = block_offsets
        print("Index2Item: cur off = %04X, off0 = %04X" % (self.inf.tell(), self.offset + self.offsets[0]))

    def __str__(self):
        shown = ", ".join("0x%02X" % o for o in self.offsets[:10])
        return "Index2Item: dw_00 = %d\n offsets: %s" % (self.dw_00, shown)

    def getItemNo(self, keyNo):
        """Return the item number for *keyNo*.

        Walks the runs of the key's 250-key block until the accumulated run
        length exceeds the key's position in the block, then returns
        code + keyNo + dw_00 for that run's code.
        """
        blockno, remainder = divmod(keyNo, 250)
        start = self.offsets[blockno]
        self.inf.seek(self.offset + start)
        chunk = self.inf.read(self.offsets[blockno+1] - start)
        stride = self.dataSize + 1
        off = 1                     # index of the code belonging to the current run
        run_len = ord(chunk[0])
        while run_len <= remainder:
            off += stride
            run_len += ord(chunk[off-1])

        # little-endian code of dataSize bytes
        code = 0
        for shift, ch in enumerate(chunk[off:off + self.dataSize]):
            code |= ord(ch) << (shift*8)
        if len(chunk) < off + self.dataSize:
            # same failure as indexing past the end of the chunk
            raise IndexError("string index out of range")
        return code + keyNo + self.dw_00

def decodeUtf16(s):
    """Decode an index key stored as bit-rotated UTF-16LE.

    Every input byte is rotated left by one bit; consecutive byte pairs are
    then combined little-endian into one character. A dangling last byte
    (such as the terminator appended to keys) is ignored.

    s -- byte string
    Returns a unicode string.
    """
    try:
        to_char = unichr        # Python 2
    except NameError:
        to_char = chr           # Python 3

    def rol8(b):
        # Rotate within 8 bits. Without the mask, bit 7 of the input would
        # spill into bit 8 and corrupt the other byte of the code unit.
        return ((b << 1) | (b >> 7)) & 0xFF

    chars = []
    for i in range(0, len(s) - 1, 2):
        lo = rol8(ord(s[i]))
        hi = rol8(ord(s[i+1]))
        chars.append(to_char(lo | (hi << 8)))
    return u"".join(chars)

# Maps a one-byte index code (the list position) to a two-byte Shift_JIS
# code. Entries of 0 are unassigned slots.
sjisCodes = [
    0x824F, 0x8250, 0x8251, 0x8252, 0x8253, 0x8254, 0x8255, 0x8256, # 0
    0x8257, 0x8258,      0,      0,      0,      0,      0,      0, # 8
    0x8260, 0x8261, 0x8262, 0x8263, 0x8264, 0x8265, 0x8266, 0x8267, # 0x10
    0x8268, 0x8269, 0x826A, 0x826B, 0x826C, 0x826D, 0x826E,      0, # 0x18
    0x826F, 0x8270, 0x8271, 0x8272, 0x8273, 0x8274, 0x8275, 0x8276, # 0x20
    0x8277, 0x8278, 0x8279,      0,      0,      0,      0,      0, # 0x28
    0x8281, 0x8282, 0x8283, 0x8284, 0x8285, 0x8286, 0x8287, 0x8288, # 0x30
    0x8289, 0x828A, 0x828B, 0x828C, 0x828D, 0x828E, 0x828F,      0, # 0x38
    0x8290, 0x8291, 0x8292, 0x8293, 0x8294, 0x8295, 0x8296, 0x8297, # 0x40
    0x8298, 0x8299, 0x829A,      0,      0,      0,      0,      0, # 0x48
    0x8140, 0x8149, 0x8168, 0x81F2, 0x8190, 0x8193, 0x8195, 0x8166, # 0x50
    0x8169, 0x816A, 0x8196, 0x817B, 0x8143, 0x817C, 0x8144,      0, # 0x58
    0x815E, 0x8146, 0x8147, 0x8183, 0x8181, 0x8184, 0x8148, 0x8197, # 0x60
    0x816D, 0x815F, 0x816E, 0x814F, 0x8151, 0x814D, 0x816F,      0, # 0x68
    0x8162, 0x8170, 0x8160, 0x8145, 0x815D ]                        # 0x70

def decodeSjis(s):
    """Decode a key made of one-byte index codes into unicode text.

    Each byte is looked up in sjisCodes and the resulting two-byte code is
    decoded as Shift_JIS X 0213. Decoding stops at the first byte outside the
    table, or right after a byte whose low nibble is 0xF.
    """
    r = u""
    for ch in s:
        b = ord(ch)
        if b >= len(sjisCodes):
            break
        cp = sjisCodes[b]
        js = chr(cp>>8) + chr(cp & 0xFF)
        r += js.decode('sjisx0213')
        if (b & 0xF) == 0xF:
            break
    return r

def encodeSjis(s):
    """Encode Shift_JIS text into one-byte index codes (inverse of the table).

    s is taken two bytes at a time; pairs that have no entry in sjisCodes are
    skipped, and a dangling last byte is ignored.
    """
    codes = []
    for i in range(0, len(s) - 1, 2):
        # Parenthesised on purpose: '+' binds tighter than '<<', so the
        # unparenthesised form shifted by (8 + low byte) and never matched.
        ch = (ord(s[i]) << 8) | ord(s[i+1])
        try:
            b = sjisCodes.index(ch)
        except ValueError:
            # not representable as an index code
            continue
        codes.append(chr(b))
    return "".join(codes)

def decodeDictString(s, indexCode):
    """Decode key data *s* according to the index code of its index.

    Index code 4 uses decodeUtf16, codes 1 and 15 use decodeSjis. Any other
    code yields None.
    """
    if indexCode in (1, 15):
        return decodeSjis(s)
    if indexCode == 4:
        return decodeUtf16(s)
    return None

class Index:
    """One search index of the index part map, together with its sub-tables.

    bbeb   -- owning BBeBDict (supplies the open file as bbeb.inf)
    offset -- absolute file offset of the index header
    size   -- size value used for the length of the last sub-table
    """

    def __init__(self, bbeb, offset, size):
        self.bbeb = bbeb
        self.offset = offset
        self.size   = size
        self.bbeb.inf.seek(offset)
        self.hdr = IndexHdr.parse_stream(self.bbeb.inf)
        self.fwdSearchTbl    = Table_ForwardSearch(self)
        self.keyLengthTbl    = Table_KeyLength(self)
        self.remainderKeyTbl = Table_RemainderKey(self)
        self.keyAttrTbl      = Table_KeyAttribute(self)
        self.key2ItemTbl     = Table_Key2Item(self)
        self.keyno = None   # number of the most recently decoded key

    def tblSize(self, num):
        """Return the size of sub-table *num*.

        This is the distance to the next table offset, or self.size minus the
        table's own offset for the last slot.
        """
        start = self.hdr.TableOffsets[num]
        if num + 1 < len(self.hdr.TableOffsets):
            return self.hdr.TableOffsets[num+1] - start
        else:
            return self.size - start

    def __str__(self):
        return str(self.hdr)

    def getFirstKey(self, blockno):
        """Decode the first key of 250-key block *blockno* and return its data."""
        rec = self.fwdSearchTbl.records[blockno]
        self.keyno = blockno * 250
        klen1, klen2 = self.keyLengthTbl.getKeyLength(self.keyno)
        # prefix comes from the forward-search record, the rest from the
        # remainder table
        self.kdata = rec.firstKeyData[:klen1] + self.remainderKeyTbl.getKey(rec.remKeyOffset, klen2)
        self.rem_off = rec.remKeyOffset + klen2
        return self.kdata

    def getNextKey(self):
        """Decode the key following the current one and return its data.

        The key shares its first klen1 bytes with the previous key; the
        remainder is read at the remainder table's cursor.
        """
        self.keyno += 1
        klen1, klen2 = self.keyLengthTbl.getKeyLength(self.keyno)
        self.kdata = self.kdata[:klen1] + self.remainderKeyTbl.getNextKey(klen2)
        self.rem_off += klen2
        return self.kdata

    def getKey(self, keyno):
        """Return the raw data of key number *keyno*.

        Continues from the previously decoded key when *keyno* is its direct
        successor; otherwise restarts from the first key of the block.
        NOTE(review): a sequential step across a block boundary does not
        re-read the forward-search record -- confirm this is intended.
        """
        if self.keyno is None or self.keyno + 1 != keyno:
            kdata = self.getFirstKey(keyno // 250)
        while self.keyno != keyno:
            kdata = self.getNextKey()
        return kdata

    def getIndexCode(self):
        """Return the index code, i.e. the low byte of header word w_02."""
        return self.hdr.w_02 & 0xFF

    def printTables(self):
        """Debug helper: print the index code and every sub-table."""
        print("IndexCode: %d" % self.getIndexCode())
        print(self.fwdSearchTbl)
        print(self.keyLengthTbl)
        print(self.remainderKeyTbl)
        print(self.keyAttrTbl)
        print(self.key2ItemTbl)

    def printKeys(self):
        """Debug helper: print up to the first 250 keys and their item text."""
        cnt = min(250, self.hdr.AmountOfKeys)
        for i in range(cnt):
            k = self.getKey(i)
            itemno = self.key2ItemTbl.getItemNo(i)
            # Joined by hand so the output matches the former multi-argument
            # print statement.
            print(" ".join([
                "Key %d:" % i,
                k.encode('hex'),
                decodeDictString(k, self.getIndexCode()).encode('utf-8'),
                "item %d" % itemno,
                "atr: %02X" % self.keyAttrTbl.getAtr(i),
            ]))
            print(self.bbeb.itemAdrMap.extractItem(itemno))


class IndexPartMap:
    """The idIndexPartMap section: a header followed by the indexes it lists."""

    def __init__(self, bbeb):
        self.bbeb = bbeb
        maprec = self.bbeb.dataMapDict["idIndexPartMap"]
        base, total = maprec.Offset, maprec.DataSize
        self.bbeb.inf.seek(base)
        self.hdr = IndexPartMapHdr.parse_stream(self.bbeb.inf)
        # Index offsets are relative to the section start; every Index is
        # given the size of the whole section.
        loaded = []
        for n in range(self.hdr.IndexCount):
            loaded.append(Index(bbeb, base + self.hdr.IndexOffsets[n], total))
        self.indexes = loaded

    def __str__(self):
        return str(self.hdr)

class ItemAdrMap:
    """Item address table plus extraction of item text from the main text map.

    Every item has a fixed-size, XOR-obfuscated record in the idItemAdrMap
    section holding a flag byte and a little-endian offset into the
    idMainTextMap section. The item text is a bit stream that is decoded
    through bbeb.huffTree.
    """

    # 256-byte XOR key for the address records; byte j of record i is XORed
    # with _ITEM_KEY[(i + j) & 0xFF].
    _ITEM_KEY = (
        246, 1, 57, 240, 91, 143, 130, 186, 81, 81, 197, 134,
        140, 102, 219, 82, 77, 25, 33, 8, 196, 105, 158, 25,
        62, 14, 72, 58, 16, 108, 14, 34, 177, 99, 108, 80,
        226, 83, 179, 44, 98, 105, 198, 145, 96, 177, 246,
        80, 156, 84, 150, 105, 54, 184, 129, 102, 249, 140,
        206, 11, 109, 220, 131, 78, 171, 247, 30, 231, 30,
        223, 44, 162, 185, 16, 110, 157, 222, 17, 14, 122,
        235, 201, 249, 52, 124, 7, 47, 62, 64, 136, 135, 182,
        181, 27, 1, 188, 50, 132, 140, 124, 55, 97, 130, 250,
        32, 175, 29, 204, 117, 253, 59, 108, 156, 45, 10, 148,
        137, 244, 240, 159, 129, 61, 141, 98, 92, 231, 30,
        114, 12, 128, 79, 232, 34, 97, 169, 102, 79, 93, 200,
        161, 34, 141, 90, 180, 202, 48, 44, 116, 39, 145, 1,
        33, 254, 195, 116, 103, 44, 225, 104, 196, 53, 138,
        41, 181, 251, 60, 101, 50, 235, 115, 9, 228, 230, 51,
        184, 209, 132, 81, 181, 168, 221, 96, 134, 56, 66,
        29, 237, 181, 104, 184, 151, 222, 173, 153, 221, 199,
        100, 149, 180, 139, 136, 38, 48, 134, 124, 129, 194,
        61, 15, 185, 54, 14, 195, 71, 114, 122, 168, 83, 13,
        58, 233, 103, 182, 74, 244, 106, 206, 2, 230, 208,
        60, 69, 165, 24, 74, 141, 33, 126, 175, 114, 34, 10,
        207, 185, 103, 130, 124, 232, 211, 136, 95, 72, 57,
        102, 87, 0,
    )

    def __init__(self, bbeb):
        self.bbeb = bbeb
        mtext = self.bbeb.dataMapDict["idMainTextMap"]
        self.textsize = mtext.DataSize
        self.textoffset = mtext.Offset
        adrmap = self.bbeb.dataMapDict["idItemAdrMap"]
        n = adrmap.extra.Amount
        s = adrmap.extra.itemRecSize
        # the section must hold exactly n records of s bytes
        if adrmap.DataSize != n*s:
            raise Exception("Warning: inconsistency in ItemAdrMap!")

        self.itemcount = n
        self.itemsize = s
        self.adroffset = adrmap.Offset
        self.encoding = bbeb.hdr.DicHeader.Encoding

    def getItemAdr(self, i):
        """Return (flag, offset) for item *i*.

        flag is the first de-obfuscated byte of the record; offset is the
        remaining bytes read as a little-endian integer.
        Raises Exception if *i* is not smaller than the item count.
        """
        if i >= self.itemcount:
            # The message reports the offending item number (this path used
            # to reference an undefined name and died with NameError).
            raise Exception("Warning: item number (%d) too large!" % i)
        s = self.itemsize
        self.bbeb.inf.seek(self.adroffset + s*i)
        item = bytearray(self.bbeb.inf.read(s))
        # decrypt
        key = self._ITEM_KEY
        for j in range(s):
            item[j] ^= key[(i+j) & 0xFF]
        # extract flag byte and the rest
        flag, item = item[0], item[1:]
        item_off = 0
        for k in reversed(item):
            item_off = (item_off << 8) + k
        return flag, item_off

    def extractItem(self, i, decode = False):
        """Return the text of item *i*, or None if *i* is out of range.

        The item's data runs from its own offset up to the next item's offset
        (or to the end of the text section for the last item). The low three
        bits of each flag give the bit position inside the first / last byte
        at which the item starts / ends. Chunks that begin with 0x1F are
        rendered as "<1F xx ...>" control tags.

        With decode=False the raw byte string is returned. With decode=True
        it is decoded as UTF-16LE when the header encoding is 1, otherwise as
        Shift_JIS X 0213; on a Shift_JIS decoding error the bad bytes are
        reported and only the text before them is returned.
        """
        if i >= self.itemcount:
            return None
        flag, itemOff = self.getItemAdr(i)
        start_off = itemOff
        startpos = flag & 7
        if i + 1 < self.itemcount:
            flag, itemOff = self.getItemAdr(i+1)
            data_len = itemOff - start_off
            endpos = flag & 7
            if endpos:
                # the item ends part-way through one more byte
                data_len += 1
        else:
            data_len = self.textsize - start_off
            endpos = 0
        self.bbeb.inf.seek(self.textoffset + start_off)
        data = self.bbeb.inf.read(data_len)
        data_off = 0
        pieces = []
        startbit = startpos
        stopbit = 8
        self.bbeb.huffTree.reset()
        while data_off < data_len:
            if data_off == data_len - 1 and endpos != 0:
                stopbit = endpos
            bd = ord(data[data_off])
            # feed bits most significant first
            while startbit < stopbit:
                bit = (bd >> (7-startbit)) & 1
                chunk = self.bbeb.huffTree.feed_bit(bit)
                if chunk:
                    if chunk[0] == '\x1F':
                        pieces.append("<1F " + " ".join('%02X' % ord(c) for c in chunk[1:]) + ">")
                    else:
                        pieces.append(chunk)
                startbit += 1
            data_off += 1
            startbit = 0
        unpacked = "".join(pieces)
        if not decode:
            return unpacked
        if self.encoding == 1:
            return unpacked.decode('utf-16le')
        else:
            try:
                return unpacked.decode('sjisx0213')
            except UnicodeDecodeError as e:
                t = unpacked[e.start:e.end]
                print("Bad sjis text: " + t.encode('hex'))
                return unpacked[:e.start].decode('sjisx0213')


class BBeBDict:
    """An opened BBeB dictionary file.

    Parses the file header and data map table, decodes the section offsets
    and sets up the Huffman tree, item address map and index part map.

    The file stays open for the lifetime of the object because the helper
    objects read from it on demand. Call close() when done, or use the
    object as a context manager.
    """

    # Keys used to de-obfuscate the data map table offsets (see DecodeKey).
    _DATA_MAP_KEY = (0xBE9451F5, 0xC66452CF, 0x436D5784, 0x7D8E5947, 0xF2A9566D,
                     0xFC7916EE, 0xE645CA80, 0xF2AB5D26, 0x61333311, 0x5EB73C49)

    def __init__(self, filename):
        self.inf = open(filename, "rb")
        try:
            self.hdr = Dic.parse_stream(self.inf)
            self._parseMapTable()
            self.huffTree = HuffTree(self)
            self.itemAdrMap = ItemAdrMap(self)
            self.idxMap = IndexPartMap(self)
        except Exception:
            # do not leak the file handle when the file cannot be parsed
            self.inf.close()
            raise

    def close(self):
        """Close the underlying file."""
        self.inf.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def _parseMapTable(self):
        """Derive the map key and decode the offset of every data map entry.

        The key is taken from the last character of the CID: a digit is used
        as is, an upper-case letter maps to (letter - 'A') % 10, anything
        else to 0. Each entry gets an added Offset attribute and is stored in
        dataMapDict under its section id.
        """
        id15 = self.hdr.Info.CID[15]
        if id15.isdigit():
            self.mapKey = int(id15)
        elif id15.isupper():
            self.mapKey = (ord(id15) - ord('A')) % 10
        else:
            self.mapKey = 0
        self.dataMapDict = {}
        for i, e in enumerate(self.hdr.DataMapTable):
            e.Offset = self.DecodeKey(i, e.OffsetKey)
            self.dataMapDict[e.id] = e

    def DecodeKey(self, i, key):
        """Return the real offset for table entry *i* with stored value *key*.

        The stored value is XORed with the key selected by (mapKey + i) % 10,
        byte-swapped, and rotated left by the low three bits of that key, all
        within 32 bits.
        """
        key2 = self._DATA_MAP_KEY[(self.mapKey + i) % 10]
        r = key2 & 7
        key2 ^= key
        # 32-bit byte swap
        key2 = ((key2 >> 24) & 0xFF) | ((key2 & 0xFF0000) >> 8) | ((key2 & 0xFF00) << 8) | ((key2 & 0xFF) << 24)
        # 32-bit rotate left by r
        return (key2 >> (32-r)) | ((key2 << r) & 0xFFFFFFFF)


    def numItems(self):
        """Return the number of items recorded in the item address map."""
        return self.dataMapDict["idItemAdrMap"].extra.Amount

    def getItemText(self, i, decode = False):
        """Return the text of item *i*; see ItemAdrMap.extractItem."""
        return self.itemAdrMap.extractItem(i, decode)


def head2index(s):
    """Reduce a headword to the characters used as its index key.

    Only fullwidth Latin letters and fullwidth digits are kept; lower-case
    letters are converted to upper case and every other character is
    dropped.

    s -- unicode headword
    Returns a unicode string.
    """
    try:
        to_char = unichr        # Python 2
    except NameError:
        to_char = chr           # Python 3

    kept = []
    for c in s:
        oc = ord(c)
        # Plain integer comparisons instead of building a range() per test.
        if 0xFF41 <= oc <= 0xFF5A:      # fullwidth a-z
            kept.append(to_char(oc - 0x20))     # convert to uppercase
        elif 0xFF21 <= oc <= 0xFF3A:    # fullwidth A-Z
            kept.append(to_char(oc))
        elif 0xFF10 <= oc <= 0xFF19:    # fullwidth 0-9
            kept.append(to_char(oc))
    return u"".join(kept)

# Script body: open the dictionary named on the command line, dump its
# header, then build and print a sorted index of the item headwords.
d = BBeBDict(sys.argv[1])
print("<pre>")
print(d.hdr)
#print d.extractItem(1)
"""
print "IndexPartMap:", d.idxMap
i = 0
for idx in d.idxMap.indexes:
    print "Index %d:" % i
    print idx
    idx.printTables()
    idx.printKeys()
    i += 1
"""

#d.huffTree.print_map()
#d.printHuffMap()

# indexwords is kept sorted; index2item holds the matching item numbers.
indexwords = []
index2item = []
re_stripcodes = re.compile(r"\<1F.*?\>")
for i in range(d.numItems()):
    it = d.getItemText(i, True)
    hs = it.find(u'<1F 41><1F 16>') # <MainHeadline>
    he = it.find(u'<1F', hs+14) # </MainHeadline>
    if hs != -1 and he != -1:
        # drop the control tags from the headline
        head = re_stripcodes.sub("", it[hs:he])
        hi = head2index(head)
        ni = bisect.bisect_left(indexwords, hi)
        indexwords.insert(ni, hi)
        index2item.insert(ni, i)
        print("%d: '%s' -> '%s'" % (i, head.encode('utf-8'), hi.encode('utf-8')))
        # stop once an item numbered 1000 or above has been indexed
        if i >= 1000:
            break

print("sorted index:")
for word, itemno in zip(indexwords, index2item):
    print("%s: %i" % (word.encode('utf-8'), itemno))
