View Single Post
Old 12-10-2012, 08:11 PM   #66
Padde
Junior Member
Padde began at the beginning.
 
Posts: 6
Karma: 10
Join Date: Nov 2012
Location: Germany
Device: Kindle Paperwhite
I had some trouble with the latest Version of KindleStrip (from THIS post) under ActivePython v3.2.2.3 (Win64-x64).

Error Message on first try:
Code:
File "D:\Work\XAMPP\htdocs\test\kindlestrip_py3.py", line 224, in <module>
    data_file = file(infile, 'rb').read()
NameError: name 'file' is not defined
Error Message on second try after replacing all calls to file() with open():
Code:
Traceback (most recent call last):
  File "D:\Work\XAMPP\htdocs\test\kindlestrip_py3.py", line 228, in <module>
    print("Header Bytes: " + binascii.b2a_hex(strippedFile.getHeader()))
TypeError: Can't convert 'bytes' object to str implicitly
Bugfixed portion of Code:
Spoiler:

Code:
# Command-line entry point: strip the SRCS record from <infile>, write the
# result to <outfile> and, when a third argument is given, also save the
# removed data to <strippeddatafile>.
if __name__ == "__main__":
    sys.stdout=Unbuffered(sys.stdout)
    print(('KindleStrip v%(__version__)s. '
       'Written 2010-2012 by Paul Durrant and Kevin Hendricks.' % globals()))
    if len(sys.argv)<3 or len(sys.argv)>4:
        print("Strips the Sources record from Mobipocket ebooks")
        print("For ebooks generated using KindleGen 1.1 and later that add the source")
        print("Usage:")
        print("    %s <infile> <outfile> <strippeddatafile>" % sys.argv[0])
        print("<strippeddatafile> is optional.")
        sys.exit(1)
    else:
        infile = sys.argv[1]
        outfile = sys.argv[2]
        # Context managers close every file handle deterministically instead of
        # leaving that to the garbage collector.
        with open(infile, 'rb') as source:
            data_file = source.read()
        try:
            strippedFile = SectionStripper(data_file)
            with open(outfile, 'wb') as target:
                target.write(strippedFile.getResult())
            # b2a_hex() returns bytes on Python 3; decode it so the hex digits
            # are printed as text rather than as a b'...' literal.
            header_hex = binascii.b2a_hex(strippedFile.getHeader()).decode('ascii')
            print("Header Bytes: %s" % header_hex)
            if len(sys.argv)==4:
                with open(sys.argv[3], 'wb') as target:
                    target.write(strippedFile.getStrippedData())
        except StripException as e:
            print("Error: %s" % e)
            sys.exit(1)
    sys.exit(0)


For a project of mine I created this PHP port of KindleStrip:
Spoiler:

The Class Mobi:
Code:
<?php
# This PHP Class is a PHP port of KindleStrip from Paul Durrant.
# Please refer to <https://www.mobileread.com/forums/showthread.php?t=96903>
# for more information on the original Python version of KindleStrip.
#
# This script strips the penultimate record from a Mobipocket file.
# This is useful because the current KindleGen adds a compressed copy
# of the source files used in this record, making the ebook produced
# about twice as big as it needs to be.
#
#
# This is free and unencumbered software released into the public domain.
# 
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
# 
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
# 
# For more information, please refer to <http://unlicense.org/>
#
# Written by Patrick Klös, 2012, pkloes@web.de, padde on mobileread.com
#
# Changelog
#  1.00 - Initial version


/**
 * Loads a Mobipocket file into memory and removes the embedded SRCS
 * (source archive) sections from it.
 *
 * All multi-byte values are read and written big-endian ("n"/"N" pack codes).
 */
class Mobi {
    const MOBI_FORMAT_IDENTIFIER                = "BOOKMOBI";
    const MOBI_SRCS_RECORD_IDENTIFIER           = "SRCS";
    const MOBI_EXTH_RECORD_IDENTIFIER           = "EXTH";
    const MOBI_EXTH_RECORD_CHECKBIT             = 0x40;
    const MOBI_EXTH_BOUNDARY_SECTION_IDENTIFIER = 0x79;
    
    /** @var string|null Raw bytes of the loaded file, null when nothing valid is loaded. */
    private $data = null;
    /** @var string|null Bytes produced by the last successful stripSRCS() call. */
    private $output = null;
    
    /**
     * @param string|null $file Optional path of a file to load immediately.
     */
    public function __construct($file = null) {
        if($file != null) {
            $this->load($file);
        }
    }
    
    /**
     * Reads $file into memory, replacing anything loaded before.
     *
     * @param string $file Path of the Mobipocket file.
     * @return bool True when the file was read and carries the BOOKMOBI signature.
     */
    public function load($file) {
        // Forget the previous file first so that a failed load cannot leave
        // stale data (or stale stripped output) behind.
        $this->data   = null;
        $this->output = null;
        
        if(is_readable($file)) {
            $contents = file_get_contents($file);
            
            if($contents !== false) {
                $this->data = $contents;
            }
        }
        
        return $this->isValid();
    }
    
    /**
     * Checks for the "BOOKMOBI" signature at offset 0x3C and discards the
     * loaded data when it is missing.
     *
     * @return bool
     */
    public function isValid() {
        $valid = false;
        
        // The signature occupies bytes 0x3C..0x43, so shorter input can never match.
        if(is_string($this->data) && strlen($this->data) >= 0x44) {
            $valid = (substr($this->data, 0x3C, 8) === Mobi::MOBI_FORMAT_IDENTIFIER);
        }
        
        if(!$valid) {
            $this->data = null;
        }
        
        return $valid;
    }
    
    /**
     * @return string|false The loaded file contents, or false when nothing valid is loaded.
     */
    public function getOriginalData() {
        return ($this->data !== null) ? $this->data : false;
    }
    
    /**
     * @return string|false Output of the last successful strip, or false when there is none.
     */
    public function getStrippedData() {
        return ($this->output !== null) ? $this->output : false;
    }
    
    /**
     * Removes the SRCS sections referenced by the MOBI header.
     *
     * @return string|false The stripped file; the unmodified data when no usable
     *                      SRCS reference is found; false when no file is loaded.
     */
    public function stripSRCS() {
        if(!is_string($this->data)) {
            return false;
        }
        
        // 78 bytes of PDB header plus at least one 8-byte section entry and
        // room for the second entry that is read below.
        if(strlen($this->data) < 94) {
            return $this->data;
        }
        
        $data_length  = strlen($this->data);
        $num_sections = $this->unpack_from("n", $this->data, 76);
        
        $offset0 = $this->unpack_from("N", $this->data, 78);
        // With a single section there is no second table entry; section 0 then runs to EOF.
        $offset1 = ($num_sections > 1) ? $this->unpack_from("N", $this->data, 86) : $data_length;
        
        $mobiheader = substr($this->data, $offset0, $offset1 - $offset0);
        
        // The SRCS section number/count live at 0xe0..0xe7 of section 0.
        if(strlen($mobiheader) < 0xe8) {
            return $this->data;
        }
        
        list($srcs_secnum, $srcs_cnt) = $this->unpack_from("N2", $mobiheader, 0xe0);

        if($srcs_secnum == 0xffffffff || $srcs_cnt == 0) {
            return $this->data;
        }

        $next = $srcs_secnum + $srcs_cnt;
        
        // Section 0 is the header itself and the SRCS run must fit inside the table.
        if($srcs_secnum < 1 || $next > $num_sections) {
            return $this->data;
        }
        
        list($srcs_offset, $flgval) = $this->unpack_from("N2", $this->data, 78 + ($srcs_secnum * 8));
        
        if($next < $num_sections) {
            list($next_offset, $flgval) = $this->unpack_from("N2", $this->data, 78 + ($next * 8));
        } else {
            // SRCS is the last section: it ends where the file ends.
            $next_offset = $data_length;
        }
        
        $srcs_length = $next_offset - $srcs_offset;

        if(substr($this->data, $srcs_offset, 4) !== Mobi::MOBI_SRCS_RECORD_IDENTIFIER) {
            return $this->data;
        }

        $new_sections = $num_sections - $srcs_cnt;

        $output  = substr($this->data, 0, 68) . pack("N", ($new_sections * 2) + 1);
        $output .= substr($this->data, 72, 4);
        $output .= pack("n", $new_sections);

        // Sections before SRCS only move up by the size of the removed table entries.
        $delta = $srcs_cnt * -8;

        // Plain for loops: range(a, b) counts DOWN when b < a, which would
        // emit bogus entries for an empty interval.
        for($i = 0; $i < $srcs_secnum; $i++) {
            list($offset, $flgval) = $this->unpack_from("N2", $this->data, ($i * 8) + 78);
            $offset += $delta;
        
            $output .= pack("N", $offset) . pack("N", $flgval);
        }

        // Sections after SRCS additionally move up by the removed payload.
        $delta = $delta - $srcs_length;

        for($i = $next; $i < $num_sections; $i++) {
            list($offset, $flgval) = $this->unpack_from("N2", $this->data, ($i * 8) + 78);
            $offset += $delta;
            $flgval = ($i - $srcs_cnt) * 2;
            
            $output .= pack("N", $offset) . pack("N", $flgval);
        }
        
        $first_offset = $this->unpack_from("N", $output, 78);

        // Keep the gap between the section table and section 0 zero-filled.
        $output .= str_repeat("\0", max(0, $first_offset - strlen($output)));
        $output .= substr($this->data, $offset0, $srcs_offset - $offset0);
        $output .= substr($this->data, $srcs_offset + $srcs_length);

        $offset0 = $this->unpack_from("N", $output, 78);
        $offset1 = ($new_sections > 1) ? $this->unpack_from("N", $output, 86) : strlen($output);

        // Mark the SRCS reference in the MOBI header as absent.
        $mobiheader = substr($output, $offset0, $offset1 - $offset0);
        $mobiheader = substr($mobiheader, 0, 0xe0) . pack("N", 0xffffffff) . pack("N", 0) . substr($mobiheader, 0xe8);
        $mobiheader = $this->fixExthRecord($srcs_secnum, $srcs_cnt, $mobiheader);

        $this->output = substr($output, 0, $offset0) . $mobiheader . substr($output, $offset1);

        return $this->output;
    }
    
    /**
     * Lowers every EXTH item of type 0x79 whose section pointer is at or after
     * the removed SRCS sections by the number of removed sections.
     *
     * @param int    $srcs_secnum First removed section number.
     * @param int    $srcs_cnt    Number of removed sections.
     * @param string $mobiheader  Contents of section 0.
     * @return string Section 0 with the adjusted pointers (same length).
     */
    private function fixExthRecord($srcs_secnum, $srcs_cnt, $mobiheader) {
        $mobi_length = $this->unpack_from("N", $mobiheader, 0x14);
        $exth_flag   = $this->unpack_from("N", $mobiheader, 0x80);
        
        if($exth_flag & Mobi::MOBI_EXTH_RECORD_CHECKBIT) {
            $exth        = substr($mobiheader, $mobi_length + 16);
            $exth_length = strlen($exth);
        
            // 12 bytes are needed: identifier (4), 4 skipped bytes, item count (4).
            if($exth_length >= 12 && substr($exth, 0, 4) == Mobi::MOBI_EXTH_RECORD_IDENTIFIER) {
                $nitems = $this->unpack_from("N", $exth, 8);
                $pos = 12;
            
                for($i = 0; $i < $nitems; $i++) {
                    if($pos + 8 > $exth_length) {
                        break;
                    }
                    
                    list($type, $size) = $this->unpack_from("N2", $exth, $pos);
                    
                    // An item is at least its own 8-byte header; anything
                    // smaller would stop the cursor from advancing.
                    if($size < 8) {
                        break;
                    }

                    if($type == Mobi::MOBI_EXTH_BOUNDARY_SECTION_IDENTIFIER && $pos + 12 <= $exth_length) {
                        $boundaryptr = $this->unpack_from("N", $exth, $pos + 8);
                    
                        if($srcs_secnum <= $boundaryptr) {
                            $boundaryptr -= $srcs_cnt;
                            
                            // The item payload sits at exth start (mobi_length + 16) + pos + 8.
                            $prefix = substr($mobiheader, 0, $mobi_length + $pos + 0x18);
                            $suffix = substr($mobiheader, $mobi_length + $pos + 0x1C);
                            $nval   = pack("N", $boundaryptr);
                            
                            $mobiheader = $prefix . $nval . $suffix;
                        }
                    }
                
                    $pos += $size;
                }
            }
        }

        return $mobiheader;
    }    
    
    /**
     * unpack() starting at a byte offset.
     *
     * @param string $format pack()/unpack() format code(s).
     * @param string $src    Binary string to read from.
     * @param int    $offset Byte offset of the first value.
     * @return mixed The single value, or a 0-based array when the format yields several.
     */
    private function unpack_from($format, $src, $offset) {
        // array_merge() renumbers unpack()'s 1-based keys from 0 so list() works.
        $result = array_merge(unpack("@" . strval($offset) . "/" . $format, $src));
        
        foreach($result as $key => $val) {
            // Map negative integers back to their unsigned 32-bit value.
            if(is_int($val) && $val < 0) {
                $result[$key] = $val + 4294967296;
            }
        }
        
        return (count($result) == 1) ? array_shift($result) : $result;
    }
}
?>
Sample usage:
Code:
<?php
require_once("Mobi.php");

$mobi = new Mobi("test.mobi");

// Only strip when the file was read and carries the BOOKMOBI signature;
// otherwise there is nothing valid to write to strip.mobi.
if(!$mobi->isValid()) {
    exit("test.mobi is not a readable Mobipocket file\n");
}

file_put_contents("strip.mobi", $mobi->stripSRCS());
?>


### EDIT ###
I decided to switch to Java for my project, so here is an implementation in Java:
Spoiler:

Code:
package de.pkloes.ComicConv.mobi.Exceptions;

/**
 * Signals that a file is not a Mobipocket file or that its structure is broken.
 */
public class InvalidMobiFileException extends Exception {
    private static final long serialVersionUID = 1L;

    /** Creates an exception without a detail message. */
    public InvalidMobiFileException() {
        super();
    }

    /**
     * @param message description of what is wrong with the file
     */
    public InvalidMobiFileException(String message) {
        super(message);
    }

    /**
     * @param message description of what is wrong with the file
     * @param cause   underlying failure, kept so the original stack trace is not lost
     */
    public InvalidMobiFileException(String message, Throwable cause) {
        super(message, cause);
    }
}
Code:
package de.pkloes.ComicConv.mobi;

import de.pkloes.ComicConv.mobi.Exceptions.InvalidMobiFileException;

import java.io.*;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;

/**
 * Helpers for Mobipocket files. All multi-byte values are read and written
 * big-endian (the {@link ByteBuffer} default byte order).
 */
public class Tools {
    final private static long MOBI_FORMAT_IDENTIFIER = 0x424f4f4b4d4f4249L;
    final private static int MOBI_SRCS_RECORD_IDENTIFIER = 0x53524353;
    final private static int MOBI_EXTH_RECORD_IDENTIFIER = 0x45585448;
    final private static int MOBI_EXTH_BOUNDARY_SECTION_IDENTIFIER = 0x79;

    /**
     * Copies {@code mobi} to {@code stripped} without its SRCS records.
     *
     * <p>The record table is rewritten for the reduced record count, the SRCS
     * reference in record 0 (offset 0xe0) is reset to {@code 0xffffffff / 0}
     * and EXTH items of type 0x79 pointing at or behind the removed records are
     * lowered by the number of removed records. When record 0 references no
     * SRCS record the file is copied unchanged.
     *
     * <p>{@code stripped} is truncated before {@code mobi} is read, so the two
     * must not denote the same file.
     *
     * @param mobi     source Mobipocket file
     * @param stripped destination file; created or overwritten
     * @throws IOException              on any read/write failure
     * @throws InvalidMobiFileException when the signature is missing, the file is
     *                                  truncated or the record table / SRCS
     *                                  reference is inconsistent
     */
    public static void stripSRCS(File mobi, File stripped) throws IOException, InvalidMobiFileException {
        FileInputStream fin = null;
        RandomAccessFile fout = null;

        try {
            fin = new FileInputStream(mobi);
            FileChannel fich = fin.getChannel();
            fout = new RandomAccessFile(stripped, "rw");
            FileChannel foch = fout.getChannel();
            fout.setLength(0);

            ByteBuffer bul = ByteBuffer.allocate(8);
            ByteBuffer bui = ByteBuffer.allocate(4);
            ByteBuffer bus = ByteBuffer.allocate(2);

            long in_size = fich.size();

            // Signature at 0x3c plus record count (76) and first table entry (78..85).
            if(in_size < 86) {
                throw new InvalidMobiFileException("Unsupported Mobi Format");
            }

            readFully(fich, bul, 0x3c);

            if((getUInt(bul, 0) << 32 | getUInt(bul, 4)) != MOBI_FORMAT_IDENTIFIER) {
                throw new InvalidMobiFileException("Unsupported Mobi Format");
            }

            readFully(fich, bul, 76);
            int record_count = bul.getChar(0);
            long header_offset = getUInt(bul, 2);

            // Record 0 is too short to carry an SRCS reference: nothing to strip.
            if(header_offset + 0xe8 > in_size) {
                transferFully(fich, 0, in_size, foch);
                return;
            }

            readFully(fich, bul, header_offset + 0xe0);
            long srcs_record_number = getUInt(bul, 0);
            long srcs_record_count  = getUInt(bul, 4);

            if(srcs_record_number == 0xffffffffL || srcs_record_count == 0) {
                transferFully(fich, 0, in_size, foch);
                return;
            }

            long table_end = 78L + (8L * record_count);
            long srcs_end_number = srcs_record_number + srcs_record_count;

            if(header_offset < table_end) {
                throw new InvalidMobiFileException("Broken Record Table");
            }

            // Record 0 is the header itself and the SRCS run has to fit into the table.
            if(srcs_record_number < 1 || srcs_end_number > record_count) {
                throw new InvalidMobiFileException("Broken SRCS Record");
            }

            // Entry x of the table: offset at x * 8, flags at x * 8 + 4.
            ByteBuffer table = ByteBuffer.allocate(8 * record_count);
            readFully(fich, table, 78);

            long strip_start_offset = getUInt(table, (int) (srcs_record_number * 8));
            // When SRCS is the last record there is no following table entry;
            // the record then ends at the end of the file.
            long strip_end_offset = (srcs_end_number < record_count)
                    ? getUInt(table, (int) (srcs_end_number * 8))
                    : in_size;
            long strip_length = strip_end_offset - strip_start_offset;

            if(strip_start_offset < header_offset + 0xe8 || strip_end_offset > in_size || strip_length < 4) {
                throw new InvalidMobiFileException("Broken SRCS Record");
            }

            readFully(fich, bui, strip_start_offset);
            if(getUInt(bui, 0) != MOBI_SRCS_RECORD_IDENTIFIER) {
                throw new InvalidMobiFileException("Broken SRCS Record");
            }

            int new_record_count = (int) (record_count - srcs_record_count);

            // Seed the output with the leading bytes of the input; this leaves
            // the output position right behind the new (shorter) record table.
            transferFully(fich, 0, 78L + (8L * new_record_count), foch);

            putUnsignedInt(bui, 0, (new_record_count * 2L) + 1);
            writeFully(foch, bui, 68);

            // The record count is a 2-byte field; a 2-byte buffer avoids
            // touching the table entry that follows it.
            bus.putChar(0, (char) new_record_count);
            writeFully(foch, bus, 76);

            // Records in front of SRCS only move up by the removed table entries.
            long delta = srcs_record_count * -8;

            for(int x = 0; x < srcs_record_number; x++) {
                putUnsignedInt(bul, 0, getUInt(table, x * 8) + delta);
                putUnsignedInt(bul, 4, getUInt(table, (x * 8) + 4));
                writeFully(foch, bul, (x * 8L) + 78);
            }

            long first_record_offset = header_offset + delta;

            // Records behind SRCS additionally move up by the removed payload.
            delta = delta - strip_length;

            for(int x = (int) srcs_end_number; x < record_count; x++) {
                long y = x - srcs_record_count;

                putUnsignedInt(bul, 0, getUInt(table, x * 8) + delta);
                putUnsignedInt(bul, 4, y * 2);
                writeFully(foch, bul, (y * 8) + 78);
            }

            // Zero-fill the gap between the table and record 0, then append
            // everything except the SRCS payload.
            writeZeros(foch, first_record_offset - foch.position());
            transferFully(fich, header_offset, strip_start_offset - header_offset, foch);
            transferFully(fich, strip_end_offset, in_size - strip_end_offset, foch);

            long out_size = foch.size();

            // Mark the SRCS reference in record 0 as absent.
            putUnsignedInt(bul, 0, 0xffffffffL);
            putUnsignedInt(bul, 4, 0);
            writeFully(foch, bul, first_record_offset + 0xe0);

            readFully(foch, bui, first_record_offset + 0x14);
            long mobi_header_length = getUInt(bui, 0);

            readFully(foch, bui, first_record_offset + 0x80);
            long exth_flag = getUInt(bui, 0);
            long exth_start_offset = first_record_offset + mobi_header_length + 16;

            // 12 bytes are needed: identifier (4), 4 skipped bytes, item count (4).
            if((exth_flag & 0x40) == 0x40 && exth_start_offset + 12 <= out_size) {
                readFully(foch, bui, exth_start_offset);

                if(getUInt(bui, 0) == MOBI_EXTH_RECORD_IDENTIFIER) {
                    readFully(foch, bui, exth_start_offset + 8);

                    long item_count = getUInt(bui, 0);
                    long pos = 12;

                    for(long x = 0; x < item_count; x++) {
                        long item_offset = exth_start_offset + pos;

                        if(item_offset + 8 > out_size) {
                            break;
                        }

                        readFully(foch, bul, item_offset);

                        long item_type = getUInt(bul, 0);
                        long item_size = getUInt(bul, 4);

                        // An item is at least its own 8-byte header; anything
                        // smaller would stop the cursor from advancing.
                        if(item_size < 8) {
                            break;
                        }

                        if(item_type == MOBI_EXTH_BOUNDARY_SECTION_IDENTIFIER && item_offset + 12 <= out_size) {
                            readFully(foch, bui, item_offset + 8);

                            long boundary_pointer = getUInt(bui, 0);

                            if(srcs_record_number <= boundary_pointer) {
                                putUnsignedInt(bui, 0, boundary_pointer - srcs_record_count);
                                writeFully(foch, bui, item_offset + 8);
                            }
                        }

                        pos += item_size;
                    }
                }
            }
        }
        finally {
            // Closing a stream also closes its channel; the nested finally
            // makes sure the output is closed even if closing the input fails.
            try {
                if(fin != null) fin.close();
            }
            finally {
                if(fout != null) fout.close();
            }
        }
    }

    /** Fills the whole buffer from {@code position}, failing when the file ends first. */
    private static void readFully(FileChannel channel, ByteBuffer buffer, long position) throws IOException, InvalidMobiFileException {
        buffer.clear();

        while(buffer.hasRemaining()) {
            if(channel.read(buffer, position + buffer.position()) < 0) {
                throw new InvalidMobiFileException("Truncated Mobi File");
            }
        }
    }

    /** Writes the whole buffer at {@code position} without moving the channel position. */
    private static void writeFully(FileChannel channel, ByteBuffer buffer, long position) throws IOException {
        buffer.clear();

        while(buffer.hasRemaining()) {
            channel.write(buffer, position + buffer.position());
        }
    }

    /** Appends {@code count} zero bytes at the channel's current position. */
    private static void writeZeros(FileChannel channel, long count) throws IOException {
        ByteBuffer zeros = ByteBuffer.allocate((int) Math.max(0, Math.min(count, 8192)));

        while(count > 0) {
            zeros.clear();
            zeros.limit((int) Math.min(count, zeros.capacity()));

            while(zeros.hasRemaining()) {
                count -= channel.write(zeros);
            }
        }
    }

    /** Appends exactly {@code count} bytes of {@code source}, starting at {@code position}, to {@code target}. */
    private static void transferFully(FileChannel source, long position, long count, FileChannel target) throws IOException {
        long done = 0;

        // transferTo() may move fewer bytes than requested, so keep going.
        while(done < count) {
            long moved = source.transferTo(position + done, count - done, target);

            if(moved <= 0) {
                throw new EOFException("Unexpected end of file while copying");
            }

            done += moved;
        }
    }

    /** Stores the low 32 bits of {@code value} at the absolute {@code position}. */
    private static void putUnsignedInt(ByteBuffer bb, int position, long value) {
        bb.putInt(position, (int) (value & 0xffffffffL));
    }

    /** Reads the 32-bit value at the absolute index {@code idx} as an unsigned number. */
    private static long getUInt(ByteBuffer b, int idx) {
        return ((long) b.getInt(idx) & 0xffffffffL);
    }
}


Big thanks to pdurrant (and contributors!) for KindleStrip

Last edited by Padde; 12-21-2012 at 12:52 AM.
Padde is offline   Reply With Quote