Junior Member
Posts: 6
Karma: 10
Join Date: Nov 2012
Location: Germany
Device: Kindle Paperwhite
|
I had some trouble with the latest version of KindleStrip (from THIS post) under ActivePython v3.2.2.3 (Win64-x64).
Error Message on first try:
Code:
File "D:\Work\XAMPP\htdocs\test\kindlestrip_py3.py", line 224, in <module>
data_file = file(infile, 'rb').read()
NameError: name 'file' is not defined
Error Message on second try after replacing all calls to file() with open():
Code:
Traceback (most recent call last):
File "D:\Work\XAMPP\htdocs\test\kindlestrip_py3.py", line 228, in <module>
print("Header Bytes: " + binascii.b2a_hex(strippedFile.getHeader()))
TypeError: Can't convert 'bytes' object to str implicitly
Bugfixed portion of Code:
For a project of mine I created this PHP port of KindleStrip:
Spoiler:
The Class Mobi:
Code:
<?php
# This PHP Class is a PHP port of KindleStrip from Paul Durrant.
# Please refer to <https://www.mobileread.com/forums/showthread.php?t=96903>
# for more information on the original Python version of KindleStrip.
#
# This class strips the SRCS record (usually the penultimate record) from a
# Mobipocket file. This is useful because current versions of KindleGen add
# a compressed copy of the source files in this record, making the ebook
# produced about twice as big as it needs to be.
#
#
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# For more information, please refer to <http://unlicense.org/>
#
# Written by Patrick Klös, 2012, pkloes@web.de, padde on mobileread.com
#
# Changelog
# 1.00 - Initial version
/**
 * In-memory Mobipocket ("BOOKMOBI") file that can have its SRCS record(s)
 * removed.
 *
 * Typical use: construct with a path (or call load()), then call stripSRCS()
 * and write the returned bytes to a new file.
 */
class Mobi {
    const MOBI_FORMAT_IDENTIFIER = "BOOKMOBI";
    const MOBI_SRCS_RECORD_IDENTIFIER = "SRCS";
    const MOBI_EXTH_RECORD_IDENTIFIER = "EXTH";
    // Bit of the 32-bit flag word at record-0 offset 0x80 that is tested
    // before the EXTH block is looked at.
    const MOBI_EXTH_RECORD_CHECKBIT = 0x40;
    // EXTH item type (121) whose 32-bit payload is treated as a record number.
    const MOBI_EXTH_BOUNDARY_SECTION_IDENTIFIER = 0x79;

    /** @var string|null Raw bytes of the loaded file, or null if none/invalid. */
    private $data = null;
    /** @var string|null Result of the last successful stripSRCS(), or null. */
    private $output = null;

    /**
     * @param string|null $file Optional path of a file to load immediately.
     */
    public function __construct($file = null) {
        if($file != null) {
            $this->load($file);
        }
    }

    /**
     * Reads $file into memory, replacing any previously loaded book.
     *
     * @param string $file Path of the file to read.
     * @return bool True when the file was readable and carries the BOOKMOBI marker.
     */
    public function load($file) {
        // Forget the previous book first; otherwise an unreadable $file would
        // leave the old data in place and load() would wrongly report success.
        $this->data = null;
        $this->output = null;
        if(is_readable($file)) {
            $contents = file_get_contents($file);
            if($contents !== false) {
                $this->data = $contents;
            }
        }
        return $this->isValid();
    }

    /**
     * Checks for the "BOOKMOBI" marker at offset 0x3C. Data that fails the
     * check is discarded.
     *
     * @return bool
     */
    public function isValid() {
        $valid = false;
        // The length check keeps unpack() from failing on files shorter than the marker.
        if(is_string($this->data) && strlen($this->data) >= 0x3C + 8) {
            $valid = ($this->unpack_from("a8", $this->data, 0x3C) == Mobi::MOBI_FORMAT_IDENTIFIER);
        }
        if(!$valid) {
            $this->data = null;
        }
        return $valid;
    }

    /**
     * @return string|false The loaded bytes, or false when nothing valid is loaded.
     */
    public function getOriginalData() {
        if($this->data != null) {
            return $this->data;
        }
        return false;
    }

    /**
     * @return string|false The bytes produced by stripSRCS(), or false if it
     *                      has not produced any output yet.
     */
    public function getStrippedData() {
        if($this->output != null) {
            return $this->output;
        }
        return false;
    }

    /**
     * Builds a copy of the loaded book without its SRCS record(s).
     *
     * The record table is rewritten (offsets shifted, record count reduced),
     * the SRCS pointer in record 0 is reset to 0xffffffff/0 and a matching
     * EXTH boundary item is adjusted.
     *
     * @return string|false The stripped bytes; the original bytes unchanged
     *                      when no usable SRCS record is found; false when no
     *                      valid file is loaded.
     */
    public function stripSRCS() {
        if(!$this->isValid()) {
            return false;
        }
        $size = strlen($this->data);
        if($size < 86) {
            // Not even one complete record-table entry.
            return $this->data;
        }

        $num_sections = $this->unpack_from("n", $this->data, 76);
        $offset0 = $this->unpack_from("N", $this->data, 78);
        // With a single record there is no second table entry to read.
        $offset1 = ($num_sections > 1 && $size >= 90) ? $this->unpack_from("N", $this->data, 86) : $size;
        $mobiheader = substr($this->data, $offset0, $offset1 - $offset0);
        if(!is_string($mobiheader) || strlen($mobiheader) < 0xe8) {
            // Record 0 is too short to hold the SRCS pointer at 0xe0..0xe7.
            return $this->data;
        }

        list($srcs_secnum, $srcs_cnt) = $this->unpack_from("N2", $mobiheader, 0xe0);
        if($srcs_secnum == 0xffffffff || $srcs_cnt == 0) {
            return $this->data;
        }
        $next = $srcs_secnum + $srcs_cnt;
        if($srcs_secnum < 1 || $next > $num_sections) {
            // Pointer does not describe records inside the table.
            return $this->data;
        }

        $srcs_offset = $this->unpack_from("N", $this->data, 78 + ($srcs_secnum * 8));
        // When SRCS is the last record there is no following table entry; it
        // then runs to the end of the file.
        $next_offset = ($next < $num_sections)
            ? $this->unpack_from("N", $this->data, 78 + ($next * 8))
            : $size;
        $srcs_length = $next_offset - $srcs_offset;
        if($srcs_length < 4 || $next_offset > $size
            || substr($this->data, $srcs_offset, 4) != Mobi::MOBI_SRCS_RECORD_IDENTIFIER) {
            return $this->data;
        }

        // Header: bytes 68..71 become 2 * count + 1 and bytes 76..77 the new
        // record count; everything else in the first 78 bytes is kept.
        $new_count = $num_sections - $srcs_cnt;
        $output = substr($this->data, 0, 68) . pack("N", ($new_count * 2) + 1);
        $output .= substr($this->data, 72, 4);
        $output .= pack("n", $new_count);

        // Records before SRCS only move by the table entries that disappear.
        // (A counting loop is used because range(0, -1) would yield [0, -1].)
        $delta = $srcs_cnt * -8;
        for($i = 0; $i < $srcs_secnum; $i++) {
            list($offset, $flgval) = $this->unpack_from("N2", $this->data, ($i * 8) + 78);
            $output .= pack("N", $offset + $delta) . pack("N", $flgval);
        }

        // Records after SRCS additionally move by the removed payload, and
        // their second table word is renumbered to 2 * new index.
        $delta = $delta - $srcs_length;
        for($i = $next; $i < $num_sections; $i++) {
            $offset = $this->unpack_from("N", $this->data, ($i * 8) + 78);
            $output .= pack("N", $offset + $delta) . pack("N", ($i - $srcs_cnt) * 2);
        }

        // Pad with NUL bytes up to the (shifted) start of record 0.
        $first_offset = $this->unpack_from("N", $output, 78);
        $padding = $first_offset - strlen($output);
        if($padding < 0) {
            // Record 0 would overlap the record table: malformed input.
            return $this->data;
        }
        $output .= str_repeat("\0", $padding);

        // Payload: everything from record 0 up to SRCS, then everything after it.
        $output .= substr($this->data, $offset0, $srcs_offset - $offset0);
        $output .= substr($this->data, $srcs_offset + $srcs_length);

        // Patch record 0 inside the new image.
        $offset0 = $this->unpack_from("N", $output, 78);
        $offset1 = ($new_count > 1) ? $this->unpack_from("N", $output, 86) : strlen($output);
        $mobiheader = substr($output, $offset0, $offset1 - $offset0);
        $mobiheader = substr($mobiheader, 0, 0xe0) . pack("N", 0xffffffff) . pack("N", 0) . substr($mobiheader, 0xe8);
        $mobiheader = $this->fixExthRecord($srcs_secnum, $srcs_cnt, $mobiheader);

        $this->output = substr($output, 0, $offset0) . $mobiheader . substr($output, $offset1);
        return $this->output;
    }

    /**
     * Lowers the record number stored in EXTH items of type
     * MOBI_EXTH_BOUNDARY_SECTION_IDENTIFIER by $srcs_cnt when it is not
     * below $srcs_secnum. A missing or truncated EXTH block is left alone.
     *
     * @param int    $srcs_secnum Index of the first removed record.
     * @param int    $srcs_cnt    Number of removed records.
     * @param string $mobiheader  Bytes of record 0.
     * @return string Record 0, patched if necessary (same length).
     */
    private function fixExthRecord($srcs_secnum, $srcs_cnt, $mobiheader) {
        if(strlen($mobiheader) < 0x84) {
            return $mobiheader;
        }
        $mobi_length = $this->unpack_from("N", $mobiheader, 0x14);
        $exth_flag = $this->unpack_from("N", $mobiheader, 0x80);
        if(!($exth_flag & Mobi::MOBI_EXTH_RECORD_CHECKBIT)) {
            return $mobiheader;
        }

        $exth_start = $mobi_length + 16;
        $exth = substr($mobiheader, $exth_start);
        // 12 bytes = identifier + two 32-bit words, the last being the item count.
        if(!is_string($exth) || strlen($exth) < 12 || substr($exth, 0, 4) != Mobi::MOBI_EXTH_RECORD_IDENTIFIER) {
            return $mobiheader;
        }

        $exth_size = strlen($exth);
        $nitems = $this->unpack_from("N", $exth, 8);
        $pos = 12;
        for($i = 0; $i < $nitems; $i++) {
            if($pos + 8 > $exth_size) {
                break; // item header would run past the record
            }
            list($type, $size) = $this->unpack_from("N2", $exth, $pos);
            if($size < 8) {
                break; // size includes the 8-byte item header; smaller is malformed
            }
            if($type == Mobi::MOBI_EXTH_BOUNDARY_SECTION_IDENTIFIER && $size >= 12 && $pos + 12 <= $exth_size) {
                $boundaryptr = $this->unpack_from("N", $exth, $pos + 8);
                if($srcs_secnum <= $boundaryptr) {
                    $nval = pack("N", $boundaryptr - $srcs_cnt);
                    $mobiheader = substr_replace($mobiheader, $nval, $exth_start + $pos + 8, 4);
                }
            }
            $pos += $size;
        }
        return $mobiheader;
    }

    /**
     * unpack() starting at a byte offset.
     *
     * @param string $format unpack() format code(s), e.g. "N", "N2", "a8".
     * @param string $src    Binary string to read from.
     * @param int    $offset Byte offset of the first value.
     * @return mixed The single value, or a 0-based array when the format
     *               yields several values.
     */
    private function unpack_from($format, $src, $offset) {
        // array_merge() renumbers unpack()'s 1-based keys from 0 so list() works.
        $result = array_merge(unpack("@" . strval($offset) . "/" . $format, $src));
        foreach($result as $key => $val) {
            // Map negative integers back into the unsigned 32-bit range.
            $result[$key] = ($val < 0 && is_int($val)) ? $val + 4294967296 : $val;
        }
        return (count($result) == 1) ? array_shift($result) : $result;
    }
}
?>
Sample usage:
Code:
<?php
require_once("Mobi.php");

// isValid() is false when the file could not be read or does not carry the
// BOOKMOBI marker; stop before writing anything in that case.
$mobi = new Mobi("test.mobi");
if(!$mobi->isValid()) {
    exit("test.mobi is not a readable Mobipocket file\n");
}

// stripSRCS() returns the original bytes unchanged when the book has no SRCS record.
file_put_contents("strip.mobi", $mobi->stripSRCS());
?>
### EDIT ###
I decided to switch to Java for my project, so here is an implementation in Java:
Spoiler:
Code:
package de.pkloes.ComicConv.mobi.Exceptions;
/**
 * Signals that a file is not a Mobipocket file in the expected layout.
 */
public class InvalidMobiFileException extends Exception {
    private static final long serialVersionUID = 1L;

    /** Creates an exception without a detail message. */
    public InvalidMobiFileException() {}

    /**
     * @param message description of what is wrong with the file
     */
    public InvalidMobiFileException(String message) {
        super(message);
    }

    /**
     * @param message description of what is wrong with the file
     * @param cause   the underlying failure, kept for diagnostics
     */
    public InvalidMobiFileException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * @param cause the underlying failure, kept for diagnostics
     */
    public InvalidMobiFileException(Throwable cause) {
        super(cause);
    }
}
Code:
package de.pkloes.ComicConv.mobi;
import de.pkloes.ComicConv.mobi.Exceptions.InvalidMobiFileException;
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
/**
 * Helpers for post-processing Mobipocket ("BOOKMOBI") files.
 */
public class Tools {
    /** The ASCII bytes "BOOKMOBI" as one big-endian 64-bit value. */
    private static final long MOBI_FORMAT_IDENTIFIER = 0x424f4f4b4d4f4249L;
    /** The ASCII bytes "SRCS" as a big-endian 32-bit value. */
    private static final int MOBI_SRCS_RECORD_IDENTIFIER = 0x53524353;
    /** The ASCII bytes "EXTH" as a big-endian 32-bit value. */
    private static final int MOBI_EXTH_RECORD_IDENTIFIER = 0x45585448;
    /** EXTH item type (121) whose 32-bit payload is treated as a record number. */
    private static final int MOBI_EXTH_BOUNDARY_SECTION_IDENTIFIER = 0x79;

    /** File offset of the 8-byte format identifier. */
    private static final int SIGNATURE_OFFSET = 0x3c;
    /** File offset of the 16-bit record count. */
    private static final int RECORD_COUNT_OFFSET = 76;
    /** File offset of the first record-table entry. */
    private static final int RECORD_TABLE_OFFSET = 78;
    /** Size of one record-table entry: 32-bit offset followed by a 32-bit word. */
    private static final int RECORD_ENTRY_SIZE = 8;
    /** Offset, inside record 0, of the SRCS record number (the count follows it). */
    private static final int SRCS_POINTER_OFFSET = 0xe0;
    /** Number of bytes of record 0 needed to read the SRCS pointer. */
    private static final int SRCS_POINTER_END = 0xe8;

    /**
     * Copies {@code mobi} to {@code stripped} without its SRCS record(s).
     *
     * <p>{@code stripped} is truncated first. If record 0 names no SRCS record
     * the input is copied unchanged. Otherwise the record table is rewritten
     * (offsets shifted, record count reduced), the SRCS pointer in record 0 is
     * reset to {@code 0xffffffff / 0} and a matching EXTH boundary item is
     * adjusted.
     *
     * @param mobi     the Mobipocket file to read
     * @param stripped the file to (over)write with the result
     * @throws IOException              if reading or writing fails
     * @throws InvalidMobiFileException if the input lacks the BOOKMOBI marker,
     *                                  has an inconsistent header, or its SRCS
     *                                  pointer does not lead to an SRCS record
     */
    public static void stripSRCS(File mobi, File stripped) throws IOException, InvalidMobiFileException {
        // try-with-resources closes every stream/channel even if an earlier close() throws.
        try (FileInputStream source = new FileInputStream(mobi);
             FileChannel in = source.getChannel();
             RandomAccessFile target = new RandomAccessFile(stripped, "rw");
             FileChannel out = target.getChannel()) {
            target.setLength(0);
            final long inputSize = in.size();

            // Need the fixed header plus at least one table entry.
            if (inputSize < RECORD_TABLE_OFFSET + RECORD_ENTRY_SIZE) {
                throw new InvalidMobiFileException("Unsupported Mobi Format");
            }
            ByteBuffer scratch = ByteBuffer.allocate(8);
            readFully(in, scratch, SIGNATURE_OFFSET);
            if (scratch.getLong(0) != MOBI_FORMAT_IDENTIFIER) {
                throw new InvalidMobiFileException("Unsupported Mobi Format");
            }

            // Bytes 76..77: record count; bytes 78..81: offset of record 0.
            readFully(in, scratch, RECORD_COUNT_OFFSET);
            final int recordCount = scratch.getChar(0);
            final long headerOffset = getUInt(scratch, 2);
            final long headEnd = headerOffset + SRCS_POINTER_END;
            if (recordCount == 0
                    || headerOffset < tableEntryOffset(recordCount)
                    || headEnd > Integer.MAX_VALUE) {
                throw new InvalidMobiFileException("Broken Mobi Header");
            }
            if (headEnd > inputSize) {
                // Record 0 is too short to carry an SRCS pointer: nothing to strip.
                copyRange(in, 0, inputSize, out);
                return;
            }

            // Everything up to and including the SRCS pointer: file header,
            // record table and the start of record 0.
            ByteBuffer head = ByteBuffer.allocate((int) headEnd);
            readFully(in, head, 0);
            final int mobiBase = (int) headerOffset;
            final long srcsIndex = getUInt(head, mobiBase + SRCS_POINTER_OFFSET);
            final long srcsCount = getUInt(head, mobiBase + SRCS_POINTER_OFFSET + 4);
            if (srcsIndex == 0xffffffffL || srcsCount == 0) {
                copyRange(in, 0, inputSize, out);
                return;
            }

            final long nextIndex = srcsIndex + srcsCount;
            if (srcsIndex == 0 || nextIndex > recordCount) {
                throw new InvalidMobiFileException("Broken SRCS Record");
            }
            final long stripStart = getUInt(head, tableEntryOffset(srcsIndex));
            // When SRCS is the last record there is no following table entry;
            // it then runs to the end of the file.
            final long stripEnd = nextIndex < recordCount
                    ? getUInt(head, tableEntryOffset(nextIndex))
                    : inputSize;
            final long stripLength = stripEnd - stripStart;
            if (stripStart < headEnd || stripEnd > inputSize || stripLength < 4) {
                throw new InvalidMobiFileException("Broken SRCS Record");
            }
            ByteBuffer magic = ByteBuffer.allocate(4);
            readFully(in, magic, stripStart);
            if (magic.getInt(0) != MOBI_SRCS_RECORD_IDENTIFIER) {
                throw new InvalidMobiFileException("Broken SRCS Record");
            }

            // Start the output with the old header and as much of the table as
            // survives; the table entries are overwritten below.
            final int newRecordCount = (int) (recordCount - srcsCount);
            final long newTableEnd = tableEntryOffset(newRecordCount);
            copyRange(in, 0, newTableEnd, out);

            // Bytes 68..71 are set to 2 * recordCount + 1, as the PHP port does.
            writeUInt(out, 68, newRecordCount * 2L + 1);
            // Exactly two bytes, so nothing spills into the first table entry.
            ByteBuffer count = ByteBuffer.allocate(2);
            count.putChar(0, (char) newRecordCount);
            writeFully(out, count, RECORD_COUNT_OFFSET);

            // Records before SRCS only move by the table entries that disappear.
            long delta = -RECORD_ENTRY_SIZE * srcsCount;
            for (long i = 0; i < srcsIndex; i++) {
                int entry = tableEntryOffset(i);
                writeTableEntry(out, i, getUInt(head, entry) + delta, getUInt(head, entry + 4));
            }
            // Records after SRCS additionally move by the removed payload and
            // get their second table word renumbered to 2 * new index.
            delta -= stripLength;
            for (long i = nextIndex; i < recordCount; i++) {
                long newIndex = i - srcsCount;
                writeTableEntry(out, newIndex, getUInt(head, tableEntryOffset(i)) + delta, newIndex * 2);
            }

            // Zero padding between the table and record 0 (non-negative because
            // headerOffset was checked against the old table end above).
            final long firstRecordOffset = headerOffset - RECORD_ENTRY_SIZE * srcsCount;
            writeFully(out, ByteBuffer.allocate((int) (firstRecordOffset - newTableEnd)), newTableEnd);
            out.position(firstRecordOffset);

            // Payload: record 0 up to SRCS, then everything after SRCS.
            copyRange(in, headerOffset, stripStart - headerOffset, out);
            copyRange(in, stripEnd, inputSize - stripEnd, out);

            // Record 0 no longer points at an SRCS record.
            writeUInt(out, firstRecordOffset + SRCS_POINTER_OFFSET, 0xffffffffL);
            writeUInt(out, firstRecordOffset + SRCS_POINTER_OFFSET + 4, 0);

            fixExthBoundary(out, head, mobiBase, firstRecordOffset, srcsIndex, srcsCount);
        }
    }

    /**
     * Lowers the record number stored in EXTH boundary items by {@code srcsCount}
     * when it is not below {@code srcsIndex}. A missing or truncated EXTH block
     * is left untouched.
     */
    private static void fixExthBoundary(FileChannel out, ByteBuffer head, int mobiBase,
                                        long firstRecordOffset, long srcsIndex, long srcsCount)
            throws IOException, InvalidMobiFileException {
        final long mobiHeaderLength = getUInt(head, mobiBase + 0x14);
        final long exthFlag = getUInt(head, mobiBase + 0x80);
        if ((exthFlag & 0x40) == 0) {
            return;
        }
        final long outSize = out.size();
        final long exthStart = firstRecordOffset + mobiHeaderLength + 16;
        // 12 bytes = identifier + two 32-bit words, the last being the item count.
        if (exthStart + 12 > outSize || readUInt(out, exthStart) != MOBI_EXTH_RECORD_IDENTIFIER) {
            return;
        }

        final long itemCount = readUInt(out, exthStart + 8);
        long itemPos = exthStart + 12;
        ByteBuffer item = ByteBuffer.allocate(8);
        for (long i = 0; i < itemCount && itemPos + 8 <= outSize; i++) {
            readFully(out, item, itemPos);
            long itemType = getUInt(item, 0);
            long itemSize = getUInt(item, 4);
            if (itemSize < 8) {
                break; // size includes the 8-byte item header; smaller would never advance
            }
            if (itemType == MOBI_EXTH_BOUNDARY_SECTION_IDENTIFIER
                    && itemSize >= 12 && itemPos + 12 <= outSize) {
                long boundary = readUInt(out, itemPos + 8);
                if (srcsIndex <= boundary) {
                    writeUInt(out, itemPos + 8, boundary - srcsCount);
                }
            }
            itemPos += itemSize;
        }
    }

    /** Returns the file offset of record-table entry {@code index}. */
    private static int tableEntryOffset(long index) {
        return (int) (RECORD_TABLE_OFFSET + index * RECORD_ENTRY_SIZE);
    }

    /** Writes one record-table entry (two unsigned 32-bit words) at slot {@code index}. */
    private static void writeTableEntry(FileChannel ch, long index, long offset, long word)
            throws IOException {
        ByteBuffer entry = ByteBuffer.allocate(RECORD_ENTRY_SIZE);
        putUnsignedInt(entry, 0, offset);
        putUnsignedInt(entry, 4, word);
        writeFully(ch, entry, tableEntryOffset(index));
    }

    /** Reads an unsigned big-endian 32-bit value at {@code position}. */
    private static long readUInt(FileChannel ch, long position)
            throws IOException, InvalidMobiFileException {
        ByteBuffer buf = ByteBuffer.allocate(4);
        readFully(ch, buf, position);
        return getUInt(buf, 0);
    }

    /** Writes the low 32 bits of {@code value} big-endian at {@code position}. */
    private static void writeUInt(FileChannel ch, long position, long value) throws IOException {
        ByteBuffer buf = ByteBuffer.allocate(4);
        putUnsignedInt(buf, 0, value);
        writeFully(ch, buf, position);
    }

    /**
     * Fills {@code buf} completely from {@code position}; a single
     * {@code FileChannel.read} is allowed to return fewer bytes than requested.
     */
    private static void readFully(FileChannel ch, ByteBuffer buf, long position)
            throws IOException, InvalidMobiFileException {
        buf.clear();
        long pos = position;
        while (buf.hasRemaining()) {
            int n = ch.read(buf, pos);
            if (n < 0) {
                throw new InvalidMobiFileException("Truncated Mobi File");
            }
            pos += n;
        }
    }

    /** Writes the whole of {@code buf} (index 0 to capacity) at {@code position}. */
    private static void writeFully(FileChannel ch, ByteBuffer buf, long position) throws IOException {
        buf.clear();
        long pos = position;
        while (buf.hasRemaining()) {
            pos += ch.write(buf, pos);
        }
    }

    /**
     * Appends {@code count} bytes of {@code src}, starting at {@code position},
     * at the current position of {@code dst}, repeating {@code transferTo}
     * until everything has been moved.
     */
    private static void copyRange(FileChannel src, long position, long count, FileChannel dst)
            throws IOException {
        long pos = position;
        long remaining = count;
        while (remaining > 0) {
            long moved = src.transferTo(pos, remaining, dst);
            if (moved <= 0) {
                throw new EOFException("Unexpected end of file while copying");
            }
            pos += moved;
            remaining -= moved;
        }
    }

    /** Stores the low 32 bits of {@code value} at absolute index {@code position}. */
    private static void putUnsignedInt(ByteBuffer bb, int position, long value) {
        bb.putInt(position, (int) (value & 0xffffffffL));
    }

    /** Reads the 32-bit value at absolute index {@code idx} as an unsigned number. */
    private static long getUInt(ByteBuffer b, int idx) {
        return ((long) b.getInt(idx) & 0xffffffffL);
    }
}
Big thanks to pdurrant (and contributors!) for KindleStrip
Last edited by Padde; 12-21-2012 at 12:52 AM.
|