#!/usr/bin/perl

# GuteBook.pl v0.5 - Copyright (C) 2009 Nick Rapallo (nrapallo)
#
# Converts Project Gutenberg ("PG") and PG Australia HTML files from EText-No.
# or URL directly into many ebook formats using calibre ANY2LRF/EPUB/MOBI/LIT
# tools for Sony lrf/.epub, Mobipocket .mobi and MS .lit versions.  Also 
# generates .imp/.rb using ETI's eBook Publisher for the EBW1150, REB1200
# and Rocket/REB1100.
#
# Adapted from a very elegant .lrf solution; gutlrf.pl code written
# by FangornUK, 10th Nov 2006 and provided freely without licence.
# Extended by nrapallo, May 10, 2009. Both are MobileRead.com members.
#
# Also converts text files if Gutenmark is installed and in path 
#    http://www.sandroid.org/GutenMark/download.html
#
# Requires calibre installed (for .lrf/.epub/.mobi/.lit)
#    http://calibre.kovidgoyal.net/download
#
# Requires eBook Publisher installed (for .imp/.rb/.opf)
#    http://www.ebooktechnologies.com/support_publisher_download.htm
#
# REVISIONS:
#   v0.5 - June 22, 2009
#    - If (blank) file called 'calibreold' (no .ext) exists in install directory,
#      then use v0.5 (stable) calibre instead of new v0.6 (beta/release) calibre
#    - better allowed installation to different location than default "C:\Program Files".
#    - improved direct download of PG Australia ebooks.  Allowed local cached copy to
#      be retained using --keepzip or --keephtm; avoids subsequent PGA website downloads.
#    - implemented creation of eReader .pdb when using calibre v0.6 (beta)
#    - fixed handling of single dash ("-") options
#    - improved print statements feedback
#    - better handling of PGA metadata within .htm
#    - better handling of important/necessary text after "THE END" but before PGA blurb.
#    - allowed existing .txt CHARSET to be used for generated .htm meta content-type
#    - better handling of --pbnofirst when <h1> already used as a pb tag
#    - misc. PGA .htm fixes for color and removed fixed fontsize for <p> and <table>
#
#   v0.4 - June 10, 2009
#    - added ability to directly download PG Australia ebooks using their
#      EText-No. AND URL link to the .html placed as the Input file.
#      For example,
#      use:  --PGnum 1547A AND http://gutenberg.net.au/ebooks07/0700941h.html
#      Note that downloading .zip is fine, but .txt is not yet fully functional
#    - improved Custom Perl Search and Replace functionality.  Still need to 
#      use \" for any "; however, due to a DOS limitation, ^ can't be used yet!
#    - minor code/html fixes.
#
#   v0.3 - June 4, 2009
#    - also add "start" anchor when the PG preamble is retained
#    - also remove any stray <br>'s from metadata.
#    - fixes GUI options loading; now properly remembers the 'search' and 'replace'
#      strings. The user must ensure that any " escaped by \ within those strings.
#    - simple PG title page added when --cover specified (would be better to take
#      a snapshot of this as a "cover" image)
#    - option -imgsrc (GUI: 'Extract cover') now also removes "width" elements
#      from within preceding <div class=figcenter> which caused images not to be
#      centered in .epub's
#   
#   v0.2 - June 3, 2009
#    - removed unwanted blank page at start in .lrf caused by use of tags '<pre></pre>'
#    - minor GUI / files cleanup
#   
#   v0.1 - June 2, 2009
#    - initial public release
#
#######################################################################
#
# This program is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation, either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
#######################################################################

use File::Copy;
use File::Basename;
use LWP::Simple;
use Getopt::Mixed;
use Archive::Zip qw( :ERROR_CODES :CONSTANTS );
use Cwd;

my ($destdir, $currdir, $getfile, $webfile, $textbook, $message, $gutversion, $prerange, $PGetext);
my ($BASENAME, $BASENOEXT, $BASEDIR, $PGA, $PGNAME);
my ($BOOKTITLE, $AUTHOR, $CATEGORY, $ILLUSTRATOR, $RELEASED, $LANGUAGE, $CHARSET, $EBOOKNUM);
my ($inputfile, $outfile, $cachedfile, $copyfile);
my ($htmlbook, $epubbook, $mobibook, $litbook, $impbook);
my ($default_fontsize);

$gutversion  = "version 0.5";
$textbook    = 0;
$prerange    = 0;
$txtfound    = 0;
$PGA	     = 0; 			#True(=1) means file is from Project Gutenberg Australia
$PGNAME      = "Project Gutenberg";	#if $PGA True, then set to "Project Gutenberg (Australia)"

$CATEGORY    = "";
$BOOKTITLE   = "";
$AUTHOR      = "";
$ILLUSTRATOR = "";
$RELEASED    = "";
$LANGUAGE    = "";
$CHARSET     = "";
$EBOOKNUM    = "";

#default page break for PG books is usually h2, otherwise use h3? or as the second one
my @pagebreak;
$pagebreak[0] = "h2";
$pagebreak[1] = "";

#default suffix for <pre> tag to make it more compatible for .mobi
my $fixpre1 = "<small><tt>";
my $fixpre2 = "</tt></small>";

#Formats/character sets other than plain ASCII; get extensions added that indicate type of file.
#Character sets get digits; formats get letters. The most common of these are:
#
# -h for HTML (default source)
# -0 for Unicode 
# -8 for 8-bit plain text 
# ASCII text has no suffix
#Order of txt preference: "-0" (UTF-8), "-8" (8-bit), "" ASCII
my @txtsuffixes = ("-0", "-8", ""); 
my ($txtending, $txtin, $txtout);

use vars qw ($opt_a $opt_author $opt_t $opt_title $opt_c $opt_category $opt_outdir 
	$opt_1200 $opt_1150 $opt_1100 $opt_epub $opt_lrf $opt_mobi $opt_lit
	$opt_srcepub $opt_pdb $opt_zip $opt_keepzip $opt_keephtm $opt_usegm
	$opt_LRmargins $opt_indent $opt_fixpre1 $opt_fixpre2 $opt_p $opt_pb $opt_nobatch
	$opt_v $opt_verbose $opt_debug $opt_nojustify $opt_noimagefix $opt_nopara
	$opt_pbwithin $opt_pbfirsth1 $opt_pbnofirst $opt_pbtoc $opt_noPGtrailer
	$opt_cover $opt_imgsrc $opt_PGheader $opt_PGfooter $opt_PGpagenum
	$opt_tocname $opt_addtoc $opt_addtocend $opt_centerh $opt_smallerfont
	$opt_largerfont $opt_search $opt_replace $opt_modi $opt_modg $opt_PGnum,
	$opt_h, $opt_help, $opt_calibreold );

Getopt::Mixed::getOptions ("a=s author>a t=s title>t c=s category>c outdir=s
	1200 1150 1100 epub lrf mobi lit srcepub pdb zip keepzip keephtm usegm
	LRmargins=s indent=s fixpre1=s fixpre2=s p=s pb>p nobatch v verbose>v debug
	nojustify noimagefix nopara pbwithin pbfirsth1 pbnofirst pbtoc noPGtrailer
	cover imgsrc PGheader PGfooter PGpagenum tocname addtoc addtocend centerh 
	smallerfont largerfont search=s replace=s modi modg PGnum=s h help>h calibreold");

print "GuteBook ($gutversion) Copyright (C) 2009 Nick Rapallo (nrapallo)\n";

if ($#ARGV == -1 || $opt_h)
{
	print "Usage: gutebook.pl [options] [Project Gutenberg EText-No. | link to ZIP|HTML]\n";
	print "where [options] include:\n";
	print "  -a, --author   \"AUTHOR\"   override the Author name detected\n";
	print "  -t, --title    \"TITLE\"    override the Book title detected\n";
	print "  -c, --category \"CATEGORY\" override default \"Project Gutenberg\"\n";
	print "  -h, --help     command line help screen (also seen with no parameters)\n\n";
	print " Input/Source:\n";
	print "  --PGnum #     override EText-No. detection if no # in Input File name\n";
	print "  --keepzip     keep PG .zip file downloaded (local/cache copy for re-edits)\n";
	print "  --keephtm     keep PG .htm file extracted from downloaded .zip (or .txt)\n";
	print "  --usegm       use GutenMark for internal .txt to .htm; otherwise abort\n\n";
	print " Output formats:\n (any or all)\n";
	print "  --1150 --1200 eBookwise .imp created by eBook Publisher\n";
	print "  --1100        Rocket eBook .rb created by eBook Publisher\n";
	print "  --epub --lrf  Sony PRS .epub/.lrf created by calibre\n";
	print "  --mobi        Mobipocket .mobi created by calibre\n";
	print "  --lit         Microsoft .lit created by calibre\n";
	print "  --srcepub     single .xhtml (non-Sony) .epub created by calibre\n";
	print "  --pdb         eReader .pdb created by calibre\n";
	print "  --zip         Not yet implemented - reserved for calibre .zip\n\n";
	print " Output options:\n";
	print "  --outdir DIR  specify DIR where converted ebooks placed; default install dir\n";
	print "  --nobatch     do not created dos batch file for later re-edits\n";
	print "  -v, --verbose printout messages about this conversion\n";
	print "  --debug       printout more detailed messages about the conversion\n\n";
	print " HTML options:\n";
	print "  --LRmargins \$ specify overall <body> left/right margins; default \$=\"2%\"\n";
	print "  --indent  \$   specify overall <body> para. text indents; default \$=\"2em\"\n";
	print "  --fixpre1 \$   suffix for  <pre> for .mobi; default \$=\"<small><tt>\"\n";
	print "  --fixpre2 \$   prefix for </pre> for .mobi; default \$=\"</tt></small>\"\n";
	print "  -p, --pb  \$   pagebreaks on max. 2 HTML Tags, like \$=\"h1 h2\"; default \$=\"h2\"\n";
	print "  --nojustify   specify no <body> justification; default is justified text\n";
	print "  --nopara      specify no <body> para. separation; default is blank line sep.\n";
	print "  --pbwithin    pagebreak tags within anchor links to Chapter headings (mobi)\n";
	print "  --pbnofirst   ignore pagebreak on first pagebreak HTML Tag\n";
	print "  --pbfirsth1   force pagebreak on first <h1>\n";
	print "  --pbtoc       force pagebreak at TOC location\n";
	print "  --tocname     omit the \"toc\" anchor name inserted before TOC (.mobi/.imp)\n";
	print "  --noPGtrailer do not insert PG trailer (Booktitle/Author/Released/EText-No.)\n";
	print "  --PGheader    retain PG header (preamble); default is to strip it out\n";
	print "  --PGfooter    retain PG footer (legalese); default is to strip it out\n";
	print "  --PGpagenum   retain/display PG page numbers; default is to strip them out\n";
	print "  --imgsrc      strip all except \"src=\" within <img> tags\n";
	print "  --centerh     force all <h1> to <h6> tags to be centered.\n";
	print "  --smallerfont specify overall <body> text a font-size smaller\n";
	print "  --largerfont  specify overall <body> text a font-size larger\n";
	print "  --search      Custom Perl RegEx search string expression; use \\\" for any \"\n";
	print "  --replace     Custom Perl RegEx replace string expression; use \\\" for any \"\n";
	print "  --modi        Custom Perl RegEx \"i\" modifier for case indifferent\n";
	print "  --modg        Custom Perl RegEx \"g\" modifier for global replacements\n";
	print "  --noimgfix    Not yet implemented - do not re-save images for compatibility\n";
	print "  --cover       Not yet implemented - extract \"cover image\" into new cover.htm\n";
	print "  --addtoc      Not yet implemented - create TOC from pagebreak Tags\n";
	print "  --addtocend   Not yet implemented - place created TOC above at end\n\n";
	print "For more about these HTML options/fixes/filters, see GuteBook-readme.txt\n\n";
	print "If (blank) file called 'calibreold' (no .ext) exists in install directory,\n";
	print "then use v0.5 (stable) calibre instead of new v0.6 (beta/release) calibre\n\n";
	print "Retrieves the specified Project Gutenberg file, unzips it and filters it.\n";
	print "Provide the PG Etext number and it will try and download the relevant\n";
	print "HTML (or text) version.  Alternatively, you can specify a previously\n";
	print "downloaded ZIP file or an already extracted PG HTML file.\n";
	print "  e.g. gutebook.pl 17297\n";
	print "  e.g. gutebook.pl http://www.gutenberg.org/files/17297/17297-h.zip\n";
	print "  e.g. gutebook.pl c:\\dl\\17297-h.zip\n";
	print "  e.g. gutebook.pl c:\\dl\\17297-h\\17297-h.htm\n";
	exit 1;
}
my ($HREF) = @ARGV;

$PGetext = $HREF;
$PGetext =~ s/^[^0-9]*([0-9]+[A]*).*$/$1/;

#override PGEtext-No. detection for cases when Input File doesn't contain it.
if ($opt_PGnum)
{
	$PGetext = $opt_PGnum;
}
if ($PGetext =~ /^[0-9]+A$/)
{
	$PGA = 1;
	$PGNAME = "Project Gutenberg (Australia)";
	$pagebreak[0] = "h2";				#allows default to be set for PGA
	$pagebreak[1] = "";
}
$CATEGORY = $PGNAME;

if ($opt_p)
{
	@pagebreak = split(/ /, $opt_p);
}

$fixpre1 = $opt_fixpre1 unless not defined $opt_fixpre1;
$fixpre2 = $opt_fixpre2 unless not defined $opt_fixpre2;

$currdir = &Cwd::cwd();
if ($opt_outdir)
{
	$destdir = File::Spec->catfile($opt_outdir,$PGetext);
}
else
{
	$destdir = File::Spec->catfile($currdir,$PGetext);
}
mkdir $destdir;
chdir $destdir; 

if ($HREF =~ /^[0-9]+$/ )
{
	my $TEMPREF;
	#Only referenced number of etext so expand it
	#Attempt to get the HTML version first
	$TEMPREF = "http://www.gutenberg.org/files/$HREF/$HREF-h.zip";
	if ($HREF==711 || $HREF==3824 || $HREF==10018)
	{ 
		$TEMPREF = "http://www.gutenberg.org/files/$HREF/$HREF-h/$HREF-h.zip";
	}
	#but don't retrieve it if we already have a cached local copy
	$message = "Getting \"$HREF\" HTML file\nfrom local cached .zip copy instead";
	$cachedfile = File::Spec->catfile($destdir,$HREF."-h.zip");
	if (! -e $cachedfile)
	{
		$message = "Getting \"$HREF\" HTML file\nfrom $PGNAME Website";
		($content_type, $document_length, $modified_time, $expires, $server) = head($TEMPREF);
		if (! $content_type)
		{
			#Not found so try and get the text version in order of preference
			$message = "Failed to get \"$HREF" . $txtsuffix ."\" Text file\nfrom $PGNAME Website";
			foreach my $txtsuffix (@txtsuffixes) { 
				if ($textbook)
				{
					next;
				}
				else
				{
					$cachedfile = File::Spec->catfile($destdir,$HREF.$txtsuffix.".zip");
				}
				if (! -e $cachedfile)
				{
					$TEMPREF = "http://www.gutenberg.org/files/$HREF/$HREF" . $txtsuffix . ".zip";
			  		($content_type, $document_length, $modified_time, $expires, $server) = head($TEMPREF);
					if ($content_type)
					{
						$message = "Getting \"$HREF" . $txtsuffix ."\" Text file\nfrom $PGNAME Website";
						$txtending = $txtsuffix;
						$textbook = 1;
					}
				}
				else
				{
					$txtending = $txtsuffix;
					$textbook = 1;
					$TEMPREF = "http://www.gutenberg.org/files/$HREF/$HREF" . $txtending . ".zip";
					$message = "Getting \"$HREF" . $txtending ."\" Text file from\nlocal cached .zip copy instead";
				}
			}
		}
	}
	$HREF = $TEMPREF;
}

if ($HREF =~ /htm[l]*$/i )
{
	#This is an already unzipped PG html file
	#NR: this may be broken now; more testing required...
	my ($filename, $dirbase, $suffix) = fileparse($HREF);
	$BASENOEXT = $filename;
	$BASENOEXT =~ s/\.htm[l]*$//i;
	$BASEDIR = $destdir;
	$getfile = "";
	$webfile = 0;
	$extractfile = 0;
	if ( -e $BASENOEXT."_PGA.htm" && $PGA) 
	{
		copy($BASENOEXT."_PGA.htm",$BASENOEXT.".htm");
		$inputfile = File::Spec->catfile($BASEDIR,$BASENOEXT.".htm");
		$document_length = -s $BASENOEXT.".htm";
		print "Using existing \"$BASENOEXT.htm\" file from\nlocal cached copy instead\n";
	}
	else
	{
		$inputfile = $HREF;
	}
	$outdir = $BASEDIR;  
	
	#need to avoid download ans use cached (local) copy
	if ($HREF =~ /http:/i && (! -e $BASENOEXT."_PGA.htm") && $PGA )
	{
		print "Getting \"$PGetext\" HTML file from $PGNAME Website\nPlease Wait... ";
		system ("web2disk -r 0 --dont-download-stylesheets \"$HREF\" ") == 0 || warn $!;
		$document_length = -s $BASENOEXT.".xhtml";
		if (-e $BASENOEXT.".xhtml")
		{
			if ($opt_keephtm || $opt_keepzip)
			{
				copy($BASENOEXT.".xhtml",$BASENOEXT."_PGA.htm");
			}
			move($BASENOEXT.".xhtml",$BASENOEXT.".htm");
			if ($document_length < 1048576)
			{
				printf("Renaming %0.1fKB .xhtml to .htm\n", $document_length/1024);
			}
			else
			{
				printf("Renaming %0.1fMB .xhtml to .htm \n", $document_length/1048576);
			}
		}
		$inputfile=File::Spec->catfile($BASEDIR,$BASENOEXT.".htm");
	}
}
elsif ($HREF =~ /http:/i )
{
	#This is a HTML reference to the ZIP file
	$BASENAME = substr($HREF, rindex($HREF, '/') + 1);
	$BASENOEXT = $BASENAME;
	$BASENOEXT =~ s/.zip//;
	if (!$PGA)
	{
		$BASEDIR = $BASENOEXT;
	}
	else
	{
		$BASEDIR = $PGetext;
	}
	$getfile = File::Spec->catfile($destdir,$BASENAME);
	$webfile = 1;
	$extractfile = 1;
	$inputfile = File::Spec->catfile($destdir,$BASEDIR,"$BASENOEXT.htm");
	$outdir = File::Spec->catdir($destdir,$BASEDIR);
	if (! -e $cachedfile)
	{
		$message = "Getting \"$HREF\" HTML file\nfrom $PGNAME Website";
		($content_type, $document_length, $modified_time, $expires, $server) = head($HREF);
	}
}
else
{
	#This is a zip file already downloaded
	my ($BASENAME, $dirbase, $suffix) = fileparse($HREF);		
	$BASENOEXT = $BASENAME;
	$BASENOEXT =~ s/.zip//;
	if (!$PGA)
	{
		$BASEDIR = $BASENOEXT;
	}
	else
	{
		$BASEDIR = $PGetext;
	}
	$getfile = $HREF;
	$webfile = 0;
	$extractfile = 1;
	$inputfile = File::Spec->catfile($destdir,$BASEDIR,"$BASENOEXT.htm");
	$outdir = File::Spec->catdir($destdir,$BASEDIR);
}

if (! -e $getfile && $webfile == 1)
{
	if ($document_length < 1048576)
	{
		printf("%s...\nFetching %0.1fKB...\n", $message, $document_length/1024);
	}
	else
	{
		printf("%s...\nFetching %0.1fMB...\n", $message, $document_length/1048576);
	}
	if (is_error(getstore($HREF, $getfile)))
	{
		print "Couldn't download \"$getfile\" file!\n";
		exit 1;
	}
}
else
{
	printf("%s.\n", $message) unless not $message;
}

if ($extractfile)
{
	my $members, $extractdir;
	print "Extracting files...\n";

	#Unzip the files into the destination directory, structure is usually
	#always the same for Project Gutenberg files
	my $zip = Archive::Zip->new();
	die "Error reading $zipfile:$!" unless $zip->read( "$getfile" ) == AZ_OK; 

	#Some Gutenberg ZIP files incorrectly miss off the subdirectory, check
	#to see if exists, otherwise create it or rest of script won't work
 	$members = $zip->memberNamed("$BASEDIR/$BASENOEXT.htm");
	if ($members eq "")
	{
		#Check for a text version
		$members = $zip->memberNamed("$BASENOEXT.txt");
		if ($members ne "")
		{
			$textbook = 1;
			$inputfile = File::Spec->catfile($outdir,"$BASENOEXT-h.htm");
		}
		else
		{
			print "Archive missing directory, correcting...\n";
		}
		$extractdir = $outdir;
	}
	else
	{
		$extractdir = $destdir;
	}

	die "Error extracting $zipfile:$!" unless $zip->extractTree( '', "$extractdir/") == AZ_OK; 

	#retain a copy of the original PG file
	if ($opt_keephtm)
	{
		$copyfile = File::Spec->catfile($outdir,"$BASENOEXT\_PG.htm");
		copy($inputfile,$copyfile);			
	}
	
	#delete downloaded PG file, otherwise retain it as a local cached copy
	unlink $BASENAME unless $opt_keepzip;
}

if ($textbook)
{
	print "Converting text book to .htm ";
	$htmlbook = File::Spec->catfile($BASEDIR,$BASENOEXT."-h.htm");
	$outfile = File::Spec->catfile($outdir, $BASENOEXT."-h.htm");
	$txtin   = File::Spec->catfile($outdir,"$BASENOEXT.txt");
	$txtout  = File::Spec->catfile($outdir,"$BASENOEXT-h.htm");
	#given $txtin (.txt) produces $txtout (.htm)
	if ($opt_usegm)
	{
		print "using Gutenmark...\n";
		system ("gutenmark --no-prefatory --yes-header --page-breaks \"$txtin\" \"$txtout\" ") == 0 || die $!;
	}
	else
	{
		print "without Gutenmark not yet available; use \"--usegm\" switch.  Aborting!\n";
		exit 1;
	}
}
else
{
	$htmlbook=File::Spec->catfile($BASEDIR,$BASENOEXT.".htm");
	$outfile=File::Spec->catfile($outdir, $BASENOEXT.".htm");
}

#First we're getting the author and title
open (INFILE, $inputfile);
my @lines = <INFILE>;                 # Read it into an array
close(INFILE);

#Extract Book Title, Author and other items on first pass through file
my $line;
foreach $line (@lines) {              # assign @lines to $line, one at a time
	#minor fixes
	$line =~ s/&#160; / /g;
	$line =~ s/&#160;/ /g;
        $line =~ s/<pre\s*{/pre {/gi; #fix ill-formed CSS

	if ( $line =~ m/Title:/i && !$BOOKTITLE )
	{
		$BOOKTITLE = substr($line, index($line, "Title:")+7);
		$BOOKTITLE =~ s#(<[^>]*>)##gi;
		$BOOKTITLE =~ s#^\s*##i;
		$BOOKTITLE =~ s/&#150;/-/g;
		$BOOKTITLE =~ s/\x96/-/g;
		#$BOOKTITLE =~ s/[^A-Za-z0-9^ ^-]+//g;
		$BOOKTITLE =~ s/[\/?<>\\:\*\|"\n]//g;
		print "\nBook Title : $BOOKTITLE\n";
	}
	if ( $line =~ m/Author:/i && !$AUTHOR )
	{
		$AUTHOR = substr($line, index($line, "Author:")+8);
		$AUTHOR =~ s#(<[^>]*>)##gi;
		$AUTHOR =~ s#^\s*##i;
		$AUTHOR =~ s/&#150;/-/g;
		$AUTHOR =~ s/\x96/-/g;
		#$AUTHOR =~ s/[^A-Za-z0-9^ ^-]+//g;
		$AUTHOR =~ s/[\/?<>\\:\*\|"\n]//g;
		print "Author     : $AUTHOR\n";
	}
	if ( $line =~ m/Illustrator:/i && !$ILLUSTRATOR )
	{
		$ILLUSTRATOR = substr($line, index($line, "Illustrator:")+13);
		$ILLUSTRATOR =~ s#(<[^>]*>)##gi;
		$ILLUSTRATOR =~ s#^\s*##i;
		$ILLUSTRATOR =~ s/&#150;/-/g;
		$ILLUSTRATOR =~ s/\x96/-/g;
		$ILLUSTRATOR =~ s/[\/?<>\\:\*\|"\n]//g;
		print "Illustrator: $ILLUSTRATOR\n";
	}
	if ( $line =~ m/Release Date:/i && !$RELEASED )
	{
		$RELEASED = substr($line, index($line, "Release Date:")+14);
		$RELEASED =~ s#(<[^>]*>)##gi;
		$RELEASED =~ s#^\s*##i;
		$RELEASED =~ s/&#150;/-/g;
		$RELEASED =~ s/\x96/-/g;
		$RELEASED =~ s/[\/?<>\\:\*\|"\n]//g;
		print "Released   : $RELEASED\n";
	}
	if ( $line =~ m/Date first posted:/i && !$RELEASED )
	{
		$RELEASED = substr($line, index($line, "Date first posted:")+19);
		$RELEASED =~ s#(<[^>]*>)##gi;
		$RELEASED =~ s#^\s*##i;
		$RELEASED =~ s/&#150;/-/g;
		$RELEASED =~ s/\x96/-/g;
		$RELEASED =~ s/[\/?<>\\:\*\|"\n]//g;
		print "Released   : $RELEASED\n";
	}
	if ( $line =~ m/Language:/i && !$LANGUAGE )
	{
		$LANGUAGE = substr($line, index($line, "Language:")+10);
		$LANGUAGE =~ s#(<[^>]*>)##gi;
		$LANGUAGE =~ s#^\s*##i;
		$LANGUAGE =~ s/&#150;/-/g;
		$LANGUAGE =~ s/\x96/-/g;
		$LANGUAGE =~ s/[\/?<>\\:\*\|"\n]//g;
		print "Language   : $LANGUAGE\n";
	}
	if ( $line =~ m/eBook No.:/i && !$EBOOKNO )
	{
		$EBOOKNO = substr($line, index($line, "eBook No.:")+11);
		$EBOOKNO =~ s#(<[^>]*>)##gi;
		$EBOOKNO =~ s#^\s*##i;
		$EBOOKNO =~ s/&#150;/-/g;
		$EBOOKNO =~ s/\x96/-/g;
		$EBOOKNO =~ s/[\/?<>\\:\*\|"\n]//g;
		print "eBook No.  : $EBOOKNO (PG Australia)\n";
	}
	if ( $line =~ m/Character set encoding:/i && !$CHARSET )
	{
		$CHARSET = substr($line, index($line, "Character set encoding:")+24);
		$CHARSET =~ s#(<[^>]*>)##gi;
		$CHARSET =~ s#^\s*##i;
		$CHARSET =~ s/&#150;/-/g;
		$CHARSET =~ s/\x96/-/g;
		$CHARSET =~ s/[\/?<>\\:\*\|"\n]//g;
		print "Char. Set  : $CHARSET\n" unless not $textbook;
	}

	#Convert <pre> tags to something usable in .mobi files
	if ( ($line =~ m/<pre>/i) && ($prerange == 0) ) 
	{ 
		$line =~ s#<pre>#<pre>$fixpre1#i;
		$prerange = 1;
	}
	if ( ($line =~ m/<\/pre>/i) && ($prerange == 1) )
	{ 
		$line =~ s#</pre>#$fixpre2</pre>#i;
		$prerange = 0;
	}

	$text .= $line;  
}

if (!$AUTHOR)      { $AUTHOR    = "Unspecified"; }
if (!$BOOKTITLE)   { $BOOKTITLE = "Unspecified"; }

if ($opt_a) { $AUTHOR    = $opt_a; }
if ($opt_t) { $BOOKTITLE = $opt_t; }
if ($opt_c) { $CATEGORY  = $opt_c; print "Category  : $CATEGORY\n"; }

print "\nCleaning \"$PGetext\" HTML...\n";

#Cleanup the Project Gutenberg HTML to remove/add things like "page numbers", "toc" name
#Also remove the Project Gutenberg disclaimer preamble (header) & legalese (footer)

#Strip the Project Gutenberg preamble
#added comment, <!--Start-->, required so as to not break another text substitution below
#remember <pre> now possibly <pre>$fixpre1 i.e. <pre><small><tt>
if (!$PGA)
{
	if (!$opt_PGheader) { 
		$text =~ s#<pre>$fixpre1(\s)*(The )*Project Gutenberg([^<])*$fixpre2</pre>(\s|<br[^>]*>)*#<a name="start" id="start">\n<!--Start-->\n#i; 
	
		#for txt documents prepared by Gutenmark
		$text =~ s#<h1>Project Gutenberg Fine Print</h1>\s*#<a name="start" id="start">\n<!--Start-->\n#i; 
		$text =~ s#<DIV class=NOPRINT>\s*<h1>Prefatory Materials</h1>(\s|\S)*</DIV>#<a name="start" id="start">\n<!--Start-->\n#i;
		$text =~ s#<body([^>]*)>(\s|\S)*<p>(\s|\*)*START OF TH(E|IS) PROJECT GUTENBERG([^<])*</p>#<body$1>\n<a name="start" id="start">\n<!--Start-->\n#i;
	}
	else
	{
		#otherwise just add "start" anchor after PG preamble
		$text =~ s#(<pre>$fixpre1(\s)*(The )*Project Gutenberg([^<])*$fixpre2</pre>(\s|<br[^>]*>)*)#$1\n<a name="start" id="start">\n<!--Start-->\n#i; 
	
		#for txt documents prepared by Gutenmark
		$text =~ s#(<h1>Project Gutenberg Fine Print</h1>\s*)#$1\n<a name="start" id="start">\n<!--Start-->\n#i; 
		$text =~ s#(<DIV class=NOPRINT>\s*<h1>Prefatory Materials</h1>(\s|\S)*</DIV>)#$1\n<a name="start" id="start">\n<!--Start-->\n#i;
		$text =~ s#(<body([^>]*)>(\s|\S)*<p>(\s|\*)*START OF TH(E|IS) PROJECT GUTENBERG([^<])*</p>)#<body$2>\n$1\n<a name="start" id="start">\n<!--Start-->\n#i;
	}
	
	if (!$opt_PGfooter)
	{ 
		$text =~ s#<pre>$fixpre1(\s|\*)*End of (the )*Project Gutenberg(\s|\S)*#</body>\n</html>#i;
	
		#for txt documents prepared by Gutenmark
		$text =~ s#<[^>]*>(\s|\*)*End of (the )*Project Gutenberg(\s|\S)*#</body>\n</html>#i;
	}
}
else
{
	if (!$opt_PGheader)
	{ 
		$text =~ s#(\s*<script[^>]*>.*?</script>\s*)##igs; 
		$text =~ s#<p[^>]*>\s*</p>\s*##ig; 
		$text =~ s#<p[^>]*>\s*<img .*?></p>\s*<table[^>]*>.*?</table>\s*##igs; 
		$text =~ s#<pre>$fixpre1.*?$fixpre2</pre>#<a name="start" id="start">\n<!--Start-->\n#is; 
		$text =~ s#<p[^>]*>\s*<b>go .*?</b></p>\s*(<hr[^>]*>\s*)*##is; 
		
		#for txt documents prepared by Gutenmark    
		#$text =~ s#<h1>Project Gutenberg (Australia) Fine Print</h1>\s*#<a name="start" id="start">\n<!--Start-->\n#i; 
		$text =~ s#<DIV class=NOPRINT>\s*<h1>Prefatory Materials</h1>(\s|\S)*</DIV>#<a name="start" id="start">\n<!--Start-->\n#i;
		#$text =~ s#<body([^>]*)>(\s|\S)*<p>(\s|\*)*START OF TH(E|IS) Project Gutenberg (Australia)([^<])*</p>#<body$1>\n<a name="start" id="start">\n<!--Start-->\n#i;
	}
	else
	{
		#otherwise just add "start" anchor after PG preamble
		$text =~ s#(<pre>$fixpre1.*?$fixpre2</pre>)#$1\n<a name="start" id="start">\n<!--Start-->\n#is; 
	
		#for txt documents prepared by Gutenmark
		#$text =~ s#(<h1>Project Gutenberg (Australia) Fine Print</h1>\s*)#$1\n<a name="start" id="start">\n<!--Start-->\n#i; 
		$text =~ s#(<DIV class=NOPRINT>\s*<h1>Prefatory Materials</h1>(\s|\S)*</DIV>)#$1\n<a name="start" id="start">\n<!--Start-->\n#i;
		#$text =~ s#(<body([^>]*)>(\s|\S)*<p>(\s|\*)*START OF TH(E|IS) Project Gutenberg (Australia)([^<])*</p>)#<body$2>\n$1\n<a name="start" id="start">\n<!--Start-->\n#i;
	}

	if (!$opt_PGfooter)
	{ 
		$text =~ s#<h2>THE END</h2>(.*?)<p align=[^>]*>\s*<img (\s|\S)*#<h2>THE END</h2>\n$1</body>\n</html>#is; 
	
		#for txt documents prepared by Gutenmark
		#$text =~ s#<[^>]*>(\s|\*)*End of (the )*Project Gutenberg (Australia)(\s|\S)*#</body>\n</html>#i;
		$text =~ s#<p><i>The</i> <i>end</i></p>(.*?)<p align=[^>]*>(\s|\S)*#<h2>THE END</h2>\n$2</body>\n</html>#is; 
	}
}

#by default, insert "toc" anchor name for .mobi ebooks (once)
if (!$opt_tocname)
{
	if ($opt_pbtoc)
	{
		if ($opt_pbwithin)
		{
			$text =~ s#<([^>]*)>(\s*[^>]*.*?Contents[:.]*\s*</[^>]*>)#<$1 name="toc" id="toc" style="page-break-before: always">$2#i;
		}
		else
		{
			$text =~ s#<([^>]*)>(\s*[^>]*.*?Contents[:.]*\s*</[^>]*>)#<$1 style="page-break-before: always"><a name="toc" id="toc">$2#i;
	#never used	$text =~ s#<([^>]*)>(\s*[^>]*.*?Contents[:.]*\s*</[^>]*>)#<a name="toc" id="toc"><$1 style="page-break-before: always">$2#i;
		}
	}
	else
	{
		$text =~ s#<([^>]*)>(\s*[^>]*Contents[:.]*\s*</[^>]*>)#<$1 name="toc" id="toc">$2#i;
	}
}

#Remove page numbering
if (!$opt_PGpagenum)
{
	$text =~ s#<span class='pagenum[^']*'>([\sivxcldm]*)</span>#<!--Pagenum-->#gi;
	$text =~ s#<span class=\"pagenum[^"]*\">([\sivxcldn]*)</span>#<!--Pagenum-->#gi;

	$text =~ s#<span class='pagenum[^']*'><([^>]*)>(</a>)*([^<]*)(</a>)*</span>#<$1>$2$4<!--Pagenum-->#gi;
	$text =~ s#<span class=\"pagenum[^"]*\"><([^>]*)>(</a>)*([^<]*)(</a>)*</span>#<$1>$2$4<!--Pagenum-->#gi;
	$text =~ s#<span class=\"tei tei-pb\".*</span>##gi;
}

#Ensure anchor links to Chapter headings start on a new page with pagebreak HTML h tags
if ($opt_pbwithin)
{
	$text =~ s#<a (id=|name=)([^>]*)>\s*(<!--[^>]*>)*\s*(</a>)*\s*(</p>)*\s*<hr([^>]*)>\s*<$pagebreak[0]#$5\n<hr$6>\n$3\n<$pagebreak[0] $1$2$4#gi;
	$text =~ s#(<p>)*\s*<a([^>]*)>\s*(<!--[^>]*>)*\s*(</a>)*\s*(</p>)*\s*<$pagebreak[0]#$3\n<$pagebreak[0]$2#gi;
	if ($pagebreak[1])
	{
		$text =~ s#<a (id=|name=)([^>]*)>\s*(<!--[^>]*>)*\s*(</a>)*\s*(</p>)*\s*<hr([^>]*)>\s*<$pagebreak[1]#$5\n<hr$6>\n$3\n<$pagebreak[1] $1$2$4#gi;
		$text =~ s#(<p>)*\s*<a([^>]*)>\s*(<!--[^>]*>)*\s*(</a>)*\s*(</p>)*\s*<$pagebreak[1]#$3\n<$pagebreak[1]$2#gi;
	}
}
else
{
	$text =~ s#<a (id=|name=)\s*([^>]*>)\s*(<!--[^>]*>)*\s*(</a>)*\s*(</p>)*\s*<hr([^>]*)>\s*<$pagebreak[0]([^>]*>)#$5\n<hr$6>\n$3\n<$pagebreak[0]$7<a $1$2$4#gi;
	$text =~ s#(<p>)*\s*<a([^>]*>)\s*(<!--[^>]*>)*\s*(</a>)*\s*(</p>)*\s*<$pagebreak[0]([^>]*)>#$3\n<$pagebreak[0]$6><a$2$4#gi;
	if ($pagebreak[1])
	{
		$text =~ s#<a (id=|name=)\s*([^>]*>)\s*(<!--[^>]*>)*\s*(</a>)*\s*(</p>)*\s*<hr([^>]*)>\s*<$pagebreak[1]([^>]*>)#$5\n<hr$6>\n$3\n<$pagebreak[1]$7<a $1$2$4#gi;
		$text =~ s#(<p>)*\s*<a([^>]*>)\s*(<!--[^>]*>)*\s*(</a>)*\s*(</p>)*\s*<$pagebreak[1]([^>]*)>#$3\n<$pagebreak[1]$6><a$2$4#gi;
	}
}

#Tidy up book title
if (!$opt_PGheader)
{
	$text =~ s#The Project Gutenberg eBook (of\s*)*##gi;
	$text =~ s#The Project Gutenberg E-text (of\s*)*##gi;
}
	
#Add Author and Book Title to HTML meta
$text =~ s#<head>#<head>\n\t<meta name="generator" content="GuteBook by Nick Rapallo (nrapallo)">#i;
$text =~ s#<head>#<head>\n\t<meta name="publisher" content="$PGNAME">#i;
$text =~ s#<head>#<head>\n\t<meta name="author"    content="$AUTHOR">#i;
$text =~ s#<head>#<head>\n\t<meta name="title"     content="$BOOKTITLE">#i;
$text =~ s#<head>#<head>\n\t<meta name="EText-No." content="$PGetext">#i;
if ($textbook)
{
	if ($CHARSET)
	{
		$text =~ s#<head>#<head>\n\t<meta http-equiv="Content-Type" content="text/html; charset=$CHARSET" />#i;
	}
	else
	{
		$text =~ s#<head>#<head>\n\t<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />#i;
	}
}

if ($textbook)
{
	$text =~ s#<!--NewPage--\>#<p style="page-break-before: always"></p>#gi;
	# Put page break in the CSS header for HTML2LRF
	if ($pagebreak[1])
	{
		if ($opt_centerh)
		{
			$text =~ s#</head>#<STYLE TYPE="text/css">\n    h1, h2, h3, h4, h5, h6 {text-align: center }\n    $pagebreak[0], $pagebreak[1] { page-break-before: always; }\n  </style>\n</head>#i;
		}
		else
		{
			$text =~ s#</head>#<STYLE TYPE="text/css">\n    $pagebreak[0], $pagebreak[1] { page-break-before: always; }\n  </style>\n</head>#i;
		}
	}
	else
	{
		if ($opt_centerh)
		{
			$text =~ s#</head>#<STYLE TYPE="text/css">\n    h1, h2, h3, h4, h5, h6 {text-align: center }\n    $pagebreak[0] { page-break-before: always; }\n  </style>\n</head>#i;
		}
		else
		{
			$text =~ s#</head>#<STYLE TYPE="text/css">\n    $pagebreak[0] { page-break-before: always; }\n  </style>\n</head>#i;
		}
	}
}
else
{
	# Put pagebreak in the CSS header
	if ($pagebreak[1])
	{
		if ($opt_centerh)
		{
			$text =~ s#</style>#  h1, h2, h3, h4, h5, h6 {text-align: center }\n    $pagebreak[0], $pagebreak[1] { page-break-before: always; }\n  </style>#i;
		}
		else
		{
			$text =~ s#</style>#  $pagebreak[0], $pagebreak[1] { page-break-before: always; }\n  </style>#i;
		}
	}
	else
	{
		if ($opt_centerh)
		{
			$text =~ s#</style>#  h1, h2, h3, h4, h5, h6 {text-align: center }\n    $pagebreak[0] { page-break-before: always; }\n  </style>#i;
		}
		else
		{
			$text =~ s#</style>#  $pagebreak[0] { page-break-before: always; }\n  </style>#i;
		}
	}
}
#Pagebreak on first h1 heading
$text =~ s#<h1#<h1 style="page-break-before: always" #i unless not defined $opt_pbfirsth1;

#Pagebreak not on first pagebreak HTML Tag; "avoid" does not override the inherited "always",
#so change pb tag to <h1> (doesn't work though if $pagebreak is <h1>)
if ($pagebreak[0] eq "h1" || $pagebreak[0] eq "H1" || $pagebreak[1] eq "h1" || $pagebreak[1] eq "H2")
{
	$text =~ s#<$pagebreak[0]([^>]*>)([^>]*>)#<$pagebreak[0] style="page-break-before: avoid" $1$2#i unless not defined $opt_pbnofirst;
}
else
{
	$text =~ s#<$pagebreak[0]([^>]*>)([^<]*<)([^>]*>)#<h1 style="page-break-before: avoid" $1$2/h1>#i unless not defined $opt_pbnofirst;
}

#Change default margins in CSS and/or body tag
$opt_LRmargins = "2%" unless defined $opt_LRmargins;
$opt_indent = "2em" unless defined $opt_indent;
if (defined $opt_smallerfont)
{
    $default_fontsize = "font-size: -1;";
}
elsif (defined $opt_largerfont)
{
    $default_fontsize = "font-size: +1;";
}
else
{
    $default_fontsize = "";
}

{
	# The CSS "body" rule and the inline <body style="..."> get the same
	# declarations; only the text alignment depends on the nojustify option.
	my $body_align = $opt_nojustify ? "left" : "justify";
	my $body_style = "margin-left: $opt_LRmargins; margin-right: $opt_LRmargins; text-align: $body_align; text-indent: $opt_indent; $default_fontsize";

	# Replace an existing CSS body rule (literal braces escaped so newer
	# Perl versions do not complain about an unescaped "{").
	$text =~ s#body\s*\{\s*[^}]*\}#body { $body_style}#i;

	# Style a bare <body> tag.  The substitution is case-insensitive and is
	# a no-op when there is no bare tag; the earlier case-sensitive guard
	# skipped files that use <BODY>.
	$text =~ s#<body>#<body style="$body_style">#i;
}

if ($opt_nopara)
{
	# No vertical space between paragraphs.
	$text =~ s#</style>#    p {text-indent: $opt_indent; margin-top: 0em; margin-bottom: 0em;  padding-top: 0em; padding-bottom: 0em}\n  </style>#i;
}
# Disabled alternative for the default (paragraph spacing) case:
#	$text =~ s#</style>#    p {text-indent: $opt_indent; margin-top: 0em; margin-bottom: 1em;  padding-top: 0em; padding-bottom: 1em}\n  </style>#i;

#strip width, border, height from <img> to avoid squished pictures or even disappearing ones for .imp
#also remove any width elements within the <div class=figcenter> which breaks for .epub
if ($opt_imgsrc)
{
	$text =~ s#<img .*?(src="[^"]*")[^>]*>#<img $1 />#gi;
	$text =~ s#<div class="figcenter" style="width[^>]*>#<div class="figcenter">#gi;
}

#Specific PGA HTML fixes (only applied when $PGA is set)
if ($PGA)
{
	# green text becomes red
	$text =~ s#\{\s*color\s*:(\s*"*)green("*;*)#{ color: $1red$2#ig;
	# darken the light grey used in the stylesheet
	$text =~ s/\s*color:\s*#8d8b8a\s*}/ color: #5d5b5a}/ig;
	# drop the fixed 14pt font size rules for <p> and <table>
	$text =~ s#P \{font-size:"14pt"\}##ig;
	$text =~ s#table \{font-size:"14pt"\}##ig;
}

#Miscellaneous HTML fixes
#$text =~ s/.figcenter\s*{\s*margin: auto;/.figcenter   {/i;
#$text =~ s/.caption\s*{/.caption { page-break-inside: avoid; /i;
$text =~ s#toc\s*\{\s*([^}]*)margin-left:[^;]*;#toc { $1 margin-left: $opt_LRmargins;#i;
$text =~ s#</style>#  br {margin-top: 0em; margin-bottom: 0em;  padding-top: 0em; padding-bottom: 0em}\n  </style>#i;
#split combined <a name and href> tags into <a name> and <a href>
# NOTE(review): the first <a ...> emitted here is never closed - confirm the
# downstream converters tolerate that.
$text =~ s#<a([^>]*) name([^>]*) href([^>]*)>#<a$1 name $2><a href $3>#gi;
#fix up (unwanted) blank lines at end: drop any run of <br...> tags and
#whitespace directly before </body>.  Written as a single alternation; the
#earlier nested-quantifier form could match the empty string repeatedly.
$text =~ s#(?:<br[^>]*>|\s)*</body>#\n</body>#gi;

#Append, right before </body>, a small closing page:
#   BOOKTITLE / by AUTHOR / (PG/NR or PGA/NR Release: RELEASED)
unless ($opt_noPGtrailer)
{
	# Only the source label differs between PG and PG Australia books.
	my $release_label = $PGA ? "PGA/NR" : "PG/NR";
	$text =~ s#</body>#<h6 style="page-break-before: always"><small>$BOOKTITLE<br />by $AUTHOR<br />($release_label Release: $RELEASED)</small></h6>\n</body>#i;
}

if ($opt_cover) #$opt_PGtitle
{
	# Each row is pushed in directly after the <!--Start--> marker, so the
	# rows are listed here in the reverse of their final order on the page.
	# Metadata rows are only emitted for values that are set (true).
	my @cover_rows = ('<br style="page-break-before: always" />', "$fixpre2</pre>");
	push @cover_rows, "<p>Language   : $LANGUAGE</p>"                if $LANGUAGE;
	push @cover_rows, "<p>Released   : $RELEASED</p>"                if $RELEASED;
	push @cover_rows, "<p>Illustrator: $ILLUSTRATOR</p>"             if $ILLUSTRATOR;
	push @cover_rows, "<p>eBook No.  : $EBOOKNO (PG Australia)</p>"  if $EBOOKNO;
	push @cover_rows, "<p>Author     : $AUTHOR</p>"                  if $AUTHOR;
	push @cover_rows, "<p>Book Title : $BOOKTITLE</p>"               if $BOOKTITLE;
	push @cover_rows, "<pre>$fixpre1";

	foreach my $row (@cover_rows)
	{
		$text =~ s#<!--Start-->#<!--Start-->\n$row#;
	}
}
	
#Custom Perl RegEx (a catch-all for tweaking unforeseen complications).  User must escape " with \".
#The replacement is evaluated as a double-quoted string so that $1, $2, ... can be used in it.
#NOTE: both the pattern and the replacement are executed as supplied by the user;
#only ever pass trusted input to these options.
if ($opt_search)
{
	# Apply the substitution exactly once, with the modifier combination that
	# was requested.  (These used to be independent "if" tests, so requesting
	# "i" and/or "g" ran the substitution two or three times over the text.)
	if    ( $opt_modi && $opt_modg ) { $text =~ s/$opt_search/eval qq{"$opt_replace"}/eig; }
	elsif ( $opt_modi )              { $text =~ s/$opt_search/eval qq{"$opt_replace"}/ei; }
	elsif ( $opt_modg )              { $text =~ s/$opt_search/eval qq{"$opt_replace"}/eg; }
	else                             { $text =~ s/$opt_search/eval qq{"$opt_replace"}/e; }
}

# Write the cleaned-up HTML.  Every later conversion reads this file, so
# stop right here if it cannot be written instead of converting stale data.
open(my $out_fh, '>', $outfile) or die "Cannot write cleaned HTML \"$outfile\": $!\n";
print {$out_fh} $text;
close($out_fh) or die "Error while writing \"$outfile\": $!\n";

print "Wrote cleaned HTML \"$outfile\"\n\n";

#Everything setup so create the actual ebooks

if (!$opt_v)
{
	# Not verbose: discard everything printed to STDOUT from here on
	# (including the output of the converters that are started below).
	# File::Spec->devnull is "nul" on Windows.
	close(STDOUT);
	open(STDOUT, '>', File::Spec->devnull) or warn "Cannot redirect STDOUT: $!\n";
}

# Start the "rebuild-<etext>.bat" script that lets the user re-run the
# conversions after hand-editing the cleaned HTML.
if (!$opt_nobatch)
{
	if (open(BATFILE, '>', "rebuild-$PGetext.bat"))
	{
		print BATFILE "\@echo off\n";
		print BATFILE "rem GuteBook batch script devised by Nick Rapallo (nrapallo) to\n";
		print BATFILE "rem rebuild ebooks after \"$PGetext\" source .html modified/edited for:\n";
		print BATFILE "rem \"$BOOKTITLE by $AUTHOR\"\n";
		print BATFILE "rem (PG Release: $RELEASED)\n";
	}
	else
	{
		# The batch file is a convenience only: carry on with the conversions
		# and behave from here on as if no batch file had been requested.
		warn "Cannot create \"rebuild-$PGetext.bat\" ($!); continuing without a batch file.\n";
		$opt_nobatch = 1;
	}
}

# Run one external converter command line through the shell.
# Returns 1 when the command exited with status 0, otherwise warns and
# returns 0.  $! is only meaningful when the command could not be started
# at all (system() returned -1); otherwise the exit status is in $?.
sub run_ebook_tool
{
	my ($cmdline) = @_;
	my $status = system($cmdline);
	return 1 if $status == 0;

	my ($tool) = $cmdline =~ /^\s*(\S+)/;
	if ($status == -1)
	{
		warn "Could not start $tool: $!\n";
	}
	else
	{
		warn "$tool failed (exit status " . ($? >> 8) . ")\n";
	}
	return 0;
}

# Convert the cleaned HTML ($outfile) into each requested format and, unless
# $opt_nobatch is set, record an equivalent command line in the rebuild batch
# file.  The first branch drives the single "ebook-convert" tool; the branch
# taken when $opt_calibreold is set drives the older any2epub/any2lrf/
# any2mobi/any2lit tools.
if (!$opt_calibreold)
{
if ($opt_srcepub)
{
	print "---\nConverting to Source .epub...\n";
	$epubbook=File::Spec->catfile($destdir, "$AUTHOR - $BOOKTITLE\_source.epub");
	run_ebook_tool("ebook-convert \"$outfile\" \"$epubbook\" --title \"$BOOKTITLE\" --authors \"$AUTHOR\" --publisher=\"$PGNAME\" --chapter \"//$pagebreak[0]\" --output-profile=default --dont-split-on-page-breaks --flow-size=40000");
	if (!$opt_nobatch)
	{
		print BATFILE "\nrem Convert .htm to Source .epub\n";
		print BATFILE "ebook-convert \"$htmlbook\" \"$AUTHOR - $BOOKTITLE\_source.epub\" --title \"$BOOKTITLE\" --authors \"$AUTHOR\" --publisher=\"$PGNAME\" --chapter \"//$pagebreak[0]\" --output-profile=default --dont-split-on-page-breaks --flow-size=40000\n";
	}
}

if ($opt_epub)
{
	print "---\nConverting to Sony .epub...\n";
	$epubbook=File::Spec->catfile($destdir, "$AUTHOR - $BOOKTITLE.epub");
	print BATFILE "\nrem Convert .htm to Sony .epub\n" unless $opt_nobatch;
	if ($pagebreak[1])
	{
		# Two pagebreak tags: chapters start at either element.
		run_ebook_tool("ebook-convert \"$outfile\" \"$epubbook\" --title \"$BOOKTITLE\" --authors \"$AUTHOR\" --publisher=\"$PGNAME\" --chapter \"//*[name()='$pagebreak[0]' or name()='$pagebreak[1]']\" --output-profile=sony");
		print BATFILE "ebook-convert \"$htmlbook\" \"$AUTHOR - $BOOKTITLE.epub\" --title \"$BOOKTITLE\" --authors \"$AUTHOR\" --publisher=\"$PGNAME\" --chapter \"//*[name()='$pagebreak[0]' or name()='$pagebreak[1]']\" --output-profile=sony\n" unless $opt_nobatch;
	}
	else
	{
		run_ebook_tool("ebook-convert \"$outfile\" \"$epubbook\" --title \"$BOOKTITLE\" --authors \"$AUTHOR\" --publisher=\"$PGNAME\" --chapter \"//$pagebreak[0]\" --output-profile=sony");
		print BATFILE "ebook-convert \"$htmlbook\" \"$AUTHOR - $BOOKTITLE.epub\" --title \"$BOOKTITLE\" --authors \"$AUTHOR\" --publisher=\"$PGNAME\" --chapter \"//$pagebreak[0]\" --output-profile=sony\n" unless $opt_nobatch;
	}
}

if ($opt_lrf)
{
	print "---\nConverting to Sony .lrf...\n";
	$lrfbook=File::Spec->catfile($destdir, "$AUTHOR - $BOOKTITLE.lrf");
	run_ebook_tool("ebook-convert \"$outfile\" \"$lrfbook\" --title \"$BOOKTITLE\" --authors \"$AUTHOR\" --publisher=\"$PGNAME\" ");
	if (!$opt_nobatch)
	{
		# The "rem" lines are ready-made alternatives that convert from an
		# already built .epub instead of from the .htm.
		print BATFILE "\nrem Convert .htm to Sony .lrf\n";
		print BATFILE "ebook-convert \"$htmlbook\" \"$AUTHOR - $BOOKTITLE.lrf\" --title \"$BOOKTITLE\" --authors \"$AUTHOR\" --publisher=\"$PGNAME\" \n";
		print BATFILE "rem ebook-convert \"$AUTHOR - $BOOKTITLE.epub\" \"$AUTHOR - $BOOKTITLE.lrf\" --title \"$BOOKTITLE\" --authors \"$AUTHOR\" --publisher=\"$PGNAME\" \n";
		print BATFILE "rem ebook-convert \"$AUTHOR - $BOOKTITLE\_source.epub\" \"$AUTHOR - $BOOKTITLE.lrf\" --title \"$BOOKTITLE\" --authors \"$AUTHOR\" --publisher=\"$PGNAME\" \n";
	}
}

if ($opt_mobi)
{
	print "---\nConverting to Mobipocket .mobi...\n";
	$mobibook=File::Spec->catfile($destdir, "$AUTHOR - $BOOKTITLE.mobi");
	run_ebook_tool("ebook-convert \"$outfile\" \"$mobibook\" --title \"$BOOKTITLE\" --authors \"$AUTHOR\" --publisher=\"$PGNAME\"  --max-toc-links=0");
	if (!$opt_nobatch)
	{
		print BATFILE "\nrem Convert .htm to Mobipocket .mobi or alternatively use mobigen or Mobipocket Creator\n";
		print BATFILE "ebook-convert \"$htmlbook\" \"$AUTHOR - $BOOKTITLE.mobi\" --title \"$BOOKTITLE\" --authors \"$AUTHOR\" --publisher=\"$PGNAME\" --max-toc-links=0\n";
		print BATFILE "rem ebook-convert \"$AUTHOR - $BOOKTITLE.epub\" \"$AUTHOR - $BOOKTITLE.mobi\" --title \"$BOOKTITLE\" --authors \"$AUTHOR\" --publisher=\"$PGNAME\" --max-toc-links=0\n";
		print BATFILE "rem ebook-convert \"$AUTHOR - $BOOKTITLE\_source.epub\" \"$AUTHOR - $BOOKTITLE.mobi\" --title \"$BOOKTITLE\" --authors \"$AUTHOR\" --publisher=\"$PGNAME\" --max-toc-links=0\n";
		print BATFILE "rem Note: To rebuild .mobi using Mobipocket Creator instead, just open the resulting .opf, update the metadata and Build your .prc.\n";
		print BATFILE "rem mobigen -s0 -c1 \"$AUTHOR - $BOOKTITLE.opf\"\n";
	}
}

if ($opt_lit)
{
	print "---\nConverting to Microsoft .lit...\n";
	$litbook=File::Spec->catfile($destdir, "$AUTHOR - $BOOKTITLE.lit");
	run_ebook_tool("ebook-convert \"$outfile\" \"$litbook\" --title \"$BOOKTITLE\" --authors \"$AUTHOR\" --publisher=\"$PGNAME\" ");
	if (!$opt_nobatch)
	{
		print BATFILE "\nrem Convert .htm to Microsoft .lit\n";
		print BATFILE "ebook-convert \"$htmlbook\" \"$AUTHOR - $BOOKTITLE.lit\" --title \"$BOOKTITLE\" --authors \"$AUTHOR\" --publisher=\"$PGNAME\" \n";
		print BATFILE "rem ebook-convert \"$AUTHOR - $BOOKTITLE.epub\" \"$AUTHOR - $BOOKTITLE.lit\" --title \"$BOOKTITLE\" --authors \"$AUTHOR\" --publisher=\"$PGNAME\" \n";
		print BATFILE "rem ebook-convert \"$AUTHOR - $BOOKTITLE\_source.epub\" \"$AUTHOR - $BOOKTITLE.lit\" --title \"$BOOKTITLE\" --authors \"$AUTHOR\" --publisher=\"$PGNAME\" \n";
	}
}

if ($opt_pdb)
{
	print "---\nConverting to eReader .pdb...\n";
	$pdbbook=File::Spec->catfile($destdir, "$AUTHOR - $BOOKTITLE.pdb");
	run_ebook_tool("ebook-convert \"$outfile\" \"$pdbbook\" --title \"$BOOKTITLE\" --authors \"$AUTHOR\" --publisher=\"$PGNAME\" --format=ereader");
	if (!$opt_nobatch)
	{
		print BATFILE "\nrem Convert .htm to eReader .pdb\n";
		print BATFILE "ebook-convert \"$htmlbook\" \"$AUTHOR - $BOOKTITLE.pdb\" --title \"$BOOKTITLE\" --authors \"$AUTHOR\" --publisher=\"$PGNAME\" --format=ereader\n";
		print BATFILE "rem ebook-convert \"$AUTHOR - $BOOKTITLE.epub\" \"$AUTHOR - $BOOKTITLE.pdb\" --title \"$BOOKTITLE\" --authors \"$AUTHOR\" --publisher=\"$PGNAME\" --format=ereader\n";
		print BATFILE "rem ebook-convert \"$AUTHOR - $BOOKTITLE\_source.epub\" \"$AUTHOR - $BOOKTITLE.pdb\" --title \"$BOOKTITLE\" --authors \"$AUTHOR\" --publisher=\"$PGNAME\" --format=ereader\n";
	}
}
}
else
{
if ($opt_srcepub)
{
	print "---\nConverting to Source .epub...\n";
	$epubbook=File::Spec->catfile($destdir, "$AUTHOR - $BOOKTITLE\_source.epub");
	run_ebook_tool("any2epub \"$outfile\" -t \"$BOOKTITLE\" -a \"$AUTHOR\" --publisher=\"$PGNAME\" -o \"$epubbook\" --dont-split-on-page-breaks --profile=None");
	if (!$opt_nobatch)
	{
		print BATFILE "\nrem Convert .htm to Source .epub\n";
		print BATFILE "any2epub \"$htmlbook\" -t \"$BOOKTITLE\" -a \"$AUTHOR\" --publisher=\"$PGNAME\" -o \"$AUTHOR - $BOOKTITLE\_source.epub\" --dont-split-on-page-breaks --profile=None\n";
	}
}

if ($opt_epub)
{
	print "---\nConverting to Sony .epub...\n";
	$epubbook=File::Spec->catfile($destdir, "$AUTHOR - $BOOKTITLE.epub");
	print BATFILE "\nrem Convert .htm to Sony .epub\n" unless $opt_nobatch;

	if ($pagebreak[1])
	{
		# Two pagebreak tags: chapters start at either element.
		run_ebook_tool("any2epub \"$outfile\" -t \"$BOOKTITLE\" -a \"$AUTHOR\" --publisher=\"$PGNAME\" -o \"$epubbook\" --chapter \"//*[name()='$pagebreak[0]' or name()='$pagebreak[1]']\" --profile=PRS505");
		print BATFILE "any2epub \"$htmlbook\" -t \"$BOOKTITLE\" -a \"$AUTHOR\" --publisher=\"$PGNAME\" -o \"$AUTHOR - $BOOKTITLE.epub\" --chapter \"//*[name()='$pagebreak[0]' or name()='$pagebreak[1]']\" --profile=PRS505\n" unless $opt_nobatch;
	}
	else
	{
		run_ebook_tool("any2epub \"$outfile\" -t \"$BOOKTITLE\" -a \"$AUTHOR\" --publisher=\"$PGNAME\" -o \"$epubbook\" --chapter \"//$pagebreak[0]\" --profile=PRS505");
		print BATFILE "any2epub \"$htmlbook\" -t \"$BOOKTITLE\" -a \"$AUTHOR\" --publisher=\"$PGNAME\" -o \"$AUTHOR - $BOOKTITLE.epub\" --chapter \"//$pagebreak[0]\" --profile=PRS505\n" unless $opt_nobatch;
	}
}

if ($opt_lrf)
{
	print "---\nConverting to Sony .lrf...\n";
	$lrfbook=File::Spec->catfile($destdir, "$AUTHOR - $BOOKTITLE.lrf");
	run_ebook_tool("any2lrf \"$outfile\" -t \"$BOOKTITLE\" -a \"$AUTHOR\" --category=\"$PGNAME\" -o \"$lrfbook\" --disable-chapter-detection --left-margin=25 --right-margin=25 --link-exclude=http:");
	if (!$opt_nobatch)
	{
		# The "rem" alternatives write to "-..." / "--..." prefixed output names.
		print BATFILE "\nrem Convert .htm to Sony .lrf\n";
		print BATFILE "any2lrf \"$htmlbook\" -t \"$BOOKTITLE\" -a \"$AUTHOR\" --category=\"$PGNAME\" -o \"$AUTHOR - $BOOKTITLE.lrf\" --disable-chapter-detection --left-margin=25 --right-margin=25 --link-exclude=http:\n";
		print BATFILE "rem any2lrf \"$AUTHOR - $BOOKTITLE.epub\" -t \"$BOOKTITLE\" -a \"$AUTHOR\" --category=\"$PGNAME\" -o \"-$AUTHOR - $BOOKTITLE.lrf\" --disable-chapter-detection --left-margin=25 --right-margin=25 --link-exclude=http:\n";
		print BATFILE "rem any2lrf \"$AUTHOR - $BOOKTITLE\_source.epub\" -t \"$BOOKTITLE\" -a \"$AUTHOR\" --category=\"$PGNAME\" -o \"--$AUTHOR - $BOOKTITLE.lrf\" --disable-chapter-detection --left-margin=25 --right-margin=25 --link-exclude=http:\n";
	}
}

if ($opt_mobi)
{
	print "---\nConverting to Mobipocket .mobi...\n";
	$mobibook=File::Spec->catfile($destdir, "$AUTHOR - $BOOKTITLE.mobi");
	run_ebook_tool("any2mobi \"$outfile\" -t \"$BOOKTITLE\" -a \"$AUTHOR\" --publisher=\"$PGNAME\" -o \"$mobibook\" --max-toc-links=0");
	if (!$opt_nobatch)
	{
		print BATFILE "\nrem Convert .htm to Mobipocket .mobi or alternatively use mobigen or Mobipocket Creator\n";
		print BATFILE "any2mobi \"$htmlbook\" -t \"$BOOKTITLE\" -a \"$AUTHOR\" --publisher=\"$PGNAME\" -o \"$AUTHOR - $BOOKTITLE.mobi\" --max-toc-links=0\n";
		print BATFILE "rem any2mobi \"$AUTHOR - $BOOKTITLE.epub\" -t \"$BOOKTITLE\" -a \"$AUTHOR\" --publisher=\"$PGNAME\" -o \"-$AUTHOR - $BOOKTITLE.mobi\" --max-toc-links=0\n";
		print BATFILE "rem any2mobi \"$AUTHOR - $BOOKTITLE\_source.epub\" -t \"$BOOKTITLE\" -a \"$AUTHOR\" --publisher=\"$PGNAME\" -o \"--$AUTHOR - $BOOKTITLE.mobi\" --max-toc-links=0\n";
		print BATFILE "rem Note: To rebuild .mobi using Mobipocket Creator instead, just open the resulting .opf, update the metadata and Build your .prc.\n";
		print BATFILE "rem mobigen -s0 -c1 \"$AUTHOR - $BOOKTITLE.opf\"\n";
	}
}

if ($opt_lit)
{
	print "---\nConverting to Microsoft .lit...\n";
	$litbook=File::Spec->catfile($destdir, "$AUTHOR - $BOOKTITLE.lit");
	run_ebook_tool("any2lit \"$outfile\" -t \"$BOOKTITLE\" -a \"$AUTHOR\" --publisher=\"$PGNAME\" -o \"$litbook\" --dont-split-on-page-breaks");
	if (!$opt_nobatch)
	{
		print BATFILE "\nrem Convert .htm to Microsoft .lit\n";
		print BATFILE "any2lit \"$htmlbook\" -t \"$BOOKTITLE\" -a \"$AUTHOR\" --publisher=\"$PGNAME\" -o \"$AUTHOR - $BOOKTITLE.lit\" --dont-split-on-page-breaks\n";
		print BATFILE "rem any2lit \"$AUTHOR - $BOOKTITLE.epub\" -t \"$BOOKTITLE\" -a \"$AUTHOR\" --publisher=\"$PGNAME\" -o \"-$AUTHOR - $BOOKTITLE.lit\" --dont-split-on-page-breaks\n";
		print BATFILE "rem any2lit \"$AUTHOR - $BOOKTITLE\_source.epub\" -t \"$BOOKTITLE\" -a \"$AUTHOR\" --publisher=\"$PGNAME\" -o \"--$AUTHOR - $BOOKTITLE.lit\" --dont-split-on-page-breaks\n";
	}
}

}
if ($opt_1200 || $opt_1150 || $opt_1100)
{
	print "---\nConverting to Ebookwise .imp/.rb...\n";
	#$EBW1150 | $REB1200 | $REB1100 | $ETIePUB;
	# Build the device switches for the Html2IMP line of the batch file.
	$impbook = "";
	$impbook .= "--1200 " if $opt_1200;
	$impbook .= "--1150 " if $opt_1150;
	# Was "--1110", which matches neither the option name nor its two siblings.
	# NOTE(review): confirm "--1100" against the Html2IMP usage text.
	$impbook .= "--1100 " if $opt_1100;
	if (!$opt_nobatch)
	{
		print BATFILE "\nrem Note: To rebuild .imp or .rb using eBook Publisher, just open (double-click) the resulting .opf and choose your Build Target.\n";
		print BATFILE "Html2IMP $impbook -a \"$AUTHOR\" -t \"$BOOKTITLE\" -c \"$CATEGORY\" \"$outfile\"\n";
		print BATFILE "rem rbmake -a \"$AUTHOR\" -t \"$BOOKTITLE\" -f y -b -e -i -z -o \"$AUTHOR - $BOOKTITLE\" \"$htmlbook\"\n";
	}
}
# Called unconditionally: html2imps always saves the .opf project and only
# builds the device formats whose options are set.
html2imps("$AUTHOR", "$BOOKTITLE", "$CATEGORY", "$outfile" ) == 0 || warn $!;

# Finish the batch file - only if one was actually opened.
if (!$opt_nobatch)
{
	print BATFILE "\npause\n";
	close(BATFILE);
}

chdir $currdir; 
print "---\nDone converting ebook(s) for PG file \"$HREF\"\n\n";

if (!$opt_v)
{
	# Undo the non-verbose redirection of STDOUT.
	# NOTE(review): ">-" re-opens STDOUT right after it was closed; confirm
	# this really reconnects to the console on the target platform.
	close(STDOUT);
	open(STDOUT,">-"); 
}

# html2imps($author, $title, $category, $htmlfile)
#
# Drives ETI eBook Publisher through its COM interface ("SBPublisher.Project"
# and "SBPublisher.Builder"): creates a project for $htmlfile, fills in the
# metadata, saves it as "<author> - <title>.opf" in the current directory and
# then builds the device formats selected by the global options $opt_1150,
# $opt_1100 and $opt_1200.
#
# Returns 0 after the project has been processed (individual build failures
# are reported on STDOUT) and nothing (undef) when eBook Publisher's COM
# objects cannot be created; a warning is issued in that case.
#
# The sub used to be declared with an empty prototype "()" although it takes
# four arguments; the prototype has been dropped.
sub html2imps
{
	use Win32::OLE;
	use Win32::OLE qw(EVENTS);
	Win32::OLE->Initialize(Win32::OLE::COINIT_APARTMENTTHREADED);

	my ($author,$title,$category,$htmlfile) = @_;

	#$usage='Html2imp.pl Authorname Title Category htmlfilename';
	#die "Usage:  $usage\n" if $#ARGV != 3;

	###################################################################
	#
	# get the interfaces, complain and quit if we cannot
	#
	$project = Win32::OLE->new("SBPublisher.Project");
	if (!$project)
	{
		warn "Unable to get IProject interface. Please install eBook Publisher.\n";
		Win32::OLE->Uninitialize();   # balance the Initialize() above
		return;
	}

	$builder = Win32::OLE->new("SBPublisher.Builder");
	if (!$builder)
	{
		warn "Unable to get IBuilder interface. Please re-install eBook Publisher.\n";
		Win32::OLE->Uninitialize();   # balance the Initialize() above
		return;
	}

	# Setup the event handling (handlers live in package EventHandlers).
	#
	Win32::OLE->WithEvents($builder, 'EventHandlers');

	###################################################################
	#
	# Create a new project and add our document file with optional cover.
	#
	$project->ClearAll();
	#$project->AddSourceFile("cover_nr.htm");
	$project->AddSourceFile($htmlfile);

	###################################################################
	#
	# Set the various "metadata" items for the publication
	#
	$project->{AuthorFirstName} = $author;
	$project->{BookTitle}       = $title;
	$project->{Category}        = $category;
	$project->{Publisher}       = "GuteBook by nrapallo";
	#$project->{ISBN} = $project->CanonicalizeISBN("0448163004 ");
	#$project->{BISAC} = "FIC004000";

	###################################################################
	#
	# Now build the OEBFF output
	#
	my $basename = $author . " - " . $title;

	$project->{OutputDirectory} = ".";
	$project->{Compress}        = 1;   #True
	$project->{Encrypt}         = 0;   #False
	$project->{KeepAnchors}     = 1;   #True
	$project->{Language}        = "en";
	$project->{RequireISBN}     = 0;   #False
	$project->{Zoom}            = 2;
	$project->{BookFileName}    = $basename;

	$project->Save($basename . ".opf");

	###################################################################
	#
	# Build the requested device formats, in the same order as before:
	#   BuildTarget 2 = EBW/GEB 1150 (gray HalfVga) .IMP
	#   BuildTarget 3 = REB 1100 (mono HalfVGA) .RB
	#   BuildTarget 1 = REB 1200 (FullVga) .IMP (saved under a "_1200" name)
	#
	_html2imps_build($project, $builder, $basename,           2, "EBW 1150") if $opt_1150;
	_html2imps_build($project, $builder, $basename,           3, "REB 1100") if $opt_1100;
	_html2imps_build($project, $builder, $basename . "_1200", 1, "REB 1200") if $opt_1200;

	Win32::OLE->Uninitialize();
	return 0;
}

# Save the project under $basename, select the eBook Publisher build target
# and run the build.  Prints the outcome and returns 1 on success, 0 on error.
sub _html2imps_build
{
	my ($project, $builder, $basename, $target, $label) = @_;

	$project->{BookFileName} = $basename;
	$project->Save($basename . ".opf");

	$project->{BuildTarget} = $target;

	# Now generate the device output
	#$builder->GenerateOEBFF($project, 1);
	$builder->Build($project);
	if (Win32::OLE->LastError() != 0)
	{
		print "ERROR: Build method failed for $label.\n";
		return 0;
	}

	print "$label ebook created!\n";
	return 1;
}

#not yet implemented	
# Write "$outdir/cover_nr.htm", a one-page HTML file that shows the cover image.
#
# Reads the globals $coverimageid, %image_index_to_filename (looked up with
# $coverimageid + 1) and $outdir.
# Returns 0 without writing anything when $coverimageid is negative,
# 1 after the file has been written.  Dies if the file cannot be written.
sub create_cover_htm {
    return 0 if $coverimageid < 0;

    my $coverimagefilename = $image_index_to_filename{$coverimageid+1};
    my $coverpath = "$outdir/cover_nr.htm";

    open(my $cover_fh, '>', $coverpath)
        or die "Cannot create cover_nr.htm file \"$coverpath\": $!";
    binmode($cover_fh);

    # The second half used to be appended to a misspelled variable
    # ($coverhtml), so the <img> tag and the closing tags were never written.
    my $coverhtm = "<HTML><HEAD><STYLE type=\"text/css\">p {text-indent:0em; margin-left:2px; margin-right:2px}</STYLE></HEAD>\n<BODY>\n";
    $coverhtm .= "<p align=center><center><img src=\"$coverimagefilename\"></center></p>\n</body>\n</html>\n";

    print {$cover_fh} $coverhtm;
    close($cover_fh) or die "Error while writing \"$coverpath\": $!";

    print "Adding cover image: '$outdir/$coverimagefilename'\n";
    return 1;
}

###################################################################
#
# Event Handlers
#
package EventHandlers;

# Builder event handler, registered through Win32::OLE->WithEvents in
# html2imps.  Receives the builder object, the project and any further event
# arguments; currently does nothing with them (progress message disabled).
# Declared without the former empty prototype "()", which contradicted the
# arguments the handler receives.
sub OnBuildStart
{
	my ($builder, $project, @args) = @_;
#	print "Beginning validation...\n";
}

# Builder event handler, registered through Win32::OLE->WithEvents in
# html2imps.  Receives the builder object, the name of the source file and
# any further event arguments; currently does nothing with them (progress
# message disabled).
# Declared without the former empty prototype "()", which contradicted the
# arguments the handler receives.
sub OnSourceStart
{
	my ($builder, $filename, @args) = @_;
#	if ( $filename ne "") { print "Parsing $filename...\n"; }
}

# Builder event handler for diagnostics, registered through
# Win32::OLE->WithEvents in html2imps.
#
# Arguments: the builder object, the file name the message refers to, the
# message text, line, column and a numeric severity that indexes
# @severities below.
# Prints one formatted line per reported message; see the note on the
# severity test.
# Declared without the former empty prototype "()", which contradicted the
# arguments the handler receives.
sub OnError
{
	# Get the arguments
	my ($builder,
		$filename, 
		$msg, 
		$line, 
		$col, 
		$severity, 
		@args) = @_;

	my @severities = ("NOTE", "FATAL ERROR", "ERROR", "WARNING");

	# Reduce a path to its last component; both "\" and "/" count as
	# separators.  (The character class used to contain a stray "|" as well,
	# which is not a path separator.)
	if ($filename =~ m/^.+[\\\/](.+?)$/) { $filename = $1; }

	# Print out the error message including any NOTE feedback.
	# if ($severity >= 0)
	# To ignore Warnings, change below to: if ($severity < 3)
	# NOTE(review): as written, only severity 0 ("NOTE") messages are printed;
	# errors and warnings are dropped.  Confirm that this is intended.
	if (!$severity)
	{
		printf(" %-15s (L:%6d, C:%6d) %-7s:",
			$filename,
			$line,
			$col,
			$severities[$severity]);

		print " $msg\n";
	}
}