#!/usr/bin/python
"""
	Program: pielrf.py

	Author: EatingPie <eatingpie@gmail.com>

    Creates a Sony Reader/Librie lrf file from a text file.
	Includes easy table of contents, chapterization and curly quotes.

	Features included:

		* Curly (typographic) quotes.
		* Paragraph auto-flow.
		* Table of Contents and Chapterization if you use the <chapter> tag.
		* Understands HTML tags <i></i>, <b></b> <center></center>,
		  <sub></sub>, <sup></sup>, <p></p>.
		* Understands ALL HTML Ampersand tags - &amp;, &pound;, &uuml;, etc.
		* Paragraphs can be delimited by tabs, spaces, vertical whitespace.
		* Font size / weight (bold) can be controlled from command line.
		* Ability to control almost everything else from the command line too!

	NEEDED FEATURES
		- Missing per-page Header

	LICENSE

		Okay, here's the deal.  Just like every other license out there,
		you use this, anything that happens it's not my fault!

		So here are the rules.

		(1) Feel free to change this code, but ONLY IF YOU MAKE IT BETTER!
		    Making the code worse is totally, 100% against this license
			agreement.  Oh, and you have to keep my name on it somewhere.

		(2) Feel free to share this code.

		(3) If something bad happens, blame Canada, don't blame me.


"""

import os

from pylrs.pylrs import *
from sys         import *
from optparse    import OptionParser

#
# Version Number
#
VERSION = 1.0

#
# Character-format flags.
# format_paragraph() keeps one of these in the global "format".
#
ROMAN, ITALIC, BOLD, SUB, SUP = 0, 1, 2, 3, 4

#
# State carried by make_curly_quotes()
#
prev_char     = ' '
prev_was_open = False

#
# State carried by format_paragraph()
#
format   = ROMAN
centered = False

#
# Settings driven by the command line options
#
use_rdquotes    = False
preserve_spaces = False
verbose         = True


#############################################################
# def format_paragraph():									#
#															#
#############################################################
def _styled(text, style):
	# Wrap text in the pylrs span matching the format flag;
	# ROMAN text is returned as a bare string.
	if style == ITALIC :
		return Italic(text)
	if style == BOLD :
		return Bold(text)
	if style == SUB :
		return Sub(text)
	if style == SUP :
		return Sup(text)
	return text

# enddef _styled()


def format_paragraph(page, ts, cs, bs, line):
	"""
	Append one paragraph of marked-up text to page.

	The line is split at every "<".  A piece that follows a "<" and
	starts with a known tag name is interpreted as that tag:

		i> b> sub> sup>          select a character format
		/i> /b> /sub> /sup>      return to ROMAN
		center> /center>         switch centering and start a new paragraph
		br>                      start a new paragraph

	Only all-lowercase and all-uppercase spellings are recognised.
	Any other piece that followed a "<" is plain text and gets its
	"<" put back.  The current character format and the centered
	flag live in the globals "format" and "centered", so they carry
	over from one call to the next.

	page - object whose TextBlock(style, bs).Paragraph() creates paragraphs
	ts   - text style used while not centered
	cs   - text style used while centered
	bs   - block style passed to every TextBlock
	line - the paragraph text; an empty line produces nothing
	"""

	global format
	global centered

	if len(line) <= 0 :
		return

	#
	# (tag text as it appears right after the "<", format it selects)
	# No entry is a prefix of another, so at most one can match.
	#
	span_tags = (
		("i>",    ITALIC), ("I>",    ITALIC),
		("b>",    BOLD),   ("B>",    BOLD),
		("sub>",  SUB),    ("SUB>",  SUB),
		("sup>",  SUP),    ("SUP>",  SUP),
		("/i>",   ROMAN),  ("/I>",   ROMAN),
		("/b>",   ROMAN),  ("/B>",   ROMAN),
		("/sub>", ROMAN),  ("/SUB>", ROMAN),
		("/sup>", ROMAN),  ("/SUP>", ROMAN),
	)

	#
	# Set Up Initial paragraph based on previous tags
	#
	if centered :
		paragraph = page.TextBlock(cs, bs).Paragraph()
	else :
		paragraph = page.TextBlock(ts, bs).Paragraph()

	#
	# Piece 0 is whatever came before the first "<" (empty when the
	# line starts with one); every later piece followed a "<".
	#
	pieces = line.split("<")
	for index, piece in enumerate(pieces) :
		if len(piece) <= 0 :
			continue

		#
		# Text that never followed a "<" cannot be a tag.
		#
		if index == 0 :
			paragraph.append(_styled(piece, format))
			continue

		#
		# Character-format tags.  The tag is sliced off the front;
		# the rest of the piece is left untouched even if it happens
		# to contain tag-like text such as "i>".
		#
		matched = False
		for tag, style in span_tags :
			if piece.startswith(tag) :
				format = style
				paragraph.append(_styled(piece[len(tag):], style))
				matched = True
				break
		if matched :
			continue

		#
		# Paragraph-level tags: <center>, </center>, <br>.
		# Each starts a fresh paragraph; text after the tag keeps
		# the current character format.
		#
		rest = None
		if piece.startswith("center>") or piece.startswith("CENTER>") :
			centered = True
			rest     = piece[len("center>"):]
		elif piece.startswith("/center>") or piece.startswith("/CENTER>") :
			centered = False
			rest     = piece[len("/center>"):]
		elif piece.startswith("br>") or piece.startswith("BR>") :
			rest     = piece[len("br>"):]

		if rest is not None :
			if centered :
				paragraph = page.TextBlock(cs, bs).Paragraph()
			else :
				paragraph = page.TextBlock(ts, bs).Paragraph()
			if len(rest) > 0 and not rest.isspace() :
				paragraph.append(_styled(rest, format))
			continue

		#
		# The split happened on an actual "<" sign, so we
		# need to put that back and print the string.
		#
		paragraph.append(_styled("<" + piece, format))
	#endfor

# enddef format_paragraph()



#############################################################
# def make_curly_quotes():									#
#															#
#############################################################
def make_curly_quotes(line):
	"""
	Return line with straight quotes replaced by curly quotes.

	&quot; is always turned into a plain double quote first.  Unless
	the global use_rdquotes is set, the explicit HTML curly quotes
	(&ldquo; &rdquo; &#8220; &#8221;) are flattened to plain double
	quotes as well, so the algorithm below decides their direction.

	Double quotes open after whitespace and close otherwise, with
	special cases for a quote standing alone between whitespace, a
	quote following "-" and a quote following ">".  A single quote
	only opens when it follows whitespace and the next single quote
	on the line looks like its closing partner (alphanumeric before
	it, whitespace or end of line after it); otherwise it becomes an
	apostrophe / closing quote.

	The globals prev_char and prev_was_open are updated;
	prev_was_open carries over between calls and prev_char is reset
	to a newline on return.
	"""
	global prev_char
	global prev_was_open
	global use_rdquotes

	#
	# Replace any HTML quotes with the actual quote so the
	# curly-quote algorithm sees plain quotes only.
	#
	line = line.replace("&quot;", "\"")
	if not use_rdquotes :
		for entity in ("&ldquo;", "&rdquo;", "&#8220;", "&#8221;") :
			line = line.replace(entity, "\"")

	#
	# Every replacement is one character for one character, so the
	# result is built in a list while look-ahead keeps reading the
	# unmodified line.
	#
	length = len(line)
	out    = list(line)

	for i in range(length) :
		char = line[i]
		if i+1 < length :
			next_char = line[i+1]
		else :
			next_char = '\n'

		#
		# DOUBLE QUOTE
		#
		if char == '"' :
			if prev_char.isspace() :
				# Open by default; a dangling quote (whitespace on
				# both sides) closes if the previous one opened.
				do_open = True
				if next_char.isspace() :
					do_open = not prev_was_open
			else :
				do_open = False
				if prev_char == '-' and next_char.isalnum() :
					do_open = True
				if prev_char == '>' :
					do_open = not prev_was_open

			if do_open :
				char = u"\u201C"
			else :
				char = u"\u201D"
			prev_was_open = do_open

		#
		# SINGLE QUOTE
		#
		elif char == '\'' :
			do_open = False
			if prev_char.isspace() :
				# Open by default
				do_open = True
				#
				# Find next ' and see if it is a close quote vs
				# just an apostrophe
				#
				index = line.find("'", i+1)
				if index > -1 :
					# A quote at the very end of the line counts as
					# being followed by whitespace (indexing past the
					# end used to raise IndexError here).
					if index+1 < length :
						after = line[index+1]
					else :
						after = '\n'
					do_open = line[index-1].isalnum() and after.isspace()

			if do_open :
				char = u"\u2018"
			else :
				char = u"\u2019"

		out[i]    = char
		prev_char = char

	prev_char = '\n'
	# Join with an empty string of the input's own type
	return line[:0].join(out)

# enddef make_curly_quotes()

#############################################################
# def convert_misc():										#
#															#
#############################################################
def convert_misc(line):
	"""
	Collapse runs of spaces and convert ASCII dashes.

	Unless the global preserve_spaces is set, every run of spaces
	is squeezed down to a single space (the Reader does not justify
	text correctly when several spaces sit between words).  After
	that "---" and then "--" are each replaced by U+2013.
	"""
	global preserve_spaces

	#
	# Keep halving the runs until no double space is left
	#
	if not preserve_spaces :
		while "  " in line :
			line = line.replace("  ", " ")

	#
	# Dash Replacement - triple dash first, then whatever
	# double dashes are left
	#
	for dashes in ("---", "--") :
		line = line.replace(dashes, u"\u2013")

	return line

# enddef convert_misc()


#############################################################
# def convert_html_ampersands():							#
#															#
#############################################################

#
# Named entities and the character each one becomes.
#
# &amp; is deliberately NOT in this table: it has to be decoded
# after everything else, otherwise "&amp;lt;" would first turn
# into "&lt;" and then into "<".
#
_HTML_ENTITIES = (
	("&quot;",     u"\""),
	("&mdash;",    u"\u2013"),
	("&ndash;",    u"\u2013"),
	("&lt;",       u"\u003C"),
	("&gt;",       u"\u003E"),
	("&nbsp;",     u"\u00A0"),
	("&iexcl;",    u"\u00A1"),
	("&cent;",     u"\u00A2"),
	("&pound;",    u"\u00A3"),
	("&curren;",   u"\u00A4"),
	("&yen;",      u"\u00A5"),
	("&brvbar;",   u"\u00A6"),
	("&sect;",     u"\u00A7"),
	("&die;",      u"\u00A8"),
	("&copy;",     u"\u00A9"),
	("&ordf;",     u"\u00AA"),
	("&laquo;",    u"\u00AB"),
	("&not;",      u"\u00AC"),
	("&shy;",      u"\u00AD"),
	("&reg;",      u"\u00AE"),
	("&macron;",   u"\u00AF"),
	("&degree;",   u"\u00B0"),
	("&plusmn;",   u"\u00B1"),
	("&sup2;",     u"\u00B2"),
	("&sup3;",     u"\u00B3"),
	("&acute;",    u"\u00B4"),
	("&micro;",    u"\u00B5"),
	("&para;",     u"\u00B6"),
	("&middot;",   u"\u00B7"),
	("&Cedilla;",  u"\u00B8"),
	("&sup1;",     u"\u00B9"),
	("&ordm;",     u"\u00BA"),
	("&raquo;",    u"\u00BB"),
	("&frac14;",   u"\u00BC"),
	("&frac12;",   u"\u00BD"),
	("&frac34;",   u"\u00BE"),
	("&iquest;",   u"\u00BF"),
	("&Agrave;",   u"\u00C0"),
	("&Aacute;",   u"\u00C1"),
	("&Acirc;",    u"\u00C2"),
	("&Atilde;",   u"\u00C3"),
	("&Auml;",     u"\u00C4"),
	("&Aring;",    u"\u00C5"),
	("&AElig;",    u"\u00C6"),
	("&Ccedil;",   u"\u00C7"),
	("&Egrave;",   u"\u00C8"),
	("&Eacute;",   u"\u00C9"),
	("&Ecirc;",    u"\u00CA"),
	("&Euml;",     u"\u00CB"),
	("&Igrave;",   u"\u00CC"),
	("&Iacute;",   u"\u00CD"),
	("&Icirc;",    u"\u00CE"),
	("&Iuml;",     u"\u00CF"),
	("&ETH;",      u"\u00D0"),
	("&Ntilde;",   u"\u00D1"),
	("&Ograve;",   u"\u00D2"),
	("&Oacute;",   u"\u00D3"),
	("&Ocirc;",    u"\u00D4"),
	("&Otilde;",   u"\u00D5"),
	("&Ouml;",     u"\u00D6"),
	("&times;",    u"\u00D7"),
	("&Oslash;",   u"\u00D8"),
	("&Ugrave;",   u"\u00D9"),
	("&Uacute;",   u"\u00DA"),
	("&Ucirc;",    u"\u00DB"),
	("&Uuml;",     u"\u00DC"),
	("&Yacute;",   u"\u00DD"),
	("&THORN;",    u"\u00DE"),
	("&szlig;",    u"\u00DF"),
	("&agrave;",   u"\u00E0"),
	("&aacute;",   u"\u00E1"),
	("&acirc;",    u"\u00E2"),
	("&atilde;",   u"\u00E3"),
	("&auml;",     u"\u00E4"),
	("&aring;",    u"\u00E5"),
	("&aelig;",    u"\u00E6"),
	("&ccedil;",   u"\u00E7"),
	("&egrave;",   u"\u00E8"),
	("&eacute;",   u"\u00E9"),
	("&ecirc;",    u"\u00EA"),
	("&euml;",     u"\u00EB"),
	("&igrave;",   u"\u00EC"),
	("&iacute;",   u"\u00ED"),
	("&icirc;",    u"\u00EE"),
	("&iuml;",     u"\u00EF"),
	("&eth;",      u"\u00F0"),
	("&ntilde;",   u"\u00F1"),
	("&ograve;",   u"\u00F2"),
	("&oacute;",   u"\u00F3"),
	("&ocirc;",    u"\u00F4"),
	("&otilde;",   u"\u00F5"),
	("&ouml;",     u"\u00F6"),
	("&divide;",   u"\u00F7"),
	("&oslash;",   u"\u00F8"),
	("&ugrave;",   u"\u00F9"),
	("&uacute;",   u"\u00FA"),
	("&ucirc;",    u"\u00FB"),
	("&uuml;",     u"\u00FC"),
	("&yacute;",   u"\u00FD"),
	("&thorn;",    u"\u00FE"),
	("&yuml;",     u"\u00FF"),
	("&lowast;",   u"\u2217"),
	("&minus;",    u"\u2212"),
	("&cong;",     u"\u2245"),
	("&Agr;",      u"\u0391"),
	("&Bgr;",      u"\u0392"),
	("&KHgr;",     u"\u03A7"),
	("&Delta;",    u"\u0394"),
	("&Egr;",      u"\u0395"),
	("&PHgr;",     u"\u03A6"),
	("&Gamma;",    u"\u0393"),
	("&EEgr;",     u"\u0397"),
	("&Igr;",      u"\u0399"),
	("&thetav;",   u"\u03D1"),
	("&Kgr;",      u"\u039A"),
	("&Lambda;",   u"\u039B"),
	("&Mgr;",      u"\u039C"),
	("&Ngr;",      u"\u039D"),
	("&Ogr;",      u"\u039F"),
	("&Pi;",       u"\u03A0"),
	("&Theta;",    u"\u0398"),
	("&Rgr;",      u"\u03A1"),
	("&Sigma;",    u"\u03A3"),
	("&Tgr;",      u"\u03A4"),
	("&Upsi;",     u"\u03A5"),
	("&sfgr;",     u"\u03C2"),
	("&Omega;",    u"\u03A9"),
	("&Xi;",       u"\u039E"),
	("&Psi;",      u"\u03A8"),
	("&Zgr;",      u"\u0396"),
	("&there4;",   u"\u2234"),
	("&perp;",     u"\u22A5"),
	("&alpha;",    u"\u03B1"),
	("&beta;",     u"\u03B2"),
	("&chi;",      u"\u03C7"),
	("&delta;",    u"\u03B4"),
	("&epsi;",     u"\u03B5"),
	("&phis;",     u"\u03C6"),
	("&gamma;",    u"\u03B3"),
	("&eta;",      u"\u03B7"),
	("&iota;",     u"\u03B9"),
	("&phiv;",     u"\u03D5"),
	("&kappa;",    u"\u03BA"),
	("&lambda;",   u"\u03BB"),
	("&mu;",       u"\u03BC"),
	("&nu;",       u"\u03BD"),
	("&ogr;",      u"\u03BF"),
	("&pi;",       u"\u03C0"),
	("&thetas;",   u"\u03B8"),
	("&rho;",      u"\u03C1"),
	("&sigma;",    u"\u03C3"),
	("&tau;",      u"\u03C4"),
	("&upsi;",     u"\u03C5"),
	("&piv;",      u"\u03D6"),
	("&omega;",    u"\u03C9"),
	("&xi;",       u"\u03BE"),
	("&psi;",      u"\u03C8"),
	("&zeta;",     u"\u03B6"),
	("&sim;",      u"\u223C"),
	("&vprime;",   u"\u2032"),
	("&le;",       u"\u2264"),
	("&infin;",    u"\u221E"),
	("&fnof;",     u"\u0192"),
	("&clubs;",    u"\u2663"),
	("&diams;",    u"\u2666"),
	("&hearts;",   u"\u2665"),
	("&spades;",   u"\u2660"),
	("&harr;",     u"\u2194"),
	("&larr;",     u"\u2190"),
	("&uarr;",     u"\u2191"),
	("&rarr;",     u"\u2192"),
	("&darr;",     u"\u2193"),
	("&Prime;",    u"\u2033"),
	("&ge;",       u"\u2265"),
	("&prop;",     u"\u221D"),
	("&part;",     u"\u2202"),
	("&bull;",     u"\u2022"),
	("&ne;",       u"\u2260"),
	("&equiv;",    u"\u2261"),
	("&ap;",       u"\u2248"),
	("&hellip;",   u"\u2026"),
	("&aleph;",    u"\u2135"),
	("&image;",    u"\u2111"),
	("&real;",     u"\u211C"),
	("&weierp;",   u"\u2118"),
	("&otimes;",   u"\u2297"),
	("&oplus;",    u"\u2295"),
	("&empty;",    u"\u2205"),
	("&cap;",      u"\u2229"),
	("&cup;",      u"\u222A"),
	("&sup;",      u"\u2283"),
	("&supe;",     u"\u2287"),
	("&nsub;",     u"\u2284"),
	("&sub;",      u"\u2282"),
	("&sube;",     u"\u2286"),
	("&isin;",     u"\u2208"),
	("&notin;",    u"\u2209"),
	("&ang;",      u"\u2220"),
	("&nabla;",    u"\u2207"),
	("&trade;",    u"\u2122"),
	("&prod;",     u"\u220F"),
	("&radic;",    u"\u221A"),
	("&sdot;",     u"\u22C5"),
	("&and;",      u"\u2227"),
	("&or;",       u"\u2228"),
	("&hArr;",     u"\u21D4"),
	("&lArr;",     u"\u21D0"),
	("&uArr;",     u"\u21D1"),
	("&rArr;",     u"\u21D2"),
	("&dArr;",     u"\u21D3"),
	("&loz;",      u"\u25CA"),
	("&lang;",     u"\u2329"),
	("&sum;",      u"\u2211"),
	("&lceil;",    u"\u2308"),
	("&lfloor;",   u"\u230A"),
	("&rang;",     u"\u232A"),
	("&int;",      u"\u222B"),
	#
	# Standard HTML spellings of four characters that the table
	# above only lists under non-HTML names
	# (&die; &macron; &degree; &Cedilla;).
	#
	("&uml;",      u"\u00A8"),
	("&macr;",     u"\u00AF"),
	("&deg;",      u"\u00B0"),
	("&cedil;",    u"\u00B8"),
)


def convert_html_ampersands(line):
	"""
	Return line with the HTML entities listed in _HTML_ENTITIES
	(plus &amp;) replaced by the characters they stand for.

	The explicit curly-quote entities (&ldquo; &rdquo; &#8220;
	&#8221;) are only converted when the global use_rdquotes is set.
	Entities that are not in the table are left as they are.

	Every entity is decoded exactly once: &amp; is handled last, so
	an escaped entity such as "&amp;lt;" comes out as the text
	"&lt;" and not as "<".
	"""

	#
	# Trust the HTML curly quotes only when asked to
	#
	if use_rdquotes :
		line = line.replace("&ldquo;", u"\u201C")
		line = line.replace("&rdquo;", u"\u201D")
		line = line.replace("&#8220;", u"\u201C")
		line = line.replace("&#8221;", u"\u201D")

	#
	# Basically, just global replace with equivalents
	#
	for entity, char in _HTML_ENTITIES :
		line = line.replace(entity, char)

	#
	# Must stay last - see the note on _HTML_ENTITIES
	#
	line = line.replace("&amp;", u"\u0026")

	return line

# enddef convert_html_ampersands()

#############################################################
# def make_button_text():									#
#															#
#############################################################
def make_button_text(line):
	"""
	Turn a chapter heading into plain text for a TOC button.

	Surrounding whitespace is stripped, the text is run through
	make_curly_quotes(), convert_misc() and convert_html_ampersands(),
	and finally the formatting tags (i, b, sub, sup, center, br, in
	all-lowercase or all-uppercase, opening and closing) are removed.
	"""

	text = line.strip()
	text = convert_html_ampersands(convert_misc(make_curly_quotes(text)))

	#
	# Opening tags first, then closing tags, each group in the
	# same fixed order.
	#
	tag_names = ("i", "I", "b", "B", "sub", "SUB", "sup", "SUP",
				 "center", "CENTER", "br", "BR")
	for opener in ("<", "</") :
		for tag_name in tag_names :
			text = text.replace(opener + tag_name + ">", "")

	return text

# enddef make_button_text()

#############################################################
# def parse_cmdline():										#
#															#
#############################################################
def parse_cmdline():

	global cmdline

	cmdline = OptionParser(usage="usage: %prog -i infile -o outfile "
						   "[-t Title -a Author ...] (-h for all options)"
						   )

	#
	# Verbosity Off
	#
	cmdline.add_option("-q",  "--quite", dest="verbose",
					   action="store_false", default=True,
					   help="Turn of all textual output (quiet mode)"),
	cmdline.add_option("--version", dest="version", default=False,
					   action="store_true",
					   help="Print version number")

	#
	# Input / Output Files
	#
	cmdline.add_option("-i", "--infile", dest="infile",
					   action="store",   type="string",
					   help="Input text file to convert to lrf")
	cmdline.add_option("-o", "--outfile", dest="outfile",
					   action="store",    type="string",
					   help="Output lrf file for converted text")

	#
	# Title / Author
	#
	cmdline.add_option("-t", "--title", dest="title",default="Unknown Title",
					   action="store",  type="string",
					   help="Specify Book Title, use quotes")
	cmdline.add_option("-a", "--author",dest="author",default="Unknown Author",
					   action="store",  type="string",
					   help="Specify Book Author, use quotes.")
	cmdline.add_option("--category", dest="category", default="Fiction",
					   action="store",  type="string",
					   help="Specify Book Category. DEFAULT: Fiction")
	cmdline.add_option("--bookid", dest="bookid", default="FB0123456",
					   action="store",  type="string",
					   help="Specify eReader Book ID (unnecessary).")
	cmdline.add_option("--isbn", dest="isbn", default="123-0-1234-5678-9",
					   action="store",  type="string",
					   help="Specify Book ISBN (unnecessary).")

	#
	# How to calculate linebreaks
	#
	cmdline.add_option("-b", "--paragraphbreak", dest="linebreak",
					   default="auto", metavar="auto|cr|tab|space",
					   help="Specify how to break paragraphs: "
					        "'auto' (default) - best-guess algorithm. "
					        "'html' - paragraphs begin with HTML <p> tags. "
					        "'cr' - paragraphs begin after line break. "
					        "'tab' - paragraphs begin with tab-indented line. "
					        "'space' - paragraphs begin with multi-spaced "
					        " indented line."
					   )

	#
	# Spaces are removed to one-space, maybe we don't want to?
	#
	cmdline.add_option("--preserve-spaces", dest="preserve_spaces",
					   default=False, action="store_true",
					   help="Do not remove extra spaces between words. "
					        "DEFAULT is to remove spaces because the Reader "
					        "treats extra spaces as non-breaking, and this "
					        "interferes with proper justification"
					   )
	cmdline.add_option("--use-rdquotes", dest="use_rdquotes",
					   default=False, action="store_true",
					   help="Trust html &rdquote;|&ldquote; for curly quotes."
					        " DEFAULT is to NOT trust these tags, since many"
					        " OCRs get them wrong, and the built-in alorithmic"
					        " calculation is more accurate."
					   )
	#
	# Screen Dimensions
	#
	cmdline.add_option("--screenheight", dest="textheight", default=710,
					   action="store",  type="int", metavar="HEIGHT (pixels)",
					   help="Height of text \"screen\" area (pixels). "
					   "Note this is NOT the actual physical screen height "
					   "but the area text can be displayed within. "
					   "Using 710x550 for the 800x600 Sony Reader. "
					   "DEFAULT: 710")
	cmdline.add_option("--screenwidth", dest="textwidth", default=550,
					   action="store",  type="int", metavar="WIDTH (pixels)",
					   help="Width of text \"screen\" area (pixels). "
					   "DEFAULT: 550")

	#
	# Margins
	#
	cmdline.add_option("--topmargin", dest="topmargin", default=50,
					   action="store",  type="int",
					   help="Height of Top Margin (pixels). DEFAULT: 50")
	cmdline.add_option("--sidemargin", dest="sidemargin", default=20,
					   action="store",  type="int",
					   help="Width of Side Margins (pixels). DEFAULT: 20")

	#
	# Paragraph Settings
	#
	cmdline.add_option("--parindent", dest="parindent", default=200,
					   action="store",  type="int",
					   help="How far to Indent Paragraph's first line. "
					   "DEFAULT: 200")
	cmdline.add_option("--baselineskip", dest="baselineskip", default=100,
					   action="store",  type="int",
					   help="Spacing between Paragraphs. "
					   "DEFAULT: 100 (matches standard line spacing)")

	#
	# Font size and boldness
	#
	cmdline.add_option("--fontsize", dest="textsize", default=95,
					   action="store",  type="int",
					   metavar="FONTSIZE (points)",
					   help="Text Font size (points) - DEFAULT: 95")
	cmdline.add_option("--fontweight", dest="textweight", default=400,
					   action="store",  type="int",
					   metavar="FONTWEIGHT (strength)",
					   help="Text Font weight - DEFAULT 400, Bold is 800.")
	cmdline.add_option("--headerfontsize", dest="headsize", default=120,
					   action="store",  type="int",
					   metavar="FONTSIZE (points)",
					   help="Header Font size (points) - DEFAULT: 120")
	cmdline.add_option("--headerfontweight", dest="headweight", default=800,
					   action="store",  type="int",
					   metavar="FONTWEIGHT (strength)",
					   help="Header Font weight - DEFAULT 800, Roman is 400.")

	(options, args) = cmdline.parse_args()

	#
	# Arg Checking is handled here.
	# Any Global Variables are set in Main Program
	#
	infile  = options.infile
	outfile = options.outfile
	linebrk = options.linebreak

	if options.version :
		cmdline.print_help()
		print "pielrf version: ", VERSION
		return False

	if not infile :
		cmdline.print_help()
		print "No Input File Specified"
		return False
	if not outfile :
		cmdline.print_help()
		print "No Output File Specified"
		return False

	if linebrk != "auto" and linebrk != "cr" and linebrk != "html" and \
	   linebrk != "tab"  and linebrk != "space" :
		cmdline.print_help()
		print "Incorrect line break option specified: -b ", linebrk
		return False

	if not os.path.exists(infile) :
		cmdline.print_help()
		print "Input File \"", infile, "\" does not exist"
		return False

	#
	# Return now if we are not using verbose mode
	# Everything is okay here, so returning true
	#
	if not options.verbose :
		return True

	#
	# Verbose statement of argument values
	#
	print "Infile:  ", infile
	print "Outfile: ", outfile
	print "Title:   ", options.title
	print "Author:  ", options.author
	print "Category:", options.category
	print "ISBN:    ", options.isbn
	print "BookID:  ", options.bookid

	print "Options:"
	print "\tViewable Area:", \
		  options.textheight, "x", options.textwidth, \
		  "- [", options.topmargin, ", ", options.sidemargin, "]"
	print "\tText Font:    ", \
		  options.textsize, "(pixels) +", options.textweight, "(strength)"
	print "\tHeader Font:  ", \
		  options.headsize, "(pixels) +", options.headweight, "(strength)"
	print "\tParagraphs:   ", \
		   options.parindent, "(points Indent) +", \
		   options.baselineskip, "(points Baseline)"

	if linebrk == "auto" :
		print "\tParagraph Breaks determined automatically"
	else :
		print "\tParagraph Breaks determined by indentation with ", linebrk
	if options.use_rdquotes:
		print "\tUsing explicit HTML curly-quotes (&rdquo;|&ldquo;)"
	else :
		print "\tUsing algorithm for explicit curly-quotes"

	if options.preserve_spaces :
		print "\tPreserving extra spaces"
	else :
		print "\tEliminating extra spaces"

	print

	return True

# enddef parse_cmdline()


#############################################################
# def pielrf():												#
#															#
#############################################################
def pielrf():

	global cmdline
	global preserve_spaces
	global use_rdquotes

	if not parse_cmdline() :
		return

	(cmdopts, cmdargs) = cmdline.parse_args()

	# Locals
	infile          = cmdopts.infile
	outfile         = cmdopts.outfile
	# How to break paragraphs
	linebreak       = cmdopts.linebreak
	# Fonts
	textsize        = cmdopts.textsize
	textweight      = cmdopts.textweight
	headsize        = cmdopts.headsize
	headweight      = cmdopts.headweight
	# Screen/Margins
	textheight      = cmdopts.textheight
	textwidth       = cmdopts.textwidth
	topmargin       = cmdopts.topmargin
	sidemargin      = cmdopts.sidemargin
	# Paragraph Indent / Offset
	parindent       = cmdopts.parindent
	baselineskip    = cmdopts.baselineskip

	# Globals
	preserve_spaces = cmdopts.preserve_spaces
	use_rdquotes    = cmdopts.use_rdquotes
	verbose         = cmdopts.verbose

	#
	# Read whole file in all at once
	#
	f    = open(infile, 'rb')
	data = f.read()
	f.close()

	# topmargin=50, textheight=710, textwidth=550
	ps     = PageStyle(topmargin=topmargin,
					   textheight=textheight,
					   textwidth=textwidth)
	# sidemargin=20 matches Eragon from Connect Book Store
	bs     = BlockStyle(sidemargin=sidemargin)
	# Standard Text style and a Center text style
	ts     = TextStyle(parindent=parindent, baselineskip=baselineskip,
					   fontsize=textsize,   fontweight=textweight)
	cs     = TextStyle(align="center",      baselineskip=baselineskip,
					   fontsize=textsize,   fontweight=textweight)
	# No paragraph indent for Table of Contents
	tocTs  = TextStyle()
	# Chapter Header
	headTs = TextStyle(fontsize=headsize,fontweight=headweight,align="center")

	#
	# create the book
	#
	title       = cmdopts.title
	author      = cmdopts.author
	bookid      = cmdopts.bookid
	isbn        = cmdopts.isbn
	category    = cmdopts.category

	book = Book(title=title,
				author=(author, author),
				category=category,
				isbn=isbn,
				bookid=bookid)

	#
	# Create Table Of contents Page.
	# This should be filled with buttons
	#
	tocPage = book.Page(ps)
	tocHead = TextBlock(headTs, bs)
	tocText = TextBlock(tocTs,  bs)

	tocPage.append(tocHead)
	tocPage.append(tocText)

	book.addTocEntry(u"Table of Contents", tocText)
	tocHead.Paragraph("Table of Contents")
	tocHead.Paragraph()

	if verbose :
		print "Creating Chapters..."

	#
	# For each Chapter
	# Dilineated by <chapter>
	#
	first    = True
	chnum    = 1
	chapters = data.split("<chapter>")
	for chapter in range(len(chapters)) :
		if len(chapters[chapter]) == 0 or chapters[chapter].isspace():
			continue

		#
		# Do not know how the paragraphs are delimited
		# Pick the one which yeilds the greatest amount of
		# total paragraphs in "auto" mode, otherwise use
		# the mode specified
		#
		list_cr    = chapters[chapter].split("\n\n")
		list_tab   = chapters[chapter].split("\n\t")
		list_spc   = chapters[chapter].split("\n  ")
		list_html  = chapters[chapter].split("<p>")
		list       = list_cr

		if   linebreak == "cr" :
			list     = list_cr
		elif linebreak == "tab" :
			list     = list_tab
		elif linebreak == "space" :
			list     = list_spc
		elif linebreak == "html" :
			list = list_html
			for i in range(len(list)):
				list[i] = list[i].replace("</p>","").replace("</P>","")
		elif linebreak == "auto" :
			list     = list_cr
			if len(list_tab) > len(list):
				list = list_tab
			if len(list_spc) > len(list):
				list = list_spc
			if len(list_html) > len(list) :
				list = list_html.replace("</p>","").replace("</P>","")
		#endif

		#
		# The very first element is the name of the chapter, since
		# it had the <chapter> tag before it
		#
		if verbose :
			print "Chapter ", chnum, " has ", len(list), " paragraphs"
			chnum += 1
		#endif

		#
		# Create the attributes for the page.
		# A chapter header, a separator, text for the chapter
		#
		page       = book.Page(ps)
		headBlock  = page.TextBlock(headTs, bs);

		#
		# Make a Button on the TOC Page for this chapter
		#
		buttonText = make_button_text(list[0])
		tocText.Paragraph(CharButton(JumpButton(headBlock), buttonText))

		#
		# Add TOC Entries
		#
		if first :
			book.addTocEntry("Begin Reading", headBlock)
			first = False
		#endif
		book.addTocEntry(buttonText, headBlock)

		#
		# Now Parse Each Paragraph in the Chapter
		# The first Paragrah is the Chapter heading.
		#
		for i in range(len(list)):
			newstr   = make_curly_quotes(list[i].lstrip())
			newstr   = convert_misc(newstr)
			newstr   = convert_html_ampersands(newstr)
			finalstr = newstr
			if i == 0 :
				# First line is Chapter Header, followed by a Blank Line
				format_paragraph(page, headTs, headTs, bs, finalstr)
				textBlock = page.TextBlock(ts, bs)
				textBlock.Paragraph()
			else :
				# Subsequent lines are the Chapter text
				format_paragraph(page, ts, cs, bs, finalstr)

		# endfor - Each Paragraph

	#endfor - Each Chapter

	if verbose :
		print "Generating LRF..."

	# generate the lrf file
	book.renderLrf(outfile)

	if verbose :
		print "Done."

# enddef makelrf()


# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    pielrf()
