#!/usr/bin/python

# Extract free-hand annotations from a Sony PRS600 reader and
# export them to a Jarnal archive:
# http://www.dklevine.com/general/software/tc1000/jarnal.htm
#
# Usage: annot.py </path/to/prs600>
# e.g:   annot.py /media/READER
#
# Warning, this file is provided as is, and there is no guarantee
# that it will work, or even that it will preserve your files. 
# Use it at your own risk...
#
# Copyright: Cedric Pradalier <cedric.pradalier@free.fr>
# Date: October 17th, 2009
# License: public domain

import base64
import os
import shutil
import sys
import tempfile
import xml.dom.minidom
import zipfile

import pyPdf

def getText(nodelist):
	"""Return the concatenated data of the text nodes found in nodelist.

	Only entries whose nodeType is TEXT_NODE contribute; any other node
	(elements, comments, ...) is skipped and not descended into.
	"""
	return "".join(
		child.data for child in nodelist
		if child.nodeType == child.TEXT_NODE)



class Annotation:
	"""One <annotation> markup entry.

	The integer attributes width, height, page, pageOffset and pages are
	read from the XML node, the "name" attribute becomes the text, and
	start/end hold the base64-decoded contents of the <start> and <end>
	child elements with every NUL byte removed.
	"""

	def __init__(self, node):
		readInt = lambda key: int(node.getAttribute(key))
		self.width = readInt("width")
		self.height = readInt("height")
		self.page = readInt("page")
		self.pageOffset = readInt("pageOffset")
		self.pages = readInt("pages")
		self.text = node.getAttribute("name")
		self.start = self._decodeChild(node, "start")
		self.end = self._decodeChild(node, "end")

	def _decodeChild(self, node, tag):
		"""Decode the base64 text of the first <tag> child of node."""
		element = node.getElementsByTagName(tag)[0]
		encoded = getText(element.childNodes)
		# Python 2 str.translate: no mapping table, just delete NUL bytes.
		return base64.b64decode(encoded).translate(None, "\x00")

	def __str__(self):
		fields = (self.page, self.pageOffset, self.pages,
				self.width, self.height, repr(self.text),
				str(self.start), str(self.end))
		return "Page %d+%d of %d (%dx%d): %s\n\t from %s to %s " % fields

class Bookmark:
	"""One <bookmark2> markup entry.

	Holds the integer width, height, page, pageOffset and pages attributes
	of the XML node, the text of its optional <comment> child (empty string
	when there is none) and the base64-decoded, NUL-stripped contents of
	its <mark> child.
	"""

	def __init__(self, node):
		readInt = lambda key: int(node.getAttribute(key))
		self.width = readInt("width")
		self.height = readInt("height")
		self.page = readInt("page")
		self.pageOffset = readInt("pageOffset")
		self.pages = readInt("pages")

		# The comment is optional; the mark is mandatory (IndexError if absent).
		comments = node.getElementsByTagName("comment")
		self.text = getText(comments[0].childNodes) if len(comments) > 0 else ""

		markNode = node.getElementsByTagName("mark")[0]
		encoded = getText(markNode.childNodes)
		# Python 2 str.translate: no mapping table, just delete NUL bytes.
		self.mark = base64.b64decode(encoded).translate(None, "\x00")

	def __str__(self):
		fields = (self.page, self.pageOffset, self.pages,
				self.width, self.height, repr(self.text),
				str(self.mark))
		return "Page %d+%d of %d (%dx%d): %s\n\t mark %s " % fields

	def __repr__(self):
		"""Short one-line label; the offset is shown only when non-zero."""
		label = str(self.text)
		if not self.pageOffset:
			return "Page %d of %d: %s" % (self.page, self.pages, label)
		return "Page %d+%d of %d: %s" % \
				(self.page, self.pageOffset, self.pages, label)

class FreeHand:
	"""One <freehand> markup entry: a hand drawing stored as an SVG file.

	svgfile, svgwidth and svgheight come from the <svgFile> child element
	(its text and its width/height attributes); width, height, page,
	pageOffset and pages are integer attributes of the <freehand> node.
	"""

	def __init__(self,node):
		svg = node.getElementsByTagName("svgFile")[0]
		self.svgfile = getText(svg.childNodes)
		self.svgwidth = int(svg.getAttribute("width"))
		self.svgheight = int(svg.getAttribute("height"))
		self.width = int(node.getAttribute("width"))
		self.height = int(node.getAttribute("height"))
		self.page = int(node.getAttribute("page"))
		self.pageOffset = int(node.getAttribute("pageOffset"))
		self.pages = int(node.getAttribute("pages"))

	def __str__(self):
		return "Page %d+%d of %d (%dx%d): %s (%dx%d)" % \
				(self.page,self.pageOffset,self.pages,self.width,self.height,\
				self.svgfile,self.svgwidth,self.svgheight)

	def createJarnalSVG(self,svgdir,bgname,pwidth,pheight):
		"""Convert the reader's SVG drawing into a Jarnal page.

		svgdir  -- directory containing self.svgfile
		bgname  -- background id written into the page parameters
		pwidth  -- target page width
		pheight -- target page height

		Every <polyline> of the source SVG becomes one <path> element;
		polylines without points are skipped.

		Returns a (filename, content) tuple where filename is "p<page>.svg".
		Raises IOError if the SVG file cannot be opened.
		"""
		jarnalsvg = """
<?xml version="1.0" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 20010904//EN" "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
<svg width="%dpx" height="%dpx" xmlns="http://www.w3.org/2000/svg">
	<title>Jarnal document - see http://www.dklevine/general/software/tc1000/jarnal.htm for details</title>
	<desc>
		[Jarnal Page Parameters]
		paper=Plain
		lines=25
		height=%d
		width=%d
		bg=1
		transparency=100
		bcolor=-1
		bgtext=false
		bgfade=0
		bgrotate=0
		bgscale=1.0
		bgid=%s
		bgindex=%d
		pageref=pageref-0000001
	</desc>
		""" % (pwidth,pheight,pheight, pwidth,bgname,self.page)
		jarnalStyle = 'stroke="red" stroke-width="1.32" fill="none"'
		# x is scaled against the SVG width minus 40 and then shifted by -20.
		# NOTE(review): presumably compensates for a horizontal margin in the
		# reader's drawings -- confirm against real device output.
		xoff = -20
		yoff = 0
		xscale = pwidth/float(self.svgwidth-40)
		yscale = pheight/float(self.svgheight)
		dom = xml.dom.minidom.parse(svgdir + "/" + self.svgfile)
		svg = dom.getElementsByTagName("svg")[0]
		paths = []
		for polyline in svg.getElementsByTagName("polyline"):
			coords = []
			for point in polyline.getAttribute("points").split():
				xy = point.split(",")
				coords.append("%.1f %.1f" % \
						(float(xy[0])*xscale+xoff,float(xy[1])*yscale+yoff))
			if coords:
				# Move to the first point, then draw lines to the others.
				# The d attribute is closed before the style attributes so
				# that the element is well-formed XML.
				paths.append('<path d="M%s" %s/>\n' % \
						(" L".join(coords),jarnalStyle))
		jarnalsvg += "".join(paths) + "</svg>\n"
		jarnalfile = "p%d.svg" % self.page
		return jarnalfile,jarnalsvg




class Prs600Text:
	"""Markup attached to one document listed in the reader's database.

	Collects the annotations, free-hand drawings and bookmarks found under
	the <markups> child of the given node, reads the page count and the
	size of the first page from the PDF itself, and can export the
	free-hand drawings as a Jarnal archive.
	"""

	def __init__(self,topdir,node):
		"""Load the markup of one document.

		topdir -- base directory of the reader
		node   -- XML element whose "path" attribute names the PDF,
		          relative to topdir

		Raises IOError if the PDF cannot be opened.
		"""
		self.topdir = topdir
		self.path = node.getAttribute("path")
		self.filename = self.path.split("/")[-1]
		self.bgname="background-383160434."+self.filename
		self.annot = []
		self.bookmarks = []
		self.svgpages = {}
		self.freehand = []
		markup = node.getElementsByTagName("markups")
		if len(markup)>0:
			markup = markup[0]
			for a in markup.getElementsByTagName("annotation"):
				self.annot.append(Annotation(a))
			for f in markup.getElementsByTagName("freehand"):
				self.freehand.append(FreeHand(f))
			for b in markup.getElementsByTagName("bookmark2"):
				self.bookmarks.append(Bookmark(b))
		# Everything needed from the PDF is read inside the with block so
		# that the file handle is closed again instead of leaking.
		with open(self.topdir+"/"+self.path, "rb") as fp:
			infile = pyPdf.PdfFileReader(fp)
			self.numpages = infile.getNumPages()
			page = infile.getPage(0)
			self.width = float(page.mediaBox.getUpperRight_x())
			self.height = float(page.mediaBox.getUpperRight_y())

	@staticmethod
	def _toBytes(text):
		"""Return text as a byte string, encoding unicode text as UTF-8."""
		if isinstance(text, bytes):
			return text
		return text.encode("utf-8")

	def disp(self):
		"""Print the annotations and free-hand drawings to stdout."""
		print("Annotations:")
		for a in self.annot:
			print(str(a))
		print("FreeHand:")
		for f in self.freehand:
			print(str(f))

	def defaultJarnalSVG(self,page):
		"""Return the Jarnal SVG of a page that carries no drawing."""
		jarnalsvg = """
<?xml version="1.0" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 20010904//EN" "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
<svg width="%dpx" height="%dpx" xmlns="http://www.w3.org/2000/svg">
	<title>Jarnal document - see http://www.dklevine/general/software/tc1000/jarnal.htm for details</title>
	<desc>
		[Jarnal Page Parameters]
		paper=Plain
		lines=25
		height=%d
		width=%d
		bg=1
		transparency=100
		bcolor=-1
		bgtext=false
		bgfade=0
		bgrotate=0
		bgscale=1.0
		bgid=%s
		bgindex=%d
		pageref=pageref-0000001
	</desc>
</svg>
		""" % (self.width,self.height,self.height,self.width,self.bgname,page)
		return jarnalsvg

	def createJarnalConf(self,pdfname):
		"""Return the text of the archive's jarnal.conf.

		pdfname is accepted for interface compatibility but is not used;
		the background is always referenced through self.bgname.
		"""
		jarnalconf = """[Globals]
pageNumber=1\nscale=1.0\nhighlightLines=false
highlighterStyle=translucent\nfitWidth=false\nviewQuality=64\nbWidth=2.2
activePage=0\nthumbs=true\nthreeup=true\npaper=Plain\nlines=25
height=%d\nwidth=%d\nbg=1\ntransparency=100\nbcolor=-1\nbgtext=false
bgfade=0\nbgrotate=0\nbgscale=1.0\nbgid=%s\nbgindex=-1
windowHeight=983.0\nwindowWidth=824.0\ndivwidth=90\noutheight=90
obgcolor=blue\nfoffX=0\nfoffY=0\nwindowX=72.0\nwindowY=25.0\ntextMode=false
pencentric=false\nsaveOnExit=true\nsaveSelfexecuting=false\nminScrShot=true
allScr=false\nsaveUserInfo=false\nupdateBookmarks=true\nsaveBg=true
smoothStrokes=true\ndefaultText=font-family="Vera" font-size="10" fill="red" 
defaultOverlay=rx="0" ry="0" fill="white" stroke="gray" stroke-width="0" fill-opacity="0.9" stroke-opacity="1.0"
alignToMargins=false\nbestFit=true\nabsoluteScale=true\nshowPageNumbers=false
withBorders=false\nURLEncode=false\npromptForNetSaveName=false
stickyRuler=false\narrowhead=false\nmarkerweight=10\nmiddleButton=Eraser

[Background List]
%s=%s

[Background %s]\nisRepeating=false\nastate=1\ninternalScale=1.0
		""" % (self.height,self.width,self.bgname,self.bgname,\
				self.bgname,self.bgname)
		return jarnalconf


	def createJarnalSVG(self,svgdir):
		"""Fill self.svgpages with one SVG document per page.

		Every page first gets the empty default page; pages that have a
		free-hand drawing are then replaced by the converted drawing read
		from svgdir.
		"""
		self.svgpages = {}
		for i in range(0,self.numpages):
			self.svgpages["p%d.svg" % i] = self.defaultJarnalSVG(i)
		for f in self.freehand:
			pagename,content = f.createJarnalSVG(svgdir,self.bgname,\
					self.width,self.height)
			self.svgpages[pagename] = content

	def saveJarnalSVG(self,filename):
		"""Write the Jarnal archive for this document to filename.

		The archive is a zip file holding jarnal.conf, one p<N>.svg per
		page and a copy of the PDF stored under self.bgname. A relative
		filename is resolved against the current directory; an existing
		file is overwritten.
		"""
		# The drawings live in one of two places depending on the layout of
		# the reader's storage; fall back only when the first is unreadable.
		try:
			self.createJarnalSVG(self.topdir + "/database/markup/" + self.path)
		except IOError:
			self.createJarnalSVG(self.topdir + "/Sony Reader/markup/" + self.path)

		filename = os.path.join(os.getcwd(), filename)
		pdffile = self.topdir+"/"+self.path
		# The archive is built in-process: no shell command is assembled
		# from the file name, and the background is added by its exact name
		# rather than through a case-sensitive *.pdf glob.
		archive = zipfile.ZipFile(filename, "w", zipfile.ZIP_DEFLATED)
		try:
			archive.writestr("jarnal.conf",
					self._toBytes(self.createJarnalConf(self.filename)))
			for i in range(0,self.numpages):
				pagename = "p%d.svg" % i
				archive.writestr(pagename, self._toBytes(self.svgpages[pagename]))
			archive.write(pdffile, self.bgname)
		finally:
			archive.close()
		print("Created file " + filename)


class Prs600Markup:
	"""Access to the documents listed in the reader's cacheExt.xml.

	self.tl holds the <text> elements found under the first <cacheExt>
	element; self.text is the most recently selected Prs600Text (or None).
	"""

	def __init__(self,topdir):
		"""Parse cacheExt.xml below topdir.

		The file is looked up in database/cache first and in
		"Sony Reader/database" when the first location cannot be read.
		Raises IOError if neither file can be opened; a parse error in a
		file that does exist is propagated as is.
		"""
		self.topdir = topdir

		# Fall back only when the first file is unreadable, so that a real
		# parse error is not hidden behind a "no such file" for the second.
		try:
			self.dom = xml.dom.minidom.parse(self.topdir+\
					"/database/cache/cacheExt.xml")
		except IOError:
			self.dom = xml.dom.minidom.parse(self.topdir+\
					"/Sony Reader/database/cacheExt.xml")

		ce = self.dom.getElementsByTagName("cacheExt")[0]
		self.tl = ce.getElementsByTagName("text")
		self.text = None

	def listFiles(self):
		"""Return the paths of the PDF documents that carry any markup.

		A document qualifies when the part of its path after the last dot
		is "pdf" (case-insensitive) and it has at least one free-hand
		drawing, annotation or bookmark.
		"""
		result = []
		for f in self.tl:
			path = f.getAttribute("path")
			if path.split(".")[-1].upper() != "PDF":
				continue
			text = Prs600Text(self.topdir,f)
			if text.freehand or text.annot or text.bookmarks:
				result.append(path)
		return result


	def selectFileById(self,id):
		"""Load and return the document at index id of self.tl."""
		self.text = Prs600Text(self.topdir,self.tl[id])
		return self.text

	def selectFileByPath(self,name):
		"""Load and return the document whose path equals name.

		Returns None when no entry matches. If several entries share the
		path, the last one is kept.
		"""
		self.text = None
		for f in self.tl:
			if f.getAttribute("path") == name:
				self.text = Prs600Text(self.topdir,f)
		return self.text


# topdir="/home/cedricp/documents/PRS600"
# Interactive driver: list the documents that carry markup, let the user
# pick one and export it as a Jarnal archive.
topdir = sys.argv[1] if len(sys.argv) > 1 else ""

if len(topdir.strip())==0:
	print("Usage: %s <path to PRS600 base directory>" % sys.argv[0])
	sys.exit(0)

p = Prs600Markup(topdir)
tl = p.listFiles()

while True:
	# Menu: one numbered line per document, followed by its bookmarks.
	for i, path in enumerate(tl):
		print("%3d: %s" % (i+1,path))
		t = p.selectFileByPath(path)
		for bm in t.bookmarks:
			print("\t\t" + repr(bm))
	print("%3d: Exit" % 0)
	try:
		s = int(raw_input("Selection: ")) - 1
	except ValueError:
		print("Invalid input")
		continue
	except (EOFError, KeyboardInterrupt):
		# stdin closed or Ctrl-C: leave instead of re-prompting forever.
		break
	if s >= len(tl):
		print("Invalid input")
		continue
	if s < 0:
		# 0 (or any negative number) means Exit.
		break
	t = p.selectFileByPath(tl[s])
	# Default archive name: the document name with its extension replaced
	# by .jaj (or .jaj appended when there is no extension).
	defname = t.filename.split(".")
	if len(defname)>1:
		defname = ".".join(defname[0:-1]) + ".jaj"
	else:
		defname = t.filename + ".jaj"
	rename = raw_input("Save as [%s]: " % defname).strip()
	if len(rename) != 0:
		defname = rename
	t.saveJarnalSVG(defname)



