#!/usr/bin/env python
import getopt, os, re, sys

# One-letter codes used to describe a run of whitespace in diagnostics and
# when comparing runs, e.g. "rn" is a carriage return followed by a line feed.
# Tab is chr(9); chr(8) is backspace and is not whitespace.
_SPACE_CODES = {' ': 's', chr(9): 't', chr(10): 'n', chr(13): 'r'}

# A maximal run of the whitespace characters above.  The capture group makes
# re.split() keep the runs in its result (at the odd indexes).
_SPACE_RUN = re.compile('([ \t\n\r]+)')

# The ASCII whitespace removed from both ends of the input.  Spelled out
# because the input is decoded as Latin-1 and a bare strip() on such text
# would also eat bytes 0x85 and 0xA0, which can be part of a multi-byte
# character in the original file.
_STRIP_CHARS = ' \t\n\r\x0b\x0c'

# What every detected paragraph break is rewritten to (CR LF CR LF).
_PARAGRAPH_BREAK = chr(13) + chr(10) + chr(13) + chr(10)


def _encode_run(run):
  """Return the one-letter-per-character code string for a whitespace run."""
  return ''.join(_SPACE_CODES[ch] for ch in run)


def _count_patterns(text):
  """Return a dict mapping each whitespace-run code in text to its count."""
  counts = {}
  for run in _SPACE_RUN.findall(text):
    code = _encode_run(run)
    counts[code] = counts.get(code, 0) + 1
  return counts


def _rank_patterns(counts):
  """Return the four most frequent (code, count) pairs, padded with ("", 0)."""
  ranked = sorted(counts.items(), key=lambda item: (-item[1], item[0]))
  ranked += [("", 0)] * 4
  return ranked[:4]


def _regenerate(text, wordspace, linebreak, parbreak):
  """Rewrite the whitespace runs of text according to the detected patterns.

  Word-space and line-break runs become a single space, paragraph-break runs
  become CR LF CR LF, and every other run is copied through unchanged.
  """
  pieces = []
  for index, piece in enumerate(_SPACE_RUN.split(text)):
    if index % 2 == 0:
      pieces.append(piece)  # ordinary text between whitespace runs
      continue
    code = _encode_run(piece)
    if code == wordspace or code == linebreak:
      pieces.append(' ')
    elif code == parbreak:
      pieces.append(_PARAGRAPH_BREAK)
    else:
      pieces.append(piece)
  return ''.join(pieces)


def main():
  """Command-line entry point: re-flow hard-wrapped text.

  Reads the file given with -i/--input, counts every distinct run of
  whitespace (space, tab, LF, CR), and takes the most frequent run as the
  word space, the second as the line break and the third as the paragraph
  break.  If the counts are clearly separated, line breaks are joined into
  spaces, paragraph breaks are normalised to CR LF CR LF, and the result is
  written to the file given with -o/--output (default "output.txt").

  Options: -h/--help, -i/--input FILE, -o/--output FILE, -p/--profile en|hu
  (validated but otherwise unused), -v/--verbose (accepted, no effect).

  Always leaves through sys.exit(): status 2 for a command-line error,
  otherwise the default status.  Written to run on Python 2.6+ and 3.
  """
  print("The Pacify Suite of Tools brings you...")
  print("repartee v0.1 - Copyright 2009 Pax Librorum (www.PaxLibrorum.com)")
  print("")
  try:
    # "o:" / "output=": the output option takes the file name as its argument.
    opts, _ = getopt.getopt(sys.argv[1:], "hp:i:o:v",
                            ["help", "profile=", "input=", "output=", "verbose"])
  except getopt.GetoptError as err:
    # print help information and exit:
    print(str(err))  # will print something like "option -a not recognized"
    usage()
    sys.exit(2)

  infilename = None
  outfilename = 'output.txt'
  for o, a in opts:
    if o in ("-v", "--verbose"):
      pass  # accepted so existing command lines keep working; nothing uses it
    elif o in ("-h", "--help"):
      usage()
      sys.exit()
    elif o in ("-p", "--profile"):
      if a not in ("en", "hu"):
        print("profile must either be \"en\" or \"hu\"")
        print("")
        usage()
        sys.exit(2)
    elif o in ("-i", "--input"):
      infilename = a
    elif o in ("-o", "--output"):
      outfilename = a
    else:
      raise AssertionError("unhandled option")

  if infilename is None:
    usage()
    sys.exit()

  # Binary mode keeps CR and LF exactly as stored (text mode may translate
  # them); Latin-1 maps every byte to one character and back, so the file's
  # real encoding does not matter for the ASCII whitespace handled here.
  with open(infilename, 'rb') as infile:
    inbuffer = infile.read().decode('latin-1')
  text = inbuffer.strip(_STRIP_CHARS)

  print("Parsing...")
  print("")

  ranked = _rank_patterns(_count_patterns(text))
  (wordspace, top1), (linebreak, top2), (parbreak, top3), (_, top4) = ranked

  # Only trust the guess when each rank clearly dominates the next one and a
  # fourth, rare pattern exists at all.
  if top3 > (top4 * 10) and top2 > (top3 * 2) and top1 > (top2 * 10) and top4 != 0:
    print(" word space: " + wordspace + " (" + str(top1) + ")")
    print(" line break: " + linebreak + " (" + str(top2) + ")")
    print(" paragraph break: " + parbreak + " (" + str(top3) + ")")
    print(" ...: " + " (" + str(top4) + ")")
  else:
    print("Unable to find a clear and/or consistent line break / paragraph break pattern.")
    print("File might already be fixed.")
    sys.exit()

  print("")
  print("Regenerating...")
  print("")

  outbuffer = _regenerate(text, wordspace, linebreak, parbreak)

  print("Writing to " + outfilename + " ...")
  with open(outfilename, 'wb') as outfile:
    outfile.write(outbuffer.encode('latin-1'))
  print("Done!")
  print("")

  sys.exit()


def usage():
  """Print the one-line command synopsis followed by a blank line.

  Uses single-argument print(...) calls, which produce the same output
  whether print is a statement (Python 2) or a function (Python 3).
  """
  print("Usage: repartee -i input.txt -o output.txt")
  print("")

# Run the tool only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
  main()