View Single Post
Old 02-14-2009, 07:04 AM   #8
sjvr767
Junior Member
sjvr767 began at the beginning.
 
Posts: 5
Karma: 10
Join Date: Jun 2008
Device: iRex iLiad
small update

Hi there,

Sorry this has taken so long. I have a lot going on in my life right now, but I managed to do a bit of code clean-up (not much) and I added the ability to specify manual cropping in addition to the proportional cropping.

Therefore, you can now tweak the cropping slightly. For example, I have a paper called "systemic_risk.pdf",

Code:
./my_crop.py -s -p 0.7 -i systemic_risk.pdf -o systemic_risk2.pdf
and after checking the output file (systemic_risk2.pdf), I see that I'd like to crop the left, top and bottom sides a bit more... Then I can go,

Code:
./my_crop.py -s -p 0.7 -i systemic_risk.pdf -o systemic_risk2.pdf -m "15 50 0 50"
Which will crop the left side by 15, the top by 50, the right by 0 and the bottom by 50.

BTW, the script now outputs the dimension of the first page... You can use that in order to give yourself an idea as to how much to crop manually. Also, you can do pure manual cropping by specifying -p 1.

If you have pyPDF, just cut and paste the following code into a file called "my_crop.py" and make it executable:

Code:
#! /usr/bin/python

import getopt, sys
from pyPdf import PdfFileWriter, PdfFileReader

def usage ():
    """Print the command-line help text and exit.

    The help is written to standard output, after which the interpreter is
    terminated through ``sys.exit(0)``; this function therefore never
    returns to its caller.

    Raises:
        SystemExit: always, with exit status 0.
    """
    # Long options take two dashes; with a single dash getopt would parse
    # "-input" as "-i nput", so the second example must use "--input" and
    # "--output" to be runnable as printed.
    print("""sjvr767's PDF Cropping Script.
Example:
my_pdf_crop.py -s -p 0.5 -i input.pdf -o output.pdf
my_pdf_crop.py --skip --percent 0.5 --input input.pdf --output output.pdf
\n
REQUIRED OPTIONS:
-p\t--percent
The factor by which to crop. Must be positive and less than or equal to 1.

-i\t--input
The path to the file to be cropped.
\n
OPTIONAL:
-s\t--skip
Skip the first page. Output file will not contain the first page of the input file.

-o\t--output
Specify the name and path of the output file. If none specified, the script appends 'cropped' to the file name.

-m\t--margin
Specify additional absolute cropping, for fine tuning results.
\t-m "left top right bottom"
""")
    sys.exit(0)

def cut_length(dictionary, key, factor):
    """Return a quarter of the length lost when a dimension is scaled.

    Args:
        dictionary: mapping holding the dimension; the value stored under
            ``key`` is converted with ``float()``.
        key: name of the entry to read from ``dictionary``.
        factor: scaling factor; ``1 - factor`` is the fraction removed.

    Returns:
        ``float(dictionary[key]) * (1 - factor) / 4``.
    """
    removed_fraction = 1 - factor
    return float(dictionary[key]) * removed_fraction / 4
		
def new_coords(dictionary, key, cut, margin, code="tl"):
    """Return one shifted coordinate of a box corner.

    Args:
        dictionary: mapping with the corner's current coordinates; the value
            under ``key`` is converted with ``float()``.
        key: ``"x"`` selects the horizontal rule; any other key is handled
            with the vertical rule.
        cut: amount added to the matching margin before shifting.
        margin: mapping with the extra offsets under ``"l"``, ``"t"``,
            ``"r"`` and ``"b"``; only the entry needed for this corner and
            axis is read.
        code: corner identifier: ``"tl"``, ``"tr"``, ``"bl"``; every other
            value is treated as the bottom-right corner.

    Returns:
        The absolute value of the shifted coordinate.
    """
    origin = float(dictionary[key])

    if key == "x":
        # Left-hand corners are pushed to the right by cut + left margin;
        # all remaining corners are pulled to the left by cut + right margin.
        if code in ("tl", "bl"):
            return abs(origin + (cut + margin["l"]))
        return abs(origin - (cut + margin["r"]))

    # Top corners are lowered by cut + top margin; all remaining corners
    # are raised by cut + bottom margin.
    if code in ("tl", "tr"):
        return abs(origin - (cut + margin["t"]))
    return abs(origin + (cut + margin["b"]))

# ---------------------------------------------------------------------------
# Script body: parse the command line, derive the crop box from a reference
# page, apply it to every page that is kept and write the result.
# ---------------------------------------------------------------------------

try:
    opts, args = getopt.getopt(sys.argv[1:], "sp:i:o:m:", ["skip", "percent=", "input=", "output=", "margin="])
except getopt.GetoptError:
    # The instance is fetched via sys.exc_info() so the handler does not
    # depend on either the "except X, e" or the "except X as e" spelling.
    err = sys.exc_info()[1]
    print(str(err))  # will print something like "option -a not recognized"
    try:
        usage()
    except SystemExit:
        # usage() terminates with status 0; swallow that so a bad command
        # line is reported as a failure by the exit below.
        pass
    sys.exit(2)

skipone = 0
factor = 0.8  # default scaling factor
input_file = None  # no default input file
output_file = None  # derived from input_file unless -o supplies a valid name
margin = {"l": 0, "t": 0, "r": 0, "b": 0}

# getopt expands abbreviated long options to their full registered name, so
# only the exact short and long spellings need to be matched here.
for name, value in opts:
    if name in ("-s", "--skip"):
        skipone = 1
    elif name in ("-p", "--percent"):
        try:
            factor = float(value)
        except ValueError:
            # float() signals an unparsable string with ValueError
            print("Factor must be a number.")
            sys.exit(2)
    elif name in ("-i", "--input"):
        if value[-4:] == '.pdf':
            input_file = value
        else:
            print("Input file must be a PDF.")
            sys.exit(2)  # exit if no appropriate input file
    elif name in ("-o", "--output"):
        if value[-4:] == '.pdf':
            output_file = value
        else:
            # Not fatal: the default output name is used instead.
            print("Output file must be a PDF.")
    elif name in ("-m", "--margin"):
        m_temp = value.strip("\"").split()
        try:
            if len(m_temp) < 4:
                raise ValueError("expected four margin values")
            margin["l"] = float(m_temp[0])
            margin["t"] = float(m_temp[1])
            margin["r"] = float(m_temp[2])
            margin["b"] = float(m_temp[3])
        except ValueError:
            print("Margin must be four numbers: \"left top right bottom\".")
            sys.exit(2)

if input_file is None:
    print("Please specify an input file.")
    sys.exit(2)  # exit if no appropriate input file

# Enforce the range documented in the help text; this also rejects NaN.
if not (0 < factor <= 1):
    print("Factor must be positive and less than or equal to 1.")
    sys.exit(2)

if output_file is None:
    output_file = "%s_cropped.pdf" % input_file[:-4]  # default output

if output_file == input_file:
    # Opening the output for writing would truncate the file being read.
    print("Output file must differ from the input file.")
    sys.exit(2)

input_stream = open(input_file, "rb")
try:
    input1 = PdfFileReader(input_stream)
    pages = input1.getNumPages()

    first_page = 0
    if skipone != 0:
        first_page = 1
    if pages <= first_page:
        print("Input file has no pages to write.")
        sys.exit(2)

    # The crop box is measured on the second page, as before, falling back
    # to the first page when the document has only one.
    ref_index = 0
    if pages > 1:
        ref_index = 1
    ref_box = input1.getPage(ref_index).mediaBox

    top_right = {'x': ref_box.getUpperRight_x(), 'y': ref_box.getUpperRight_y()}
    top_left = {'x': ref_box.getUpperLeft_x(), 'y': ref_box.getUpperLeft_y()}
    bottom_right = {'x': ref_box.getLowerRight_x(), 'y': ref_box.getLowerRight_y()}
    bottom_left = {'x': ref_box.getLowerLeft_x(), 'y': ref_box.getLowerLeft_y()}

    print('Page dim.\t%f by %f' % (top_right['x'], top_right['y']))

    # The proportional cut is derived from the page width and applied to
    # both axes by new_coords().
    cut = cut_length(top_right, 'x', factor)

    new_tr = (new_coords(top_right, 'x', cut, margin, code="tr"), new_coords(top_right, 'y', cut, margin, code="tr"))
    new_br = (new_coords(bottom_right, 'x', cut, margin, code="br"), new_coords(bottom_right, 'y', cut, margin, code="br"))
    new_tl = (new_coords(top_left, 'x', cut, margin, code="tl"), new_coords(top_left, 'y', cut, margin, code="tl"))
    new_bl = (new_coords(bottom_left, 'x', cut, margin, code="bl"), new_coords(bottom_left, 'y', cut, margin, code="bl"))

    output = PdfFileWriter()
    for i in range(first_page, pages):
        page = input1.getPage(i)
        page.mediaBox.upperLeft = new_tl
        page.mediaBox.upperRight = new_tr
        page.mediaBox.lowerLeft = new_bl
        page.mediaBox.lowerRight = new_br
        output.addPage(page)

    # The output is opened only once every page has been prepared, and the
    # input stays open until the write has finished.
    outputstream = open(output_file, "wb")
    try:
        output.write(outputstream)
    finally:
        outputstream.close()
finally:
    input_stream.close()
sjvr767 is offline   Reply With Quote