View Single Post
Old 06-28-2008, 07:51 AM   #1
sjvr767
Junior Member
sjvr767 began at the beginning.
 
Posts: 5
Karma: 10
Join Date: Jun 2008
Device: iRex iLiad
Yet another PDF cropping tool

Hi,

This is my first post and I thought I'd share a script I use for cropping PDFs, so that they'll display better on my iLiad.

I'm pretty sure a lot people on here have done this or something similar, but I created the script a month or two ago after finding that some journal papers didn't want to be cropped using the normal pdfcrop tool.

The script is based heavily on the cropping section of the example on the pyPdf homepage.

To run the script, you will need python and pyPdf (available for most linux distros)... With that installed, just copy the below code into a file and make executable.


Code:
#! /usr/bin/python

import getopt, sys
from pyPdf import PdfFileWriter, PdfFileReader

def usage ():
    print """sjvr767\'s PDF Cropping Script.
Example:
my_pdf_crop.py -s -p 0.5 -i input.pdf -o output.pdf
my_pdf_crop.py --skip --percent 0.5 -input input.pdf -output output.pdf
\n
REQUIRED OPTIONS:
-p\t--percent
The factor by which to crop. Must be positive and less than or equal to 1.

-i\t--input
The path to the file to be cropped.
\n
OPTIONAL:
-s\t--skip
Skip the first page. Ouptut file will not contain the first page of the input file.

-o\t--output
Specify the name and path of the output file. If none specified, the script appends \'cropped\' to the file name.
"""
    sys.exit(0)

def cut_length(dictionary, key, factor):
	cut_factor = 1-factor
	cut = dictionary[key]*cut_factor
	cut = cut / 4
	return cut
	
def new_coords(dictionary, key, cut):
	return abs(dictionary[key]-cut)

try:
	opts, args = getopt.getopt(sys.argv[1:], "sp:i:o:s", ["skip", "percent=", "input=", "output="])
except getopt.GetoptError, err:
        # print help information and exit:
        print str(err) # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

skipone = 0

for a in opts[:]:
	if a[0] == '-s' or a[0]=='--skip':
		skipone = 1

factor = 0.8 #default scaling factor

for a in opts[:]:
	if a[0] == '-p' or a[0]=='--factor':
		if a[1] != None:
			try:
				factor = float(a[1])
			except TypeError:
				print "Factor must be a number."
				sys.exit(2) #exit if no appropriate input file

input_file = None #no defualt input file
		
for a in opts[:]:
	if a[0] == '-i' or a[0]=='--input':
		if a[1] != None:
			try:
				if a[1][-4:]=='.pdf':
					input_file = a[1]
				else:
					print "Input file must be a PDF."
					sys.exit(2) #exit if no appropriate input file
			except TypeError:
				print "Input file must be a PDF."
				sys.exit(2) #exit if no appropriate input file
			except IndexError:
				print "Input file must be a PDF."
				sys.exit(2) #exit if no appropriate input file
		else:
			print "Please speicfy an input file."
			sys.exit(2) #exit if no appropriate input file

output_file = "%s_cropped.pdf" %input_file[:-4] #default output

for a in opts[:]:
	if a[0] == '-o' or a[0]=='--output': 
		if a[1]!= None:
			try:
				if a[1][-4:]=='.pdf':
					output_file = a[1]
				else:
					print "Output file must be a PDF."
			except TypeError:
				print "Output file must be a PDF."
			except IndexError:
				print "Output file must be a PDF."


input1 = PdfFileReader(file(input_file, "rb"))

output = PdfFileWriter()
outputstream = file(output_file, "wb")

pages = input1.getNumPages()

top_right = {'x': input1.getPage(1).mediaBox.getUpperRight_x(), 'y': input1.getPage(1).mediaBox.getUpperRight_y()}
top_left = {'x': input1.getPage(1).mediaBox.getUpperLeft_x(), 'y': input1.getPage(1).mediaBox.getUpperLeft_y()}
bottom_right = {'x': input1.getPage(1).mediaBox.getLowerRight_x(), 'y': input1.getPage(1).mediaBox.getLowerRight_y()}
bottom_left = {'x': input1.getPage(1).mediaBox.getLowerLeft_x(), 'y': input1.getPage(1).mediaBox.getLowerLeft_y()}

cut = cut_length(top_right, 'x', factor)

new_tr = (new_coords(top_right, 'x', cut), new_coords(top_right, 'y', cut))
new_br = (new_coords(bottom_right, 'x', cut), new_coords(bottom_right, 'y', cut))
new_tl = (new_coords(top_left, 'x', cut), new_coords(top_left, 'y', cut))
new_bl = (new_coords(bottom_left, 'x', cut), new_coords(bottom_left, 'y', cut))

if skipone == 0:
	for i in range(0, pages):
		page = input1.getPage(i)
		page.mediaBox.upperLeft = new_tl
		page.mediaBox.upperRight = new_tr
		page.mediaBox.lowerLeft = new_bl
		page.mediaBox.lowerRight = new_br
		output.addPage(page)
else:
	for i in range(1, pages):
		page = input1.getPage(i)
		page.mediaBox.upperLeft = new_tl
		page.mediaBox.upperRight = new_tr
		page.mediaBox.lowerLeft = new_bl
		page.mediaBox.lowerRight = new_br
		output.addPage(page)

output.write(outputstream)
outputstream.close()
If you paste it into a file called "my_pdfcrop.py" then you can crop a file by approximately 20% using the command,

Code:
./my_pdfcrop.py -p 0.8 -i input.pdf
Hope someone finds this useful and let me know if you find a bug or have a suggestion.
sjvr767 is offline   Reply With Quote