View Single Post
Old 09-23-2008, 11:07 AM   #5
Junior Member
sjvr767 began at the beginning.
Posts: 5
Karma: 10
Join Date: Jun 2008
Device: iRex iLiad
Originally Posted by haridasi View Post
I have now tried to crop a pdf, but it doesn't crop the left side of the document. Furthermore, it takes some time guessing the correct percentage.
Hi there, I had a few minutes to spare and changed the way the new coordinates are determined. It should solve the "left-side" issue. This is more of a hack than a significant change, but I hope it helps. Code at the end of the document.

Before I give the code, I'd like to say that when I get time I will do a proper update of this. There are a few features I want to implement, such as splitting pages in half and then scaling those to A4. That should enlarge the doc quite a bit.

Here is the code:
#! /usr/bin/python

import subprocess
import getopt, sys
import find_lines
from pyPdf import PdfFileWriter, PdfFileReader

def usage():
    """Print the command-line help text for the cropping script to stdout."""
    # A single parenthesised argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("""sjvr767's PDF Cropping Script.
Example: -s -p 0.5 -i input.pdf -o output.pdf

-s, --skip
    Skip the first page. Output file will not contain the first page of the input file.

-p, --percent 0.5
    The factor by which to crop. Must be positive and less than or equal to 1.

-i, --input input.pdf
    The path to the file to be cropped.

-o, --output output.pdf
    Specify the name and path of the output file. If none specified, the script appends 'cropped' to the file name.
""")

def cut_length(dictionary, key, factor):
    """Return a quarter of the length removed from dictionary[key] by factor.

    The value stored under ``key`` is multiplied by the discarded fraction
    ``1 - factor``; the result is then divided by four.
    """
    removed = dictionary[key] * (1 - factor)
    return removed / 4
def new_coords(dictionary, key, cut):
    """Return the absolute difference between dictionary[key] and cut."""
    shifted = dictionary[key] - cut
    return abs(shifted)
def new_coords2(ty, lx, rx, by, cut):
    """Shrink a rectangle by ``cut`` on every side and return its corners.

    ``ty``/``by`` are the top and bottom y values and ``lx``/``rx`` the left
    and right x values. The top and right edges are moved down/left by
    ``cut`` and the bottom and left edges up/right by ``cut``.

    Returns a dict keyed 'tr', 'tl', 'bl' and 'br', each holding a separate
    ``{'x': ..., 'y': ...}`` dict for that corner.
    """
    left, right = lx + cut, rx - cut
    top, bottom = ty - cut, by + cut

    def corner(x, y):
        # A fresh dict per corner, so callers may mutate one independently.
        return {'x': x, 'y': y}

    return {
        'tr': corner(right, top),
        'tl': corner(left, top),
        'bl': corner(left, bottom),
        'br': corner(right, bottom),
    }

	opts, args = getopt.getopt(sys.argv[1:], "sp:i:o:sch", ["skip", "percent=", "input=", "output=", "column", "half"])
except getopt.GetoptError, err:
        # print help information and exit:
        print str(err) # will print something like "option -a not recognized"

# skipone is 1 when -s / --skip appears among the parsed options, else 0.
skipone = 1 if any(flag in ('-s', '--skip') for flag, _ in opts) else 0

factor = 0.8 #default scaling factor

for name, value in opts:
    # getopt registers the long form as "percent=", so it is reported as
    # "--percent"; the old test for "--factor" could never match.
    if name in ('-p', '--percent'):
        try:
            factor = float(value)
        except (TypeError, ValueError):
            # float() raises ValueError for non-numeric text such as "abc".
            print("Factor must be a number.")
            sys.exit(2) #exit if the factor is not numeric

input_file = None #no default input file

for name, value in opts:
    if name in ('-i', '--input'):
        # The check is on the last four characters and is case-sensitive.
        if value[-4:] == '.pdf':
            input_file = value
        else:
            print("Input file must be a PDF.")
            sys.exit(2) #exit if no appropriate input file

# Stop here when -i/--input was never given; later code slices input_file
# to build the default output name and would fail on None.
if input_file is None:
    print("Please specify an input file.")
    sys.exit(2) #exit if no appropriate input file

# Default: the input name with its 4-character extension replaced by
# "_cropped.pdf".
output_file = "%s_cropped.pdf" %input_file[:-4] #default output

for name, value in opts:
    if name in ('-o', '--output'):
        if value[-4:] == '.pdf':
            output_file = value
        else:
            # Not fatal: warn and keep the default name computed above.
            print("Output file must be a PDF.")

# col is 1 when -c / --column was given, else 0.
col = 1 if any(flag in ('-c', '--column') for flag, _ in opts) else 0

# half is 1 when -h / --half was given, else 0 (-h is "half" here, not help).
half = 1 if any(flag in ('-h', '--half') for flag, _ in opts) else 0

# Open the source document and prepare the writer and stream for the result.
input1 = PdfFileReader(open(input_file, "rb"))

output = PdfFileWriter()
outputstream = open(output_file, "wb")

pages = input1.getNumPages()

# The page at index 1 supplies the reference media box, as before. A
# one-page document has no index 1, so fall back to its only page instead
# of failing inside getPage.
ref_box = input1.getPage(1 if pages > 1 else 0).mediaBox

top_right = {'x': ref_box.getUpperRight_x(), 'y': ref_box.getUpperRight_y()}

ty = ref_box.getUpperLeft_y()
lx = ref_box.getUpperLeft_x()
rx = ref_box.getLowerRight_x()
by = ref_box.getLowerRight_y()
# Debug output: original box as top-y, left-x, right-x, bottom-y.
print("%s %s %s %s" % (ty, lx, rx, by))

# The amount trimmed from every edge is derived from the page's right-hand x.
cut = cut_length(top_right, 'x', factor)

# Corners of the cropped box as (x, y) tuples.
newCoords = new_coords2(ty, lx, rx, by, cut)
new_tr = (newCoords['tr']['x'], newCoords['tr']['y'])
new_tl = (newCoords['tl']['x'], newCoords['tl']['y'])
new_br = (newCoords['br']['x'], newCoords['br']['y'])
new_bl = (newCoords['bl']['x'], newCoords['bl']['y'])

# Debug output: new top-left y, top-left x, bottom-left y, bottom-left x.
print("%s %s %s %s" % (new_tl[1], new_tl[0], new_bl[1], new_bl[0]))

def _set_media_box(page, upper_left, upper_right, lower_left, lower_right):
    """Assign all four mediaBox corners of ``page`` and return the page."""
    page.mediaBox.upperLeft = upper_left
    page.mediaBox.upperRight = upper_right
    page.mediaBox.lowerLeft = lower_left
    page.mediaBox.lowerRight = lower_right
    return page


if skipone == 0 and col == 0 and half == 0:
    # Plain crop of every page.
    for i in range(0, pages):
        page = _set_media_box(input1.getPage(i), new_tl, new_tr, new_bl, new_br)
        output.addPage(page)
elif skipone == 0 and col == 0 and half == 1:
    # Crop each page, then split it in two at the height reported by
    # find_lines.find_hline for a rasterised copy of the page.
    # NOTE(review): the range stops two pages before the end, as in the
    # original -- confirm whether that is intended.
    for i in range(0, pages-2):
        page = _set_media_box(input1.getPage(i), new_tl, new_tr, new_bl, new_br)
        # Write the cropped page to temp.pdf so that `convert` has something
        # to read; the original never filled or wrote this temporary writer.
        temp_output = PdfFileWriter()
        temp_output.addPage(page)
        with open("temp.pdf", "wb") as tos:
            temp_output.write(tos)
        cmd = 'convert temp.pdf -density 8400 -colorspace Gray -contrast -contrast -contrast -colors 16 temp.gif'
        subprocess.call(cmd, shell=True)
        height = find_lines.find_hline('temp.gif', 5, 80)
        # Upper part: from the top edge down to `height` below it.
        page1 = _set_media_box(
            input1.getPage(i),
            new_tl,
            new_tr,
            (new_tl[0], new_tl[1]-height),
            (new_tr[0], new_tr[1]-height),
        )
        output.addPage(page1)
        # Lower part: from `height` below the top edge to the bottom edge.
        # NOTE(review): page1 and page2 both come from getPage(i); if pyPdf
        # hands back the same object each time, both output pages will end
        # up with this second box. Verify, and copy the page if so.
        page2 = _set_media_box(
            input1.getPage(i),
            (new_tl[0], new_tl[1]-height),
            (new_tr[0], new_tr[1]-height),
            new_bl,
            new_br,
        )
        output.addPage(page2)

elif skipone == 1 and col == 0 and half == 0:
    # Plain crop that leaves out the first page.
    for i in range(1, pages):
        page = _set_media_box(input1.getPage(i), new_tl, new_tr, new_bl, new_br)
        output.addPage(page)
else:
    # No branch above handles this mix of --skip / --column / --half.
    print("This combination of options is not supported.")
    sys.exit(2)

# Write the collected pages to the output file; without this step the output
# file is left empty.
output.write(outputstream)
outputstream.close()

sjvr767 is offline   Reply With Quote