MobileRead Forums - View Single Post - k2pdfopt: optimizes PDFs for viewing on e-readers

schezuk · 08-06-2024, 02:05 AM

I wrote myself a quick-and-dirty piece of code that rotates every CJK character by 90 degrees; it is designed to leave Latin script alone when a word is wide enough. It is not a particularly effective implementation, and it relies on many magic numbers, though.

Code:

#! /usr/bin/env python3
# -*- coding: utf-8 -*-
import sys
import cv2
import numpy

def rotate_image(image, angle):
	"""Return a copy of *image* rotated about its centre, same size as the input.

	image: array whose first two dimensions are (rows, columns).
	angle: rotation angle, handed unchanged to cv2.getRotationMatrix2D
	       (which takes degrees) with a scale factor of 1.0.

	The output keeps the input's width and height; pixels are resampled
	with bicubic interpolation and areas uncovered by the rotation are
	filled by replicating the border.
	"""
	height, width = image.shape[:2]
	pivot = (width // 2, height // 2)  # (x, y) order, integer centre
	transform = cv2.getRotationMatrix2D(pivot, angle, 1.0)
	return cv2.warpAffine(
		image,
		transform,
		(width, height),
		flags=cv2.INTER_CUBIC,
		borderMode=cv2.BORDER_REPLICATE,
	)

def determine_score(arr):
	"""Score each image of a stack by how sharply its row totals change.

	arr: stack of 2-D images laid out as (image, row, column), which is
	     how correct_skew builds it.

	For every image the pixels of each row are summed, and the squared
	differences between the totals of consecutive rows are added up.
	Returns one float score per image; all arithmetic is done in float so
	integer pixel types cannot overflow.
	"""
	row_totals = numpy.sum(arr, axis=2, dtype=float)
	row_steps = numpy.diff(row_totals, axis=-1)
	return numpy.sum(numpy.square(row_steps), axis=1, dtype=float)

def correct_skew(monoImg, delta = 0.1, limit = 5):
	"""Estimate the rotation angle that best straightens *monoImg*.

	monoImg: single-channel image to analyse (it is not modified).
	delta:   step between candidate angles.
	limit:   candidates run from -limit up to roughly +limit.

	Every candidate angle is applied with rotate_image and rated with
	determine_score; the angle with the highest score is returned (the
	first one if several tie).
	"""
	angles = numpy.arange(-limit, limit + delta, delta)
	# Rate one rotated copy at a time instead of stacking them all first:
	# with the defaults there are about a hundred candidates, and keeping a
	# full-page copy of each alive at once is needlessly memory hungry.
	# determine_score expects a stack, hence the temporary leading axis.
	scores = [
		determine_score(rotate_image(monoImg, angle)[numpy.newaxis])[0]
		for angle in angles
	]
	best_angle = angles[numpy.argmax(scores)]
	return best_angle


# --- Load the page -----------------------------------------------------
if len(sys.argv) < 2:
	sys.exit("usage: " + sys.argv[0] + " IMAGE")
orgImage = cv2.imread(sys.argv[1])
if orgImage is None:
	# cv2.imread does not raise for a missing or undecodable file; it
	# returns None, which would otherwise fail obscurely further down.
	sys.exit("cannot read image: " + sys.argv[1])

# Work on the page turned by a quarter turn (numpy.rot90 default direction).
myImage = numpy.ascontiguousarray(numpy.rot90(orgImage))
(my_h, my_w, _) = myImage.shape

# Divide the grayscale page by a heavily blurred copy of itself
# (presumably to even out background shading) and continue with that
# single-channel result as the working image.
grayImg = cv2.cvtColor(myImage, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(grayImg, (0,0), sigmaX=33, sigmaY=33)
divide = cv2.divide(grayImg, blur, scale=255)
myImage = divide


# --- Binarise ----------------------------------------------------------
# Two thresholding passes on the grayscale page: NICK (block size at_bs,
# k = -0.3) followed by a Gaussian adaptive threshold (block 11, C = 2).
# The result is inverted afterwards -- presumably so that ink ends up as
# the non-zero foreground for the morphology steps that follow.
at_bs = 5
nickBinary = cv2.ximgproc.niBlackThreshold(
	grayImg,
	255,
	cv2.THRESH_BINARY,
	at_bs,
	-0.3,
	binarizationMethod=cv2.ximgproc.BINARIZATION_NICK,
)
monoImg = cv2.bitwise_not(
	cv2.adaptiveThreshold(
		nickBinary,
		255,
		cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
		cv2.THRESH_BINARY,
		11,
		2,
	)
)

# --- Deskew ------------------------------------------------------------
# Estimate the skew on the binarised page and report it on stdout.
angle = correct_skew(monoImg)
print(angle)
# Angles strictly inside (-0.1, 0.1) are treated as "already straight";
# anything else is applied to both the mask and the working image so the
# two stay aligned.
if angle <= -0.1 or angle >= 0.1:
	monoImg, myImage = rotate_image(monoImg, angle), rotate_image(myImage, angle)

# --- Locate blobs ------------------------------------------------------
# Erode with a 2x1 rectangle, then dilate with an 18x5 one (sizes are
# given to OpenCV as width x height) and take the outer contours of what
# remains. Presumably the erosion drops thin specks and the wide dilation
# fuses neighbouring marks into one blob per run of text.
blobKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (18, 5))
blobMask = cv2.dilate(
	cv2.erode(
		monoImg,
		cv2.getStructuringElement(cv2.MORPH_RECT, (2, 1)),
		iterations=1,
	),
	blobKernel,
	iterations=1,
)
contours, _hierarchy = cv2.findContours(blobMask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

# Separate, much lighter horizontal dilation (3x1) of the un-eroded mask;
# this is the mask the per-blob column analysis reads from.
dilation = cv2.dilate(
	monoImg,
	cv2.getStructuringElement(cv2.MORPH_RECT, (3, 1)),
	iterations=1,
)


# --- Rotate glyphs -----------------------------------------------------
# White canvas that collects the rotated glyphs; it is merged with what is
# left of the working image at the very end.
newImage = numpy.full(myImage.shape, 255, dtype=numpy.uint8)
for cnt in contours:
	x, y, w, h = cv2.boundingRect(cnt)
	ROI = dilation[y:y + h, x:x + w]

	# joined[i] is True when at least one row has common bits set in ROI
	# columns i-1 and i, i.e. the two columns are connected by ink.
	# Example for one row:   ROI     1 1 0 1 1
	#                        joined 0 1 0 0 1 0
	# joined[0] and joined[w] have no neighbour pair and stay False, so
	# every run of True values is closed on both sides.
	joined = numpy.zeros(w + 1, dtype=bool)
	joined[1:w] = (ROI[:, 1:] & ROI[:, :-1]).any(axis=0)

	# A rising edge between k and k+1 means the segment's first ink column
	# is k; a falling edge between k and k+1 means its last ink column is
	# k, so the exclusive end is k+1.
	edges = numpy.diff(joined.astype(numpy.int8))
	lefts = numpy.flatnonzero(edges == 1).tolist()
	ends = (numpy.flatnonzero(edges == -1) + 1).tolist()

	for left, end in zip(lefts, ends):
		# Segment rectangle in absolute page coordinates.
		_x, _y, _w, _h = x + left, y, end - left, h

		# Leave segments alone that are at least twice as tall as wide or
		# at least three times as wide as tall (e.g. wide Latin words).
		if _h // _w >= 2 or _w // _h >= 3:
			continue

		# Destination rectangle: the rotated glyph is _h wide and _w tall
		# and is centred on the centre of the source rectangle.
		xhigh = _x + _w // 2 + _h // 2
		yhigh = _y + _h // 2 + _w // 2
		xlow = xhigh - _h
		ylow = yhigh - _w
		# Shift the destination back inside the page where it sticks out.
		if xlow < 0:
			xlow = 0
			xhigh = _h
		if xhigh > my_w:
			xhigh = my_w
			xlow = xhigh - _h
		if ylow < 0:
			ylow = 0
			yhigh = _w
		if yhigh > my_h:
			yhigh = my_h
			ylow = yhigh - _w
		# Still out of bounds only if the rotated glyph is larger than the
		# page in one direction; such a segment is left untouched.
		if xlow >= 0 and xhigh <= my_w and ylow >= 0 and yhigh <= my_h:
			# numpy.rot90 returns a view, so take a real copy before the
			# source area is wiped below.
			glyph = numpy.rot90(myImage[_y:_y + _h, _x:_x + _w], -1).copy()
			# Blank exactly the source rectangle. cv2.rectangle treats both
			# corners as inclusive, so drawing to (_x+_w, _y+_h) would also
			# erase one extra column and row belonging to the neighbours.
			myImage[_y:_y + _h, _x:_x + _w] = 255
			newImage[ylow:yhigh, xlow:xhigh] = cv2.min(newImage[ylow:yhigh, xlow:xhigh], glyph)

# Darkest pixel wins: rotated glyphs on top of everything that was not moved.
outPath = sys.argv[1]+".rotate.png"
if not cv2.imwrite(outPath, cv2.min(newImage,myImage)):
	sys.exit("cannot write image: " + outPath)