Last active
August 13, 2025 05:54
-
-
Save saosir/8b73e96df9a894e9b1ee3e308619eb58 to your computer and use it in GitHub Desktop.
文字照片或者图片角度校正,方便 tesseract 进行 ocr 识别
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding:utf8 -*- | |
import sys | |
import os | |
import cv2 | |
import math | |
import logging | |
import numpy as np | |
""" | |
将倾斜的图片纠正,以便tesseract能够正确识别 | |
""" | |
def rotate_about_center(src, angle, scale=1.):
    """Rotate ``src`` about its centre on a canvas large enough to hold it.

    The rotated image is written to ``test_dst.jpg`` in the current working
    directory and is also returned.

    Args:
        src: Image array; its first two dimensions are read as (height, width).
        angle: Rotation angle in degrees, as passed to
            ``cv2.getRotationMatrix2D``.
        scale: Scale factor applied together with the rotation.

    Returns:
        The rotated image produced by ``cv2.warpAffine``.
    """
    # Use the argument's own shape so the function does not depend on any
    # module-level image variable.
    h, w = src.shape[:2]
    rangle = np.deg2rad(angle)  # angle in radians
    sin_a = abs(np.sin(rangle))
    cos_a = abs(np.cos(rangle))
    # Size of the bounding box of the rotated and scaled image.
    nw = (sin_a * h + cos_a * w) * scale
    nh = (cos_a * h + sin_a * w) * scale
    # Rotation about the centre of the new canvas.
    rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, scale)
    # Shift from the old centre to the new centre, expressed through the
    # rotation; it only affects the translation column of the matrix.
    rot_move = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
    rot_mat[0, 2] += rot_move[0]
    rot_mat[1, 2] += rot_move[1]
    out_size = (int(math.ceil(nw)), int(math.ceil(nh)))
    dst = cv2.warpAffine(src, rot_mat, out_size, flags=cv2.INTER_LANCZOS4)
    cv2.imwrite('test_dst.jpg', dst)
    return dst
def img_scale(img):
    """Bring ``img`` to a working size suited to the later DFT step.

    If either side is longer than 1024 px the image is shrunk, keeping the
    aspect ratio, so that its longer side becomes 1024 px (this speeds up
    processing). Afterwards any side shorter than 1000 px is padded with
    equal white borders, which helps with small images. The border on each
    side is ``(1000 - side) // 2``, so an odd shortfall ends one pixel short
    of 1000.

    Args:
        img: Image array; its first two dimensions are read as
            (height, width).

    Returns:
        The resized and/or padded image, or ``img`` itself when neither step
        applies.
    """
    max_side = 1024
    min_side = 1000

    h, w = img.shape[:2]
    longest = max(h, w)
    if longest > max_side:
        # Scale by the longer side so that both sides end up <= max_side.
        rate = max_side / float(longest)
        print("resize rate:  %s" % rate)
        img = cv2.resize(img, None, fx=rate, fy=rate,
                         interpolation=cv2.INTER_CUBIC)

    white = (255, 255, 255)
    h, w = img.shape[:2]
    if h < min_side:
        # Floor division keeps the border an integer on Python 2 and 3.
        border = (min_side - h) // 2
        img = cv2.copyMakeBorder(img, border, border, 0, 0,
                                 cv2.BORDER_CONSTANT, value=white)
    if w < min_side:
        border = (min_side - w) // 2
        img = cv2.copyMakeBorder(img, 0, 0, border, border,
                                 cv2.BORDER_CONSTANT, value=white)
    return img
def shift_dft(src, dst=None):
    """Move the origin of a Fourier image to the image centre.

    The quadrants are rearranged along the first two axes (1 <-> 3 and
    2 <-> 4). Every output element is written, including for odd sizes, and
    the origin lands at index ``(h // 2, w // 2)``.

    Args:
        src: Array with at least two dimensions (height, width, ...).
        dst: Optional output array with the same shape and dtype as ``src``.
            May be ``src`` itself for an in-place shift. A new array is
            allocated when omitted.

    Returns:
        ``dst`` (or the newly allocated array) holding the shifted data.

    Raises:
        ValueError: If ``src`` and ``dst`` differ in shape.
        TypeError: If ``src`` and ``dst`` differ in dtype.
    """
    if dst is None:
        dst = np.empty(src.shape, src.dtype)
    elif src.shape != dst.shape:
        raise ValueError("src and dst must have equal sizes")
    elif src.dtype != dst.dtype:
        raise TypeError("src and dst must have equal types")

    # fftshift returns a new array, so this is safe when src is dst.
    dst[...] = np.fft.fftshift(src, axes=(0, 1))
    return dst
def img_dft(img):
    """Return the centred log-magnitude Fourier spectrum of ``img``.

    The image is copied into the top-left corner of a zero-filled
    two-channel float64 buffer whose size comes from
    ``cv2.getOptimalDFTSize``. The buffer is transformed in place with
    ``cv2.dft``, then ``log(1 + sqrt(Re^2 + Im^2))`` is computed and its
    quadrants are swapped with ``shift_dft`` so the origin sits at the centre.

    Args:
        img: 2-D image array (height, width).

    Returns:
        The shifted log-magnitude spectrum, sized to the optimal DFT size.
    """
    rows, cols = img.shape[:2]

    # DFT-friendly dimensions for the padded buffer.
    padded_cols = cv2.getOptimalDFTSize(cols)
    padded_rows = cv2.getOptimalDFTSize(rows)

    # Channel 0 carries the real input; channel 1 (imaginary) stays zero.
    spectrum = np.zeros((padded_rows, padded_cols, 2), dtype=np.float64)
    spectrum[:rows, :cols, 0] = img.astype(np.float64)

    # Only the first `rows` rows hold data; pass that to cv2.dft as
    # nonzeroRows.
    cv2.dft(spectrum, dst=spectrum, nonzeroRows=rows)

    real_part = spectrum[:, :, 0]
    imag_part = spectrum[:, :, 1]
    magnitude = cv2.sqrt(real_part ** 2.0 + imag_part ** 2.0)
    log_spectrum = cv2.log(1.0 + magnitude)

    # Put the origin of the spectrum at the image centre (in place).
    shift_dft(log_spectrum, log_spectrum)
    return log_spectrum
def img_threshold(img):
    """Binarise ``img`` after stretching its values to the 0..255 range.

    Note that the min-max normalisation is written back into ``img`` itself.

    Args:
        img: Numeric image array; it is normalised in place.

    Returns:
        A uint8 image produced by ``cv2.threshold`` with threshold 150,
        maximum value 255 and ``cv2.THRESH_BINARY``.
    """
    # Normalise the value range (the output buffer is the input array).
    cv2.normalize(img, img, 0, 255, cv2.NORM_MINMAX)
    as_bytes = img.astype(np.uint8)
    # cv2.threshold returns (retval, image); only the image is needed.
    binary = cv2.threshold(as_bytes, 150, 255, cv2.THRESH_BINARY)[1]
    return binary
def img_angle(img):
    """Estimate candidate skew angles from a binarised spectrum image.

    Hough lines are detected in ``img``. The first line whose theta is not
    within 2 degrees of 0 or pi/2 is used. Its angle is folded into
    [-pi/2, pi/2) and corrected for the aspect ratio of ``img``.

    Args:
        img: Single-channel 8-bit binary image, as accepted by
            ``cv2.HoughLines``.

    Returns:
        A list of ``(angle_in_radians, angle_in_degrees)`` tuples. It is empty
        when no line is detected. When the estimate is beyond +/-45 degrees,
        the list holds the estimate shifted by 90 degrees towards zero,
        followed by the unshifted estimate. Otherwise it holds the single
        estimate, snapped to 0 when smaller than 1.5 degrees.
    """
    angle_ret = []
    lines = cv2.HoughLines(img, 1, np.pi / 180, 80)
    # Compare with `is`: `== None` on an ndarray is an element-wise test.
    if lines is None:
        print("Angle detect fail")
        return angle_ret

    h, w = img.shape[:2]
    near_axis = np.pi / 90  # 2 degrees
    half_pi = np.pi / 2

    angle = 0.0
    # Flatten to (rho, theta) rows; this works whether the result has shape
    # (1, N, 2) or (N, 1, 2), which differs between OpenCV versions.
    for _rho, theta in lines.reshape(-1, 2):
        if abs(theta) < near_axis or abs(theta - half_pi) < near_axis:
            continue
        # Plain float keeps the arithmetic below in double precision.
        angle = float(theta)
        break

    # Fold into [-pi/2, pi/2).
    if angle >= half_pi:
        angle -= np.pi
    # Compensate for the non-square image.
    angle = math.atan(h * math.tan(angle) / w)
    angleD = math.degrees(angle)

    if angleD > 45.0:
        angleD2 = angleD - 90
        angle_ret.append((math.radians(angleD2), angleD2))
    elif angleD < -45.0:
        angleD2 = angleD + 90
        angle_ret.append((math.radians(angleD2), angleD2))
    elif math.fabs(angleD) < 1.5:
        # Treat tiny skews as no skew at all.
        angleD = 0
        angle = 0
    # NOTE(review): this append is assumed to sit outside the if/elif chain,
    # so skews between 1.5 and 45 degrees still yield a result - confirm.
    angle_ret.append((angle, angleD))
    return angle_ret
def img_rotate(filename, angle_list):
    """Load ``filename`` in colour and rotate it by every candidate angle.

    For each candidate the image is first padded with white so that the
    rotated content fits, then rotated about the centre of the padded canvas
    with white fill.

    Args:
        filename: Path of the image to load.
        angle_list: Iterable of ``(angle_in_radians, angle_in_degrees)``
            tuples. The radian value sizes the padding and the degree value
            is passed to ``cv2.getRotationMatrix2D``.

    Returns:
        A list with one rotated image per entry of ``angle_list``; empty if
        the image cannot be read.
    """
    # cv2.CV_LOAD_IMAGE_COLOR was removed from the cv2 module in OpenCV 3;
    # IMREAD_COLOR is the current name of the same flag.
    img = cv2.imread(filename, cv2.IMREAD_COLOR)
    img_ret = []
    if img is None:
        logging.warning("Cannot read image %s", os.path.basename(filename))
        return img_ret

    white = (255, 255, 255)
    src_h, src_w = img.shape[:2]
    for angle, angleD in angle_list:
        sin_val = math.fabs(math.sin(angle))
        cos_val = math.fabs(math.cos(angle))
        # Half of the growth of the rotated bounding box, never negative.
        dx = max(int((src_w * cos_val + src_h * sin_val - src_w) / 2), 0)
        dy = max(int((src_w * sin_val + src_h * cos_val - src_h) / 2), 0)
        padded = cv2.copyMakeBorder(img, dy, dy, dx, dx,
                                    cv2.BORDER_CONSTANT, value=white)
        h, w = padded.shape[:2]
        M = cv2.getRotationMatrix2D((w // 2, h // 2), angleD, 1.0)
        img_ret.append(cv2.warpAffine(padded, M, (w, h), borderValue=white))
    return img_ret
def ImageFix(infile):
    """Deskew the image at ``infile`` and write the corrected candidates.

    Pipeline: load as greyscale -> ``img_scale`` -> ``img_dft`` ->
    ``img_threshold`` -> ``img_angle`` -> ``img_rotate``. Each rotated
    candidate is saved next to the input as ``<name>_<index><ext>``.

    Args:
        infile: Path of the image to correct.

    Returns:
        A list of the file names written; empty if ``infile`` cannot be read
        or no angle is found.
    """
    outfiles = []
    # cv2.CV_LOAD_IMAGE_GRAYSCALE was removed from the cv2 module in
    # OpenCV 3; IMREAD_GRAYSCALE is the current name of the same flag.
    src_img = cv2.imread(infile, cv2.IMREAD_GRAYSCALE)
    # `is None` avoids an element-wise comparison on the loaded array.
    if src_img is None:
        logging.warning("No such file %s", os.path.basename(infile))
        return outfiles

    # Scale and pad.
    scale_img = img_scale(src_img)
    # Fourier spectrum.
    dft_img = img_dft(scale_img)
    # Binarise.
    thres_img = img_threshold(dft_img)
    # Hough line detection and angle computation.
    angle_list = img_angle(thres_img)
    # Rotate the original image by each candidate angle.
    img_list = img_rotate(infile, angle_list)

    fname, fextension = os.path.splitext(infile)
    for i, img in enumerate(img_list):
        filename = fname + '_' + str(i) + fextension
        logging.debug("ImageFix output %s", os.path.basename(filename))
        cv2.imwrite(filename, img)
        outfiles.append(filename)
    return outfiles
if __name__ == '__main__':
    # Command-line entry point: deskew the image named by the only argument
    # and write each candidate as <name>_<index><ext> next to the input.
    if len(sys.argv) != 2:
        print("Arguments number error")
        sys.exit(-1)

    # cv2.CV_LOAD_IMAGE_GRAYSCALE was removed from the cv2 module in
    # OpenCV 3; IMREAD_GRAYSCALE is the current name of the same flag.
    src_img = cv2.imread(sys.argv[1], cv2.IMREAD_GRAYSCALE)
    # `is None` avoids an element-wise comparison on the loaded array.
    if src_img is None:
        print("No such file")
        sys.exit(-1)

    # Scale and pad.
    scale_img = img_scale(src_img)
    # Fourier spectrum.
    dft_img = img_dft(scale_img)
    # Binarise.
    thres_img = img_threshold(dft_img)
    # Hough line detection and angle computation.
    angle_list = img_angle(thres_img)
    # Rotate the original image by each candidate angle.
    img_list = img_rotate(sys.argv[1], angle_list)

    # Write the results.
    fname, fextension = os.path.splitext(sys.argv[1])
    for i, img in enumerate(img_list):
        cv2.imwrite(fname + '_' + str(i) + fextension, img)
    sys.exit(0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment