Skip to content

Instantly share code, notes, and snippets.

@saosir
Last active August 13, 2025 05:54
Show Gist options
  • Save saosir/8b73e96df9a894e9b1ee3e308619eb58 to your computer and use it in GitHub Desktop.
Save saosir/8b73e96df9a894e9b1ee3e308619eb58 to your computer and use it in GitHub Desktop.
文字照片或者图片角度校正,方便 tesseract 进行 ocr 识别
#!/usr/bin/python
# -*- coding:utf8 -*-
import sys
import os
import cv2
import math
import logging
import numpy as np
"""
将倾斜的图片纠正,以便tesseract能够正确识别
"""
def rotate_about_center(src, angle, scale=1.):
    """Rotate ``src`` about its centre on a canvas enlarged to hold the result.

    The rotated image is also written to ``test_dst.jpg`` in the current
    working directory (debug output kept from the original implementation).

    Args:
        src: Image array; its first two shape entries are read as
            (height, width).
        angle: Rotation angle in degrees, as passed to
            ``cv2.getRotationMatrix2D``.
        scale: Scale factor applied together with the rotation.

    Returns:
        The rotated image returned by ``cv2.warpAffine``.
    """
    # Read the size from the argument itself. The previous code used a
    # module-level ``src_img`` that only exists when run as a script.
    h, w = src.shape[:2]
    rangle = np.deg2rad(angle)  # angle in radians
    # Width and height of the bounding box of the rotated, scaled image
    nw = (abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w)) * scale
    nh = (abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w)) * scale
    # Rotation about the centre of the new canvas
    rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, scale)
    # Move from the old centre to the new centre, combined with the rotation
    rot_move = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
    # The move only affects the translation column of the transform
    rot_mat[0, 2] += rot_move[0]
    rot_mat[1, 2] += rot_move[1]
    dst = cv2.warpAffine(
        src,
        rot_mat,
        (int(math.ceil(nw)), int(math.ceil(nh))),
        flags=cv2.INTER_LANCZOS4,
    )
    cv2.imwrite('test_dst.jpg', dst)
    return dst
def img_scale(img, max_side=1024, min_side=1000):
    """Shrink large images and pad small ones with a white border.

    Args:
        img: Image array; its first two shape entries are read as
            (height, width).
        max_side: If the longer side exceeds this, the image is resized
            (same factor on both axes) so that the longer side becomes
            approximately ``max_side``. Shrinking speeds up later steps.
        min_side: Any side shorter than this is padded symmetrically with
            white (255, 255, 255). With an odd shortfall the padded side
            ends up one pixel short of ``min_side``.

    Returns:
        The resized and/or padded image (``img`` itself if nothing changed).
    """
    h, w = img.shape[:2]
    # Scale by the longer side. The previous code used the height whenever
    # it exceeded the limit, leaving wider-than-tall images above max_side.
    longest = max(h, w)
    if longest > max_side:
        rate = max_side / float(longest)
        # Single-argument print: same output on Python 2 and Python 3.
        print("resize rate:  %s" % rate)
        img = cv2.resize(img, None, fx=rate, fy=rate,
                         interpolation=cv2.INTER_CUBIC)

    # Add white margins; this helps recognition of small images.
    h, w = img.shape[:2]
    white = (255, 255, 255)
    if h < min_side:
        border = (min_side - h) // 2  # floor division keeps this an int
        img = cv2.copyMakeBorder(img, border, border, 0, 0,
                                 cv2.BORDER_CONSTANT, value=white)
    if w < min_side:
        border = (min_side - w) // 2
        img = cv2.copyMakeBorder(img, 0, 0, border, border,
                                 cv2.BORDER_CONSTANT, value=white)
    return img
def shift_dft(src, dst=None):
    """Move the origin of a Fourier image to the image centre.

    Swaps quadrant 1 with 3 and 2 with 4 along the first two axes. For even
    sizes this is the same result as the previous hand-written quadrant
    copy. For odd sizes the previous copy overlapped its own writes and left
    cells uninitialised; here the origin lands at index ``n // 2``.

    Args:
        src: Input array with at least two dimensions.
        dst: Optional output array. Must have the same shape and dtype as
            ``src``; may be ``src`` itself for an in-place shift.

    Returns:
        ``dst`` if it was given, otherwise a newly allocated array.

    Raises:
        ValueError: If ``dst`` is given and its shape differs from ``src``.
        TypeError: If ``dst`` is given and its dtype differs from ``src``.
    """
    if dst is not None:
        if src.shape != dst.shape:
            raise ValueError("src and dst must have equal sizes")
        if src.dtype != dst.dtype:
            raise TypeError("src and dst must have equal types")

    # fftshift returns a fresh array, so src and dst may alias safely.
    shifted = np.fft.fftshift(src, axes=(0, 1))
    if dst is None:
        return shifted
    dst[...] = shifted
    return dst
def img_dft(img):
    """Return the centred log-magnitude Fourier spectrum of ``img``.

    Args:
        img: Image array whose first two shape entries are (height, width).
            It is written into a single channel of the DFT buffer, so a
            single-channel image is expected.

    Returns:
        A float64 array holding ``log(1 + |DFT|)``, sized to the padded DFT
        dimensions, with quadrants rearranged by ``shift_dft``.
    """
    rows, cols = img.shape[:2]
    # Sizes chosen by OpenCV for the DFT
    padded_rows = cv2.getOptimalDFTSize(rows)
    padded_cols = cv2.getOptimalDFTSize(cols)

    # Two-plane buffer: plane 0 receives the image, the rest stays zero.
    buf = np.zeros((padded_rows, padded_cols, 2), dtype=np.float64)
    buf[:rows, :cols, 0] = img.astype(np.float64)

    # nonzeroRows tells cv2.dft that only the first ``rows`` rows hold data.
    cv2.dft(buf, dst=buf, nonzeroRows=rows)

    # Magnitude of the spectrum: sqrt(Re^2 + Im^2)
    re_part, im_part = cv2.split(buf)
    mag = cv2.sqrt(re_part**2.0 + im_part**2.0)

    # Log scale, then move the origin to the image centre (in place).
    spectrum = cv2.log(1.0 + mag)
    shift_dft(spectrum, spectrum)
    return spectrum
def img_threshold(img):
    """Normalise ``img`` to 0..255 and binarise it at a fixed threshold.

    ``img`` is passed to ``cv2.normalize`` as both source and destination.

    Args:
        img: Input array.

    Returns:
        The uint8 image produced by ``cv2.threshold`` with threshold 150,
        maximum value 255 and ``cv2.THRESH_BINARY``.
    """
    # Bring the value range to [0, 255]
    cv2.normalize(img, img, 0, 255, cv2.NORM_MINMAX)
    as_bytes = img.astype(np.uint8)
    # cv2.threshold returns (retval, image); only the image is needed.
    binary = cv2.threshold(as_bytes, 150, 255, cv2.THRESH_BINARY)[1]
    return binary
def img_angle(img):
    """Estimate candidate skew angles from a binarised Fourier spectrum.

    Runs a Hough line transform on ``img`` and takes the first detected line
    that is not within 2 degrees of horizontal or vertical. Its angle is
    corrected for the aspect ratio of ``img``.

    Args:
        img: Binary single-channel image passed to ``cv2.HoughLines``.

    Returns:
        A list of ``(radians, degrees)`` tuples describing the same angle in
        both units. It is empty when no line is detected. When the
        estimate lies beyond +/-45 degrees, the list holds the estimate
        shifted by 90 degrees followed by the raw estimate; otherwise it
        holds the raw estimate only (snapped to 0 when below 1.5 degrees).
    """
    angle_ret = []
    lines = cv2.HoughLines(img, 1, np.pi / 180, 80)
    # Identity test: ``== None`` on an ndarray is an elementwise comparison.
    if lines is None:
        print("Angle detect fail")
        return angle_ret

    h, w = img.shape[:2]
    axis_tol = np.pi / 90  # 2 degrees
    half_pi = np.pi / 2

    angle = 0
    # OpenCV 2.x returns shape (1, N, 2) and later versions (N, 1, 2);
    # flatten so every detected (rho, theta) pair is visited in order.
    for _rho, theta in np.reshape(lines, (-1, 2)):
        if abs(theta) < axis_tol or abs(theta - half_pi) < axis_tol:
            continue
        angle = theta
        break

    # Map theta from [0, pi) into [-pi/2, pi/2).
    angle = angle if angle < half_pi else angle - np.pi
    if angle != half_pi:
        # Correct for the non-square aspect ratio of the spectrum image.
        angle = math.atan(h * math.tan(angle) / w)
    angleD = angle * 180 / np.pi

    if angleD > 45.0:
        alt_deg = angleD - 90
        # Radians must describe the same angle as the degrees they pair with.
        angle_ret.append((alt_deg * np.pi / 180, alt_deg))
    elif angleD < -45.0:
        alt_deg = angleD + 90
        angle_ret.append((alt_deg * np.pi / 180, alt_deg))
    elif math.fabs(angleD) < 1.5:
        # Treat a very small skew as no skew.
        angleD = 0
        angle = 0
    angle_ret.append((angle, angleD))
    return angle_ret
def img_rotate(filename, angle_list):
    """Load ``filename`` in colour and rotate it once per candidate angle.

    Before rotating, the image is padded with white so that the rotated
    content fits on the canvas.

    Args:
        filename: Path of the image to read.
        angle_list: Iterable of ``(radians, degrees)`` tuples. The radians
            value sizes the padding; the degrees value is the rotation
            passed to ``cv2.getRotationMatrix2D``.

    Returns:
        A list with one rotated image per entry of ``angle_list``; empty if
        the file could not be read.
    """
    img_ret = []
    img = cv2.imread(filename, cv2.IMREAD_COLOR)
    if img is None:
        # imread signals failure by returning None rather than raising.
        logging.warning("No such file %s", os.path.basename(filename))
        return img_ret

    src_h, src_w = img.shape[:2]
    white = (255, 255, 255)
    for angle, angleD in angle_list:
        sin_val = math.fabs(math.sin(angle))
        cos_val = math.fabs(math.cos(angle))
        # Half of the growth of the rotated bounding box, never negative.
        dx = max(int((src_w * cos_val + src_h * sin_val - src_w) / 2), 0)
        dy = max(int((src_w * sin_val + src_h * cos_val - src_h) / 2), 0)
        padded = cv2.copyMakeBorder(img, dy, dy, dx, dx,
                                    cv2.BORDER_CONSTANT, value=white)
        h, w = padded.shape[:2]
        # Floor division keeps the integer centre of the original code
        # regardless of the interpreter's division semantics.
        center = (float(w // 2), float(h // 2))
        rot_mat = cv2.getRotationMatrix2D(center, angleD, 1.0)
        rotated = cv2.warpAffine(padded, rot_mat, (w, h), borderValue=white)
        img_ret.append(rotated)
    return img_ret
def ImageFix(infile):
    """Deskew the image at ``infile`` and write the corrected candidates.

    The pipeline is: grayscale load, scale/pad, DFT log spectrum,
    binarisation, Hough-based angle detection, then rotation of the original
    colour image. Each candidate is saved next to the input as
    ``<name>_<index><ext>``.

    Args:
        infile: Path of the image to correct.

    Returns:
        List of the file names written; empty if the image cannot be read
        or no angle is detected.
    """
    outfiles = []
    src_img = cv2.imread(infile, cv2.IMREAD_GRAYSCALE)
    # imread returns None on failure; ``== None`` on an ndarray would be an
    # elementwise comparison, so test identity instead.
    if src_img is None:
        logging.warning("No such file %s", os.path.basename(infile))
        return outfiles

    # Scale and pad
    scale_img = img_scale(src_img)
    # Fourier log spectrum
    dft_img = img_dft(scale_img)
    # Binarise
    thres_img = img_threshold(dft_img)
    # Hough line detection and angle computation
    angle_list = img_angle(thres_img)
    # Rotate the original image by each candidate angle
    img_list = img_rotate(infile, angle_list)

    fname, fextension = os.path.splitext(infile)
    for i, img in enumerate(img_list):
        filename = fname + '_' + str(i) + fextension
        logging.debug("ImageFix output %s", os.path.basename(filename))
        cv2.imwrite(filename, img)
        outfiles.append(filename)
    return outfiles
if __name__ == '__main__':
    # Command-line entry point: deskew the single image given as argument
    # and write each candidate next to it as <name>_<index><ext>.
    if len(sys.argv) != 2:
        print("Arguments number error")
        sys.exit(-1)
    src_img = cv2.imread(sys.argv[1], cv2.IMREAD_GRAYSCALE)
    # imread returns None on failure; compare by identity, not ``==``.
    if src_img is None:
        print("No such file")
        sys.exit(-1)
    # Scale and pad
    scale_img = img_scale(src_img)
    # Fourier log spectrum
    dft_img = img_dft(scale_img)
    # Binarise
    thres_img = img_threshold(dft_img)
    # Hough line detection and angle computation
    angle_list = img_angle(thres_img)
    # Rotate the original image by each candidate angle
    img_list = img_rotate(sys.argv[1], angle_list)
    # Write the results
    fname, fextension = os.path.splitext(sys.argv[1])
    for i, img in enumerate(img_list):
        filename = fname + '_' + str(i) + fextension
        cv2.imwrite(filename, img)
    sys.exit(0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment