saosir · August 13, 2025 05:54
diff --git a/image_fix.py b/image_fix.py
 #!/usr/bin/python
 # -*- coding:utf8 -*-

 import sys
 import os
 import cv2
 import math
 import logging
 import numpy as np
 """
 将倾斜的图片纠正，以便tesseract能够正确识别
 """
 def rotate_about_center(src, angle, scale=1.):
    """Rotate ``src`` about its center onto a canvas sized to the rotated image.

    The output canvas is the bounding box of the rotated, scaled input.  The
    result is written to 'test_dst.jpg' (relative to the current working
    directory) and is also returned.

    Args:
        src: image array; its first two dimensions are read as (height, width).
        angle: rotation angle in degrees (handed to cv2.getRotationMatrix2D).
        scale: scale factor applied together with the rotation.

    Returns:
        The image produced by cv2.warpAffine.
    """
    # Size comes from the argument itself.  The previous code read the
    # module-level ``src_img``, which only exists when the file runs as a
    # script and may not even be the image being rotated.
    h, w = src.shape[:2]
    rangle = np.deg2rad(angle)  # angle in radians
    # Width/height of the bounding box of the rotated, scaled image.
    abs_sin = abs(np.sin(rangle))
    abs_cos = abs(np.cos(rangle))
    nw = (abs_sin * h + abs_cos * w) * scale
    nh = (abs_cos * h + abs_sin * w) * scale
    # Rotation about the center of the new canvas.
    rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, scale)
    # Move from the old center to the new center, expressed in the rotated
    # frame; it only affects the translation column of the transform.
    rot_move = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
    rot_mat[0, 2] += rot_move[0]
    rot_mat[1, 2] += rot_move[1]
    out_size = (int(math.ceil(nw)), int(math.ceil(nh)))
    dst = cv2.warpAffine(src, rot_mat, out_size, flags=cv2.INTER_LANCZOS4)
    cv2.imwrite('test_dst.jpg', dst)
    return dst
    
 def img_scale(img):
    """Shrink large images and pad small ones with a white border.

    Step 1: if either side exceeds 1024 px, resize uniformly so the longest
    side becomes 1024 px (bicubic interpolation).
    Step 2: if the height (then the width) is below 1000 px, add equal
    borders on both sides of that axis; the border value is (255, 255, 255).

    Args:
        img: image array; its first two dimensions are (height, width).

    Returns:
        The resized and/or padded image (the input object itself when no
        change was needed).
    """
    # Shrink to speed up later processing.  The rate is derived from the
    # longest side: the previous code used the height whenever it exceeded
    # 1024, which left a wider-than-tall image above the limit.
    h, w = img.shape[:2]
    longest = max(h, w)
    if longest > 1024:
        rate = 1024 / float(longest)
        # Single-argument form prints the same text under Python 2 and 3.
        print("resize rate:  %s" % rate)
        img = cv2.resize(img, None, fx=rate, fy=rate, interpolation=cv2.INTER_CUBIC)
    # Pad with white; this helps recognition of small images.
    h, w = img.shape[:2]
    if h < 1000:
        # Floor division keeps the border an int on Python 3 as well.
        border = (1000 - h) // 2
        img = cv2.copyMakeBorder(img, border, border, 0, 0, cv2.BORDER_CONSTANT, value=(255, 255, 255))
    if w < 1000:
        border = (1000 - w) // 2
        img = cv2.copyMakeBorder(img, 0, 0, border, border, cv2.BORDER_CONSTANT, value=(255, 255, 255))
    return img
    
 def shift_dft(src, dst=None):
    """Rearrange the quadrants of a Fourier image so the origin is centered.

    Swaps quadrant 1 with 3 and quadrant 2 with 4 along the first two axes.
    For even sizes this is exactly the previous quadrant swap; for odd sizes
    the previous slice arithmetic overlapped and left some cells of the
    output unwritten, whereas every element is now moved exactly once and
    element [0, 0] lands on index (h // 2, w // 2).

    Args:
        src: array with at least two dimensions.
        dst: optional output array with the same shape and dtype as ``src``.
            May be ``src`` itself for an in-place shift.  Allocated when
            omitted.

    Returns:
        ``dst`` (the array passed in, or the newly allocated one).

    Raises:
        ValueError: if ``src`` and ``dst`` differ in shape.
        TypeError: if ``src`` and ``dst`` differ in dtype.
    """
    if dst is None:
        dst = np.empty(src.shape, src.dtype)
    elif src.shape != dst.shape:
        raise ValueError("src and dst must have equal sizes")
    elif src.dtype != dst.dtype:
        raise TypeError("src and dst must have equal types")
    # fftshift builds a new array, so writing it back is safe even when
    # src and dst are the same object.
    dst[...] = np.fft.fftshift(src, axes=(0, 1))
    return dst

 def img_dft(img):
    """Return the centered log-magnitude spectrum of ``img``.

    The image is copied into the real plane of a zero-filled two-channel
    float64 buffer whose size is the optimal DFT size reported by OpenCV,
    transformed with cv2.dft, converted to log(1 + magnitude) and finally
    quadrant-shifted with shift_dft.

    Args:
        img: 2-D image array (it is assigned into a single plane of the
            buffer, so it must be single-channel).

    Returns:
        float64 array of the padded size holding log(1 + |DFT|).
    """
    rows, cols = img.shape[:2]
    padded_rows = cv2.getOptimalDFTSize(rows)
    padded_cols = cv2.getOptimalDFTSize(cols)
    # Channel 0 is the real part, channel 1 the imaginary part; everything
    # outside the image area stays zero.
    spectrum = np.zeros((padded_rows, padded_cols, 2), dtype=np.float64)
    spectrum[:rows, :cols, 0] = img.astype(np.float64)
    # nonzeroRows tells cv2.dft that only the first `rows` rows hold data.
    cv2.dft(spectrum, dst=spectrum, nonzeroRows=rows)
    real_part, imag_part = cv2.split(spectrum)
    # Mag = sqrt(Re^2 + Im^2), then compress the range with log(1 + Mag).
    magnitude = cv2.sqrt(real_part**2.0 + imag_part**2.0)
    log_spectrum = cv2.log(1.0 + magnitude)
    # Move the origin of the spectrum to the image center, in place.
    shift_dft(log_spectrum, log_spectrum)
    return log_spectrum
    
 def img_threshold(img):
    """Binarize ``img`` after stretching its values to the 0..255 range.

    Note that the min-max normalization writes back into ``img`` itself.

    Args:
        img: numeric image array (ImageFix passes the log spectrum).

    Returns:
        uint8 image where values above 150 are 255 and all others are 0.
    """
    # Normalize the value range in place.
    cv2.normalize(img, img, 0, 255, cv2.NORM_MINMAX)
    as_bytes = img.astype(np.uint8)
    # Fixed-level binary threshold; the first return value is not needed.
    _, binary = cv2.threshold(as_bytes, 150, 255, cv2.THRESH_BINARY)
    return binary

 def img_angle(img):
    """Estimate candidate skew angles from a binary image via Hough lines.

    The first detected line whose theta is not within 2 degrees of 0 or of
    pi/2 is taken as the dominant direction.  Its angle is folded into
    [-pi/2, pi/2), its tangent is rescaled by height/width of ``img``, and
    the result is reported in radians and degrees.

    Args:
        img: 8-bit binary image (ImageFix passes the thresholded spectrum).

    Returns:
        A list of (angle_radians, angle_degrees) tuples:
          * empty when no line is detected;
          * [(0, 0)] when the angle is below 1.5 degrees or no usable line
            was found;
          * two entries when the angle exceeds 45 degrees in magnitude: the
            complementary angle (shifted by 90 degrees toward zero) first,
            then the raw angle;
          * otherwise a single entry with the raw angle.
    """
    angle_ret = []
    lines = cv2.HoughLines(img, 1, np.pi/180, 80)
    # Identity test: "== None" on an ndarray compares element-wise.
    if lines is None:
        print("Angle detect fail")
        return angle_ret

    h, w = img.shape[:2]
    pi_thresh = np.pi / 90
    half_pi = np.pi / 2
    angle = 0
    # Flatten to (N, 2) so both the (1, N, 2) and the (N, 1, 2) layouts
    # returned by different OpenCV versions are iterated line by line.
    for rho, theta in np.reshape(lines, (-1, 2)):
        # Skip (near-)vertical and (near-)horizontal lines.
        if abs(theta) < pi_thresh or abs(theta - half_pi) < pi_thresh:
            continue
        angle = theta
        break
    # Fold [pi/2, pi) onto [-pi/2, 0).
    if angle >= half_pi:
        angle = angle - np.pi
    # Rescale the slope by the image aspect ratio.
    angle = math.atan(h * math.tan(angle) / w)
    angleD = angle * 180 / np.pi
    if abs(angleD) > 45.0:
        # Also offer the complementary rotation.  Its radian value is derived
        # from angleD2 so both tuple members describe the same rotation; the
        # previous code converted angleD here, pairing mismatched values.
        angleD2 = angleD - 90 if angleD > 0 else angleD + 90
        angle_ret.append((angleD2 * np.pi / 180, angleD2))
    elif math.fabs(angleD) < 1.5:
        # Treat tiny angles as "no rotation needed".
        angle = 0
        angleD = 0
    angle_ret.append((angle, angleD))
    return angle_ret
    
 def img_rotate(filename, angle_list):
    """Load ``filename`` in color and rotate it once per entry of ``angle_list``.

    For each (radians, degrees) pair the image is first padded with white so
    the rotated content fits, then rotated about the padded image's center.

    Args:
        filename: path of the image to read.
        angle_list: iterable of (angle_radians, angle_degrees) tuples; the
            radian value sizes the padding, the degree value is the rotation.

    Returns:
        A list with one rotated image per entry of ``angle_list``.
    """
    img = cv2.imread(filename, cv2.CV_LOAD_IMAGE_COLOR)
    img_ret = []
    for angle, angleD in angle_list:
        h, w = img.shape[:2]
        abs_sin = math.fabs(math.sin(angle))
        abs_cos = math.fabs(math.cos(angle))
        # Half of the growth of the bounding box on each axis, never negative.
        pad_x = max(int((w * abs_cos + h * abs_sin - w)/2), 0)
        pad_y = max(int((w * abs_sin + h * abs_cos - h)/2), 0)
        padded = cv2.copyMakeBorder(img, pad_y, pad_y, pad_x, pad_x, cv2.BORDER_CONSTANT, value = (255, 255, 255))
        padded_h, padded_w = padded.shape[:2]
        rotation = cv2.getRotationMatrix2D((padded_w/2, padded_h/2), angleD, 1.0)
        rotated = cv2.warpAffine(padded, rotation, (padded_w, padded_h), borderValue = (255, 255,255))
        img_ret.append(rotated)
    return img_ret

 def ImageFix(infile):
    """Deskew the image at ``infile`` and write the corrected candidates.

    Pipeline: grayscale load -> img_scale -> img_dft -> img_threshold ->
    img_angle -> img_rotate.  Each rotated candidate is written next to the
    input as '<name>_<index><ext>'.

    Args:
        infile: path of the image to correct.

    Returns:
        List of the written file paths; empty when the image cannot be read
        or no angle was detected.
    """
    outfiles = []
    src_img = cv2.imread(infile, cv2.CV_LOAD_IMAGE_GRAYSCALE)
    # cv2.imread signals failure with None.  Use an identity test: "== None"
    # on a loaded ndarray compares element-wise and cannot be used in "if".
    if src_img is None:
        logging.warning("No such file %s", os.path.basename(infile))
        return outfiles
    # Scale and pad.
    scale_img = img_scale(src_img)

    # Fourier spectrum.
    dft_img = img_dft(scale_img)

    # Binarize.
    thres_img = img_threshold(dft_img)

    # Hough line detection and angle computation.
    angle_list = img_angle(thres_img)

    # Rotation correction.
    img_list = img_rotate(infile, angle_list)
    fname, fextension = os.path.splitext(infile)
    for i, img in enumerate(img_list):
        filename = fname + '_' + str(i) + fextension
        logging.debug("ImageFix output %s", os.path.basename(filename))
        cv2.imwrite(filename, img)
        outfiles.append(filename)
    return outfiles
    
 if __name__ == '__main__':
    # Command-line entry point: image_fix.py <image>.  Runs the same pipeline
    # as ImageFix and writes '<name>_<index><ext>' next to the input.
    if len(sys.argv) != 2:
        print("Arguments number error")
        sys.exit(-1)
    src_img = cv2.imread(sys.argv[1], cv2.CV_LOAD_IMAGE_GRAYSCALE)
    # Identity test: "== None" on a loaded ndarray compares element-wise.
    if src_img is None:
        print("No such file")
        sys.exit(-1)

    # Scale and pad.
    scale_img = img_scale(src_img)

    # Fourier spectrum.
    dft_img = img_dft(scale_img)

    # Binarize.
    thres_img = img_threshold(dft_img)

    # Hough line detection and angle computation.
    angle_list = img_angle(thres_img)

    # Rotation correction.
    img_list = img_rotate(sys.argv[1], angle_list)

    # Write the candidates.
    fname, fextension = os.path.splitext(sys.argv[1])
    for i, img in enumerate(img_list):
        filename = fname + '_' + str(i) + fextension
        cv2.imwrite(filename, img)
    sys.exit(0)
	#!/usr/bin/python
	# -*- coding:utf8 -*-

	import sys
	import os
	import cv2
	import math
	import logging
	import numpy as np
	"""
	将倾斜的图片纠正，以便tesseract能够正确识别
	"""
	def rotate_about_center(src, angle, scale=1.):
	    """Rotate ``src`` about its center onto a canvas sized to the rotated image.

	    The output canvas is the bounding box of the rotated, scaled input.  The
	    result is written to 'test_dst.jpg' (relative to the current working
	    directory) and is also returned.

	    Args:
	        src: image array; its first two dimensions are read as (height, width).
	        angle: rotation angle in degrees (handed to cv2.getRotationMatrix2D).
	        scale: scale factor applied together with the rotation.

	    Returns:
	        The image produced by cv2.warpAffine.
	    """
	    # Size comes from the argument itself rather than from a module-level
	    # ``src_img``, which only exists when the file runs as a script.
	    h, w = src.shape[:2]
	    rangle = np.deg2rad(angle)  # angle in radians
	    # Width/height of the bounding box of the rotated, scaled image.
	    abs_sin = abs(np.sin(rangle))
	    abs_cos = abs(np.cos(rangle))
	    nw = (abs_sin * h + abs_cos * w) * scale
	    nh = (abs_cos * h + abs_sin * w) * scale
	    # Rotation about the center of the new canvas.
	    rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, scale)
	    # Move from the old center to the new center, expressed in the rotated
	    # frame; it only affects the translation column of the transform.
	    rot_move = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
	    rot_mat[0, 2] += rot_move[0]
	    rot_mat[1, 2] += rot_move[1]
	    out_size = (int(math.ceil(nw)), int(math.ceil(nh)))
	    dst = cv2.warpAffine(src, rot_mat, out_size, flags=cv2.INTER_LANCZOS4)
	    cv2.imwrite('test_dst.jpg', dst)
	    return dst

	def img_scale(img):
	    """Shrink large images and pad small ones with a white border.

	    Step 1: if either side exceeds 1024 px, resize uniformly so the longest
	    side becomes 1024 px (bicubic interpolation).
	    Step 2: if the height (then the width) is below 1000 px, add equal
	    borders on both sides of that axis; the border value is (255, 255, 255).

	    Args:
	        img: image array; its first two dimensions are (height, width).

	    Returns:
	        The resized and/or padded image (the input object itself when no
	        change was needed).
	    """
	    # Shrink to speed up later processing; the rate is derived from the
	    # longest side so neither dimension stays above the limit.
	    h, w = img.shape[:2]
	    longest = max(h, w)
	    if longest > 1024:
	        rate = 1024 / float(longest)
	        # Single-argument form prints the same text under Python 2 and 3.
	        print("resize rate:  %s" % rate)
	        img = cv2.resize(img, None, fx=rate, fy=rate, interpolation=cv2.INTER_CUBIC)
	    # Pad with white; this helps recognition of small images.
	    h, w = img.shape[:2]
	    if h < 1000:
	        # Floor division keeps the border an int on Python 3 as well.
	        border = (1000 - h) // 2
	        img = cv2.copyMakeBorder(img, border, border, 0, 0, cv2.BORDER_CONSTANT, value=(255, 255, 255))
	    if w < 1000:
	        border = (1000 - w) // 2
	        img = cv2.copyMakeBorder(img, 0, 0, border, border, cv2.BORDER_CONSTANT, value=(255, 255, 255))
	    return img

	def shift_dft(src, dst=None):
	    """Rearrange the quadrants of a Fourier image so the origin is centered.

	    Swaps quadrant 1 with 3 and quadrant 2 with 4 along the first two axes.
	    Every element is moved exactly once, for even and odd sizes alike, and
	    element [0, 0] lands on index (h // 2, w // 2).

	    Args:
	        src: array with at least two dimensions.
	        dst: optional output array with the same shape and dtype as ``src``.
	            May be ``src`` itself for an in-place shift.  Allocated when
	            omitted.

	    Returns:
	        ``dst`` (the array passed in, or the newly allocated one).

	    Raises:
	        ValueError: if ``src`` and ``dst`` differ in shape.
	        TypeError: if ``src`` and ``dst`` differ in dtype.
	    """
	    if dst is None:
	        dst = np.empty(src.shape, src.dtype)
	    elif src.shape != dst.shape:
	        raise ValueError("src and dst must have equal sizes")
	    elif src.dtype != dst.dtype:
	        raise TypeError("src and dst must have equal types")
	    # fftshift builds a new array, so writing it back is safe even when
	    # src and dst are the same object.
	    dst[...] = np.fft.fftshift(src, axes=(0, 1))
	    return dst

	def img_dft(img):
	    """Return the centered log-magnitude spectrum of ``img``.

	    The image is copied into the real plane of a zero-filled two-channel
	    float64 buffer whose size is the optimal DFT size reported by OpenCV,
	    transformed with cv2.dft, converted to log(1 + magnitude) and finally
	    quadrant-shifted with shift_dft.

	    Args:
	        img: 2-D image array (it is assigned into a single plane of the
	            buffer, so it must be single-channel).

	    Returns:
	        float64 array of the padded size holding log(1 + |DFT|).
	    """
	    h, w = img.shape[:2]
	    realInput = img.astype(np.float64)
	    # Perform an optimally sized DFT.
	    dft_M = cv2.getOptimalDFTSize(w)
	    dft_N = cv2.getOptimalDFTSize(h)
	    # Channel 0 is the real part, channel 1 the imaginary part; everything
	    # outside the image area stays zero.
	    dft_A = np.zeros((dft_N, dft_M, 2), dtype=np.float64)
	    dft_A[:h, :w, 0] = realInput
	    # nonzeroRows tells cv2.dft that only the first h rows hold data.
	    cv2.dft(dft_A, dst=dft_A, nonzeroRows=h)
	    image_Re, image_Im = cv2.split(dft_A)
	    # Mag = sqrt(Re^2 + Im^2); the "**" operators were lost in the paste.
	    magnitude = cv2.sqrt(image_Re**2.0 + image_Im**2.0)
	    # Compress the dynamic range with log(1 + Mag).
	    log_spectrum = cv2.log(1.0 + magnitude)
	    # Move the origin of the spectrum to the image center, in place.
	    shift_dft(log_spectrum, log_spectrum)
	    return log_spectrum

	def img_threshold(img):
	    """Binarize ``img`` after stretching its values to the 0..255 range.

	    Note that the min-max normalization writes back into ``img`` itself.

	    Args:
	        img: numeric image array (ImageFix passes the log spectrum).

	    Returns:
	        uint8 image where values above 150 are 255 and all others are 0.
	    """
	    # Normalize the value range in place.
	    cv2.normalize(img, img, 0, 255, cv2.NORM_MINMAX)
	    img = img.astype(np.uint8)
	    # Fixed-level binary threshold; the first return value is not needed.
	    ret, thresh = cv2.threshold(img, 150, 255, cv2.THRESH_BINARY)
	    return thresh

	def img_angle(img):
	    """Estimate candidate skew angles from a binary image via Hough lines.

	    The first detected line whose theta is not within 2 degrees of 0 or of
	    pi/2 is taken as the dominant direction.  Its angle is folded into
	    [-pi/2, pi/2), its tangent is rescaled by height/width of ``img``, and
	    the result is reported in radians and degrees.

	    Args:
	        img: 8-bit binary image (ImageFix passes the thresholded spectrum).

	    Returns:
	        A list of (angle_radians, angle_degrees) tuples:
	          * empty when no line is detected;
	          * [(0, 0)] when the angle is below 1.5 degrees or no usable line
	            was found;
	          * two entries when the angle exceeds 45 degrees in magnitude: the
	            complementary angle (shifted by 90 degrees toward zero) first,
	            then the raw angle;
	          * otherwise a single entry with the raw angle.
	    """
	    angle_ret = []
	    lines = cv2.HoughLines(img, 1, np.pi/180, 80)
	    # Identity test: "== None" on an ndarray compares element-wise.
	    if lines is None:
	        print("Angle detect fail")
	        return angle_ret

	    h, w = img.shape[:2]
	    pi_thresh = np.pi / 90
	    half_pi = np.pi / 2
	    angle = 0
	    # Flatten to (N, 2) so both the (1, N, 2) and the (N, 1, 2) layouts
	    # returned by different OpenCV versions are iterated line by line.
	    for rho, theta in np.reshape(lines, (-1, 2)):
	        # Skip (near-)vertical and (near-)horizontal lines.
	        if abs(theta) < pi_thresh or abs(theta - half_pi) < pi_thresh:
	            continue
	        angle = theta
	        break
	    # Fold [pi/2, pi) onto [-pi/2, 0).
	    if angle >= half_pi:
	        angle = angle - np.pi
	    # Rescale the slope by the image aspect ratio.
	    angle = math.atan(h * math.tan(angle) / w)
	    angleD = angle * 180 / np.pi
	    if abs(angleD) > 45.0:
	        # Also offer the complementary rotation; its radian value is derived
	        # from angleD2 so both tuple members describe the same rotation.
	        angleD2 = angleD - 90 if angleD > 0 else angleD + 90
	        angle_ret.append((angleD2 * np.pi / 180, angleD2))
	    elif math.fabs(angleD) < 1.5:
	        # Treat tiny angles as "no rotation needed".
	        angle = 0
	        angleD = 0
	    angle_ret.append((angle, angleD))
	    return angle_ret

	def img_rotate(filename, angle_list):
	    """Load ``filename`` in color and rotate it once per entry of ``angle_list``.

	    For each (radians, degrees) pair the image is first padded with white so
	    the rotated content fits, then rotated about the padded image's center.

	    Args:
	        filename: path of the image to read.
	        angle_list: iterable of (angle_radians, angle_degrees) tuples; the
	            radian value sizes the padding, the degree value is the rotation.

	    Returns:
	        A list with one rotated image per entry of ``angle_list``.
	    """
	    img = cv2.imread(filename, cv2.CV_LOAD_IMAGE_COLOR)
	    img_ret = []
	    for angle, angleD in angle_list:
	        h, w = img.shape[:2]
	        sinVal = math.fabs(math.sin(angle))
	        cosVal = math.fabs(math.cos(angle))
	        # Half of the growth of the bounding box on each axis, never negative.
	        dx = max(int((w * cosVal + h * sinVal - w)/2), 0)
	        dy = max(int((w * sinVal + h * cosVal - h)/2), 0)
	        dst_img = cv2.copyMakeBorder(img, dy, dy, dx, dx, cv2.BORDER_CONSTANT, value = (255, 255, 255))
	        h, w = dst_img.shape[:2]
	        M = cv2.getRotationMatrix2D((w/2, h/2), angleD, 1.0)
	        dst_img = cv2.warpAffine(dst_img, M, (w, h), borderValue = (255, 255,255))
	        img_ret.append(dst_img)
	    return img_ret

	def ImageFix(infile):
	    """Deskew the image at ``infile`` and write the corrected candidates.

	    Pipeline: grayscale load -> img_scale -> img_dft -> img_threshold ->
	    img_angle -> img_rotate.  Each rotated candidate is written next to the
	    input as '<name>_<index><ext>'.

	    Args:
	        infile: path of the image to correct.

	    Returns:
	        List of the written file paths; empty when the image cannot be read
	        or no angle was detected.
	    """
	    outfiles = []
	    src_img = cv2.imread(infile, cv2.CV_LOAD_IMAGE_GRAYSCALE)
	    # cv2.imread signals failure with None.  Use an identity test: "== None"
	    # on a loaded ndarray compares element-wise and cannot be used in "if".
	    if src_img is None:
	        logging.warning("No such file %s", os.path.basename(infile))
	        return outfiles
	    # Scale and pad.
	    scale_img = img_scale(src_img)

	    # Fourier spectrum.
	    dft_img = img_dft(scale_img)

	    # Binarize.
	    thres_img = img_threshold(dft_img)

	    # Hough line detection and angle computation.
	    angle_list = img_angle(thres_img)

	    # Rotation correction.
	    img_list = img_rotate(infile, angle_list)
	    fname, fextension = os.path.splitext(infile)
	    for i, img in enumerate(img_list):
	        filename = fname + '_' + str(i) + fextension
	        logging.debug("ImageFix output %s", os.path.basename(filename))
	        cv2.imwrite(filename, img)
	        outfiles.append(filename)
	    return outfiles

	if __name__ == '__main__':
	    # Command-line entry point: image_fix.py <image>.  Runs the same pipeline
	    # as ImageFix and writes '<name>_<index><ext>' next to the input.
	    if len(sys.argv) != 2:
	        print("Arguments number error")
	        sys.exit(-1)
	    src_img = cv2.imread(sys.argv[1], cv2.CV_LOAD_IMAGE_GRAYSCALE)
	    # Identity test: "== None" on a loaded ndarray compares element-wise.
	    if src_img is None:
	        print("No such file")
	        sys.exit(-1)

	    # Scale and pad.
	    scale_img = img_scale(src_img)

	    # Fourier spectrum.
	    dft_img = img_dft(scale_img)

	    # Binarize.
	    thres_img = img_threshold(dft_img)

	    # Hough line detection and angle computation.
	    angle_list = img_angle(thres_img)

	    # Rotation correction.
	    img_list = img_rotate(sys.argv[1], angle_list)

	    # Write the candidates.
	    fname, fextension = os.path.splitext(sys.argv[1])
	    for i, img in enumerate(img_list):
	        filename = fname + '_' + str(i) + fextension
	        cv2.imwrite(filename, img)
	    sys.exit(0)
No results found