lekhnath · April 1, 2025 20:16
diff --git a/optimal-resize.py b/optimal-resize.py
 #Optimally resize `img` according to the bounding boxes specified in `boxes` (which is simply the (pruned) results from `pytesseract.image_to_data()`).
 #Tesseract performs optimally when capital letters are ~32px tall (https://groups.google.com/g/tesseract-ocr/c/Wdh_JJwnw94/m/24JHDYQbBQAJ). Smaller text obviously can't be OCR'd as accurately, but weirdly enough, larger text causes problems as well. So, this function uses the bounding boxes we've found and resizes the image so that the median line height should be ~32px.
 def optimal_resize(img, boxes):
    """Rescale `img` so that the median bounding-box height becomes ~32px.

    Args:
        img: Image to resize; it is handed unchanged to `cv2.resize`.
        boxes: Object whose "height" entry holds the bounding-box heights in pixels.

    Returns:
        `img` itself when there is no usable median height or when the scale
        factor is within 7% of 1.0; otherwise the result of `cv2.resize`.
    """
    heights = np.asarray(boxes["height"], dtype=float)

    #With no boxes at all the median would be NaN, so there is nothing to scale towards: leave the image alone.
    if(heights.size == 0):
        return img

    median_height = np.median(heights)

    #A zero, negative, NaN or infinite median cannot produce a scale factor that cv2.resize accepts.
    if(not (np.isfinite(median_height) and median_height > 0)):
        return img

    target_height = 32 #See https://groups.google.com/g/tesseract-ocr/c/Wdh_JJwnw94/m/24JHDYQbBQAJ
    scale_factor = target_height / median_height
    print("Scale factor: " + str(scale_factor))

    #If the image is already within `skip_percentage` of the target size, just return the original image (it's better to skip resizing if we can)
    skip_percentage = 0.07
    if(1 - skip_percentage < scale_factor < 1 + skip_percentage):
        return img

    #Bicubic for enlarging, "pixel area relation" for reduction. (See https://chadrick-kwag.net/cv2-resize-interpolation-methods/)
    if(scale_factor > 1.0):
        interpolation = cv2.INTER_CUBIC
    else:
        interpolation = cv2.INTER_AREA

    return cv2.resize(img, None, fx = scale_factor, fy = scale_factor, interpolation = interpolation)
	#Optimally resize `img` according to the bounding boxes specified in `boxes` (which is simply the (pruned) results from `pytesseract.image_to_data()`).
	#Tesseract performs optimally when capital letters are ~32px tall (https://groups.google.com/g/tesseract-ocr/c/Wdh_JJwnw94/m/24JHDYQbBQAJ). Smaller text obviously can't be OCR'd as accurately, but weirdly enough, larger text causes problems as well. So, this function uses the bounding boxes we've found and resizes the image so that the median line height should be ~32px.
	def optimal_resize(img, boxes):
		"""Rescale `img` so that the median bounding-box height becomes ~32px.

		Args:
			img: Image to resize; it is handed unchanged to `cv2.resize`.
			boxes: Object whose "height" entry holds the bounding-box heights in pixels.

		Returns:
			`img` itself when there is no usable median height or when the scale
			factor is within 7% of 1.0; otherwise the result of `cv2.resize`.
		"""
		heights = np.asarray(boxes["height"], dtype=float)

		#With no boxes at all the median would be NaN, so there is nothing to scale towards: leave the image alone.
		if(heights.size == 0):
			return img

		median_height = np.median(heights)

		#A zero, negative, NaN or infinite median cannot produce a scale factor that cv2.resize accepts.
		if(not (np.isfinite(median_height) and median_height > 0)):
			return img

		target_height = 32 #See https://groups.google.com/g/tesseract-ocr/c/Wdh_JJwnw94/m/24JHDYQbBQAJ
		scale_factor = target_height / median_height
		print("Scale factor: " + str(scale_factor))

		#If the image is already within `skip_percentage` of the target size, just return the original image (it's better to skip resizing if we can)
		skip_percentage = 0.07
		if(1 - skip_percentage < scale_factor < 1 + skip_percentage):
			return img

		#Bicubic for enlarging, "pixel area relation" for reduction. (See https://chadrick-kwag.net/cv2-resize-interpolation-methods/)
		if(scale_factor > 1.0):
			interpolation = cv2.INTER_CUBIC
		else:
			interpolation = cv2.INTER_AREA

		return cv2.resize(img, None, fx = scale_factor, fy = scale_factor, interpolation = interpolation)