Last active
October 4, 2024 14:20
-
-
Save smeschke/aa989df78551a9050a78e0d7a8c50495 to your computer and use it in GitHub Desktop.
Aligns a scanned document to find optimal rotation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math

import cv2
import numpy as np
# Load the scan as a single-channel image (flag 0 = grayscale).
gray = cv2.imread('/home/stephen/Desktop/I7Ykpbs.jpg', 0)
if gray is None:
    # cv2.imread reports a missing/unreadable file by returning None instead
    # of raising, which would otherwise surface as an obscure TypeError below.
    raise FileNotFoundError(
        "could not read image: '/home/stephen/Desktop/I7Ykpbs.jpg'")
# Invert the intensities (presumably so dark ink on light paper becomes bright).
src = 255 - gray
# Angles (in degrees) that pass the alignment test in the sweep below.
scores = []
# Crop to the top-left square so that rotating keeps the content in frame.
h, w = src.shape
small_dimension = min(h, w)
src = src[:small_dimension, :small_dimension]
# Motion-JPEG video of the visualisation: 15 fps, 320x320 frames.
out = cv2.VideoWriter('/home/stephen/Desktop/rotate.avi',
                      cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                      15, (320, 320))
def rotate(img, angle):
    """Rotate *img* about its center by *angle* degrees without rescaling.

    The output has the same width and height as the input, so any content
    that rotates outside the original frame is cut off.

    Args:
        img: Image array; only the first two dimensions (rows, cols) are
            used, so both grayscale and multi-channel images are accepted.
        angle: Rotation angle in degrees, passed to
            ``cv2.getRotationMatrix2D``.

    Returns:
        The rotated image as returned by ``cv2.warpAffine``.
    """
    # shape[:2] instead of shape so a trailing channel axis does not break
    # the unpacking.
    rows, cols = img.shape[:2]
    center = (cols / 2, rows / 2)
    matrix = cv2.getRotationMatrix2D(center, angle, 1)
    return cv2.warpAffine(img, matrix, (cols, rows))
def sum_rows(img):
    """Return the per-row pixel totals of a 2-D image, scaled to 0..255.

    The last row of *img* is not included, so the result has
    ``img.shape[0] - 1`` entries. This matches the historical behaviour of
    this function, which callers index with ``range(rows - 1)``.

    Args:
        img: 2-D image array (rows x columns).

    Returns:
        A float64 NumPy array in which the largest row total maps to 255.
        If every row total is zero (or there are no rows to sum) an
        all-zero array is returned instead of dividing by zero.
    """
    body = np.asarray(img)[:-1]
    # Accumulate in float64: summing uint8 pixels in their own dtype can wrap
    # around at 255 and produce meaningless totals.
    totals = body.sum(axis=1, dtype=np.float64)
    peak = totals.max() if totals.size else 0.0
    if peak == 0:
        # Blank input: avoid a 0/0 division that would fill the result with NaN.
        return np.zeros_like(totals)
    # Normalize range to (0, 255)
    return (totals / peak) * 255
def display_data(roi, row_sums, buffer):
    """Show one visualisation frame, append it to the video, return the key.

    The frame is a square of side ``2 * buffer``. Each row is first filled
    with its value from *row_sums*; everything from one third of *buffer*
    rightwards is then overwritten with the matching columns of *roi*.

    Args:
        roi: Image region; its columns are copied into the frame, so it is
            expected to be ``2 * buffer`` pixels on each side.
        row_sums: Per-row values, indexed for rows 0 .. ``roi.shape[0] - 2``.
        buffer: Half of the frame's side length, in pixels.

    Returns:
        The key code reported by ``cv2.waitKey(1)``.
    """
    side = buffer * 2
    canvas = np.zeros((side, side), np.uint8)
    # Paint the row profile as horizontal bands (the final row is left black).
    band_count = roi.shape[0] - 1
    for band in range(band_count):
        canvas[band:band + 1, :] = row_sums[band]
    # Keep the profile visible on the left, show the ROI itself on the right.
    split = int(buffer / 3)
    canvas[:, split:] = roi[:, split:]
    cv2.imshow('bg1', canvas)
    key = cv2.waitKey(1)
    # The writer was opened for 320x320 frames and is fed BGR data.
    frame = cv2.resize(canvas, (320, 320))
    out.write(cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR))
    return key
# Sweep the rotation angle through a full circle in half-degree steps.
angle = 0
try:
    while angle <= 360:
        # Rotate the source image
        img = rotate(src, angle)
        # Crop a centered square of side 2*buffer. buffer is about one third
        # of the shorter side, so the ROI spans roughly the middle two thirds
        # of the image (the ROI is expected to be filled with text).
        h, w = img.shape
        buffer = min(h, w) - int(min(h, w) / 1.5)
        roi = img[int(h / 2 - buffer):int(h / 2 + buffer),
                  int(w / 2 - buffer):int(w / 2 + buffer)]
        # Threshold image
        _, roi = cv2.threshold(roi, 140, 255, cv2.THRESH_BINARY)
        # Compute the sums of the rows
        row_sums = sum_rows(roi)
        # Record the angle when the normalized row profile totals less than
        # 100000 (presumably a "zebra stripe" profile of aligned text lines).
        if sum(row_sums) < 100000:
            scores.append(angle)
        k = display_data(roi, row_sums, buffer)
        # 27 is the Esc key: stop the sweep early.
        if k == 27:
            break
        # Increment angle and try again
        angle += .5
finally:
    # Release the writer so the video file is finalized even if the sweep
    # stops early or raises, then close the preview window.
    out.release()
    cv2.destroyAllWindows()
# Create images for display purposes
display = src.copy()
# Accumulator image ("bins"). float32 instead of uint8: each candidate angle
# adds 50 along its line, and a uint8 accumulator would wrap around to small
# values once six or more lines overlap, hiding the true maximum.
bins_image = np.zeros(display.shape, np.float32)
center_row = int(h / 2)
for angle in scores:
    # Rotate the image, draw a horizontal line through its middle, and rotate
    # back so the line ends up tilted by the candidate angle.
    display = rotate(display, angle)
    cv2.line(display, (0, center_row), (w, center_row), 255, 1)
    display = rotate(display, -angle)
    # Rotate the bins image
    bins_image = rotate(bins_image, angle)
    # Draw a line on a temporary image
    temp = np.zeros_like(bins_image)
    cv2.line(temp, (0, center_row), (w, center_row), 50, 1)
    # 'Fill' up the bins
    bins_image += temp
    bins_image = rotate(bins_image, -angle)
# Find the most filled bin. unravel_index yields (row, column); previously both
# coordinates were taken from a column scan, so y was never a row index.
y, x = np.unravel_index(np.argmax(bins_image), bins_image.shape)
# cv2 drawing functions expect plain Python ints for coordinates.
x, y = int(x), int(y)
# Draw circles showing the most filled bin
cv2.circle(display, (x, y), 560, 255, 5)
# Plot with Matplotlib
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# One row of three panels sharing the x axis: source, annotated output, bins.
f, axarr = plt.subplots(1, 3, sharex=True)
panels = (
    (src, 'Source Image'),
    (display, 'Output'),
    (bins_image, 'Bins Image'),
)
for axis, (picture, title) in zip(axarr, panels):
    axis.imshow(picture)
    axis.set_title(title)
    # Hide ticks and frame; only the image matters here.
    axis.axis('off')
plt.show()
cv2.waitKey()
cv2.destroyAllWindows()
For those asking, here's a code snippet that returns a positive number if there is more area in the margins above, and a negative number if there is more area in the margins below (disclaimer: this code is untested):
def top_bot_margin_ratio(image: np.ndarray) -> float:
    """Return the log-ratio of the top margin area to the bottom margin area.

    For every column that contains at least one dark pixel (value < 128),
    the number of rows above the first dark pixel and below the last dark
    pixel is counted. The two counts are summed over all such columns and
    ``log(above / below)`` is returned, so the result is positive when the
    top margin is larger and negative when the bottom margin is larger.

    Args:
        image: Grayscale image (rows x columns), or a multi-channel image,
            which is converted with ``cv2.COLOR_BGR2GRAY`` first.

    Returns:
        ``log(above / below)`` as a float. Edge cases that would otherwise
        divide by zero or take ``log(0)``:
        ``0.0`` when there are no dark pixels or both margins are empty,
        ``math.inf`` when only the bottom margin is empty, and
        ``-math.inf`` when only the top margin is empty.
    """
    if image.ndim > 2 and image.shape[2] > 1:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    elif image.ndim > 2:
        # Single-channel image stored with a trailing axis of length 1.
        image = image[:, :, 0]

    dark = image < 128
    # Columns without any dark pixel contribute nothing to either margin.
    has_dark = dark.any(axis=0)
    if not has_dark.any():
        return 0.0

    # argmax on a boolean column gives the index of its first True, i.e. the
    # number of rows above the first dark pixel.
    above = int(dark.argmax(axis=0)[has_dark].sum())
    # The same trick on the vertically flipped mask counts the rows below the
    # last dark pixel.
    below = int(dark[::-1].argmax(axis=0)[has_dark].sum())

    if above == 0 and below == 0:
        return 0.0
    if below == 0:
        return math.inf
    if above == 0:
        return -math.inf
    return math.log(above / below)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hey, by any chance did you come up with the solution yet?