Skip to content

Instantly share code, notes, and snippets.

@smeschke
Last active October 4, 2024 14:20
Show Gist options
  • Save smeschke/aa989df78551a9050a78e0d7a8c50495 to your computer and use it in GitHub Desktop.
Save smeschke/aa989df78551a9050a78e0d7a8c50495 to your computer and use it in GitHub Desktop.
Aligns a scanned document to find optimal rotation
import cv2
import numpy as np
# Load the scan as a single-channel (grayscale) image.
src_path = '/home/stephen/Desktop/I7Ykpbs.jpg'
src = cv2.imread(src_path, 0)
if src is None:
    # cv2.imread reports a missing/unreadable file by returning None rather
    # than raising; without this check the subtraction below would fail with
    # an opaque "unsupported operand" TypeError.
    raise FileNotFoundError("could not read image: " + src_path)
# Invert the intensities (dark pixels become bright and vice versa).
src = 255 - src
# Angles collected by the rotation loop further down.
scores = []
# Crop to the top-left square so that height and width are equal.
h,w = src.shape
small_dimention = min(h,w)
src = src[:small_dimention, :small_dimention]
# Video file that receives one 320x320 MJPG frame per tested angle (15 fps).
out = cv2.VideoWriter('/home/stephen/Desktop/rotate.avi',
                      cv2.VideoWriter_fourcc('M','J','P','G'),
                      15, (320,320))
def rotate(img, angle):
    """Return a copy of `img` rotated by `angle` about its centre.

    `img` must be two-dimensional (its shape is unpacked into exactly two
    values). The result has the same width and height as the input and the
    rotation uses a scale factor of 1.
    """
    height, width = img.shape
    centre = (width/2, height/2)
    transform = cv2.getRotationMatrix2D(centre, angle, 1)
    return cv2.warpAffine(img, transform, (width, height))
def sum_rows(img):
    """Return the per-row pixel sums of `img`, scaled so the largest is 255.

    Args:
        img: two-dimensional array of pixel values.

    Returns:
        A float64 array with one entry per row of `img` except the last one
        (the last row is deliberately left out to keep the historical output
        length of ``img.shape[0] - 1`` that callers index into). If every
        row sums to zero, or there are no rows to sum, an all-zero (possibly
        empty) array is returned instead of dividing by zero.
    """
    # Sum in float64: adding uint8 scalars one by one (as the builtin sum()
    # does) wraps around at 256 under NumPy 2 promotion rules.
    totals = np.asarray(img)[:-1].sum(axis=1, dtype=np.float64)
    peak = totals.max() if totals.size else 0.0
    if peak == 0:
        # Nothing to normalise against; avoid producing NaNs.
        return np.zeros_like(totals)
    # Normalize range to (0,255)
    return (totals / peak) * 255
def display_data(roi, row_sums, buffer):
    """Show `roi` beside its row-sum profile and append the frame to the video.

    A square canvas of side ``buffer*2`` is filled row by row with the
    matching value from `row_sums`; everything to the right of column
    ``int(buffer/3)`` is then replaced by the corresponding part of `roi`.
    The canvas is shown in the 'bg1' window and written, resized to 320x320
    and converted to BGR, to the module-level video writer `out`.

    Returns the key code reported by ``cv2.waitKey(1)``.
    """
    side = buffer*2
    canvas = np.zeros((side, side), np.uint8)
    # One grey level per row; the final row has no entry in the profile and
    # therefore stays black.
    for idx in range(roi.shape[0]-1):
        canvas[idx:idx+1, :] = row_sums[idx]
    # Keep the profile visible only in the strip left of this column.
    split = int(buffer/3)
    canvas[:, split:] = roi[:, split:]
    cv2.imshow('bg1', canvas)
    key = cv2.waitKey(1)
    frame = cv2.resize(canvas, (320,320))
    out.write(cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR))
    return key
# Rotate the image around in a circle, in half-degree steps, and record every
# angle whose row-sum profile passes the threshold test below.
angle = 0
try:
    while angle <= 360:
        # Rotate the source image
        img = rotate(src, angle)
        # Crop a centred square of side 2*buffer (roughly two thirds of the
        # shorter image side) so the region of interest is filled with text.
        h,w = img.shape
        buffer = min(h, w) - int(min(h,w)/1.5)
        roi = img[int(h/2-buffer):int(h/2+buffer), int(w/2-buffer):int(w/2+buffer)]
        # Threshold image: values above 140 become 255, the rest 0
        _, roi = cv2.threshold(roi, 140, 255, cv2.THRESH_BINARY)
        # Compute the normalised sums of the rows
        row_sums = sum_rows(roi)
        # Keep the angle when the profile total is low.
        # NOTE(review): presumably a low total means the profile looks like
        # "zebra stripes" (text rows separated by blank rows) - confirm the
        # 100000 threshold against the image sizes in use.
        if sum(row_sums) < 100000:
            scores.append(angle)
        k = display_data(roi, row_sums, buffer)
        # Key code 27 is Esc: stop early
        if k == 27:
            break
        # Increment angle and try again
        angle += .5
finally:
    # Release the writer so the video file is flushed and finalised even if
    # the loop is interrupted by an error.
    out.release()
    cv2.destroyAllWindows()
# Create images for display purposes
display = src.copy()
h, w = display.shape
# Create an image that contains bins. Votes are accumulated in float32:
# a uint8 accumulator wraps around after six overlapping 50-valued lines.
bins_image = np.zeros(display.shape, np.float32)
# Horizontal line through the image centre, drawn once. Rotating only this
# mask (instead of rotating the whole picture forth and back for every angle)
# gives the same line position without repeatedly re-interpolating, and thus
# blurring and corner-cropping, the display and bins images.
centre_line = np.zeros(display.shape, np.uint8)
cv2.line(centre_line, (0,int(h/2)), (w,int(h/2)), 255, 1)
for angle in scores:
    # Line as it appears in the un-rotated source frame for this angle
    line_mask = rotate(centre_line, -angle)
    # Draw the line on the display image
    display = np.maximum(display, line_mask)
    # 'Fill' up the bins: each line contributes 50 where it is fully set
    bins_image += line_mask * (50.0 / 255.0)
# Find the most filled bin: row (y) and column (x) of the largest value.
# (Previously both coordinates were derived from columns, so y always
# equalled x, and a maximum in the last column left them undefined.)
y, x = np.unravel_index(np.argmax(bins_image), bins_image.shape)
x, y = int(x), int(y)
# Draw circles showing the most filled bin
cv2.circle(display, (x,y), 560, 255, 5)
# Plot the source, the annotated output and the bins image side by side
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
f, axarr = plt.subplots(1,3, sharex=True)
panels = (
    (src, 'Source Image'),
    (display, 'Output'),
    (bins_image, 'Bins Image'),
)
# One panel per axis: picture, caption, and no tick marks or frame
for axis, (picture, caption) in zip(axarr, panels):
    axis.imshow(picture)
    axis.set_title(caption)
    axis.axis('off')
plt.show()
# Wait for a key press, then close any remaining OpenCV windows
cv2.waitKey()
cv2.destroyAllWindows()
@NUAAwanghe
Copy link

Hi, I came here from https://stackoverflow.com/questions/55654142/detect-if-a-text-image-is-upside-down. Can you provide some details about the function “area_to_top_of_text”? Thank you in advance.

@yamini1473
Copy link

Hey by any chance did you come up with the solution yet?

@mikegashler
Copy link

mikegashler commented Jul 27, 2020

For those asking, here's a snip of code that returns a positive number if there is more area in the margins above, and a negative number if there is more area in the margins below. (Disclaimer: This is untested code):

def top_bot_margin_ratio(image: np.ndarray) -> float:
    """Compare the blank margin above the dark pixels with the one below.

    For every column that contains at least one pixel darker than 128, the
    number of rows above the first dark pixel and below the last dark pixel
    are added up over all such columns.

    Args:
        image: grayscale image, or a multi-channel image (converted to
            grayscale with ``cv2.COLOR_BGR2GRAY`` first).

    Returns:
        ``log(above / below)``: positive when there is more margin above,
        negative when there is more below. Degenerate cases follow the
        limit of that expression: ``0.0`` when both margins are equal
        (including an image without dark pixels), ``inf`` when only the
        bottom margin is zero and ``-inf`` when only the top margin is zero.
    """
    import math  # local import: the snippet does not import it elsewhere

    if len(image.shape) > 2:
        if image.shape[2] > 1:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            # Single-channel image stored with a trailing axis
            image = image[:, :, 0]
    dark = image < 128
    has_ink = dark.any(axis=0)
    # argmax on a boolean column gives the index of its first True; on the
    # vertically flipped mask that is the distance from the bottom edge.
    above = int(dark.argmax(axis=0)[has_ink].sum())
    below = int(dark[::-1].argmax(axis=0)[has_ink].sum())
    if above == below:
        return 0.0
    if below == 0:
        return math.inf
    if above == 0:
        return -math.inf
    return math.log(above / below)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment