Created
March 7, 2022 22:20
-
-
Save bemasher/4ebcc965fa5e715c9c1c3fd480d5a184 to your computer and use it in GitHub Desktop.
Given an image of a scanned greyscale document with overscan borders, crop and threshold to just the document.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools
import pathlib

import cv2 as cv
import numpy as np
from skimage.measure import LineModelND, ransac
# Crop each scanned page in `input/*.jpg` to the document itself, rectify it
# with a perspective warp, threshold it, and write the result to
# `output/<name>.png`.

srcPath = pathlib.Path('input')
dstPath = pathlib.Path('output')

# cv.imwrite() does not create missing directories (it just returns False),
# so make sure the destination exists before processing anything.
dstPath.mkdir(parents=True, exist_ok=True)

# 3x3 cross-shaped kernel used to dilate the detected edges.
k = cv.getStructuringElement(cv.MORPH_CROSS, (3, 3))


def _intersect(line1, line2, eps=1e-9):
    """Return the intersection of two 2D lines, or None if they are parallel.

    Each line is an ``(origin, direction)`` pair, as produced by
    ``LineModelND.params``. Solves ``p1 + t * v1 == p2 + s * v2`` for ``t``.
    """
    p1, v1 = line1
    p2, v2 = line2
    # Scalar 2D cross products are written out by hand: np.cross() on
    # 2-element vectors is deprecated as of NumPy 2.0.
    denom = v1[0] * v2[1] - v1[1] * v2[0]
    if abs(denom) < eps:
        # Parallel lines (e.g. the two opposite page borders) never meet;
        # dividing here would only yield inf/nan coordinates.
        return None
    v = np.subtract(p2, p1)
    t = (v[0] * v2[1] - v[1] * v2[0]) / denom
    return p1 + t * v1


def _fit_border_lines(page, dx, dy):
    """Fit one line to the contour points in each outer quarter of the image.

    ``page`` is an (N, 2) array of x,y contour points and ``dx``/``dy`` are
    the image width/height. Returns a list of four ``(origin, direction)``
    line parameters, or None if any border has too few points to fit.
    """
    xs, ys = page[:, 0], page[:, 1]
    # Select outermost quarters in the x and y dimensions.
    #   x . . x        y y y y
    #   x . . x        . . . .
    #   x . . x        . . . .
    #   x . . x        y y y y
    selections = [
        xs < dx // 4,       # left
        xs >= dx // 4 * 3,  # right
        ys < dy // 4,       # top (image y grows downward)
        ys >= dy // 4 * 3,  # bottom
    ]

    lines = []
    for sel in selections:
        pts = page[sel]
        # ransac() needs more points than min_samples to draw from.
        if len(pts) <= 2:
            return None
        # Use ransac to select points on a line.
        robust, _ = ransac(
            pts,
            LineModelND,
            min_samples=2,
            residual_threshold=1,
            max_trials=100,
        )
        if robust is None:
            return None
        lines.append(robust.params)
    return lines


def _find_corners(gray):
    """Locate the four page corners in a greyscale scan.

    Returns a (4, 2) float array ordered top-left, top-right, bottom-right,
    bottom-left, or None when the corners cannot be determined.
    """
    dy, dx = gray.shape

    # Throw out some details.
    blur = cv.medianBlur(gray, 15)
    # Find edges.
    edges = cv.Canny(blur, 0, 100, apertureSize=3)
    # Close any gaps.
    edges = cv.morphologyEx(edges, cv.MORPH_DILATE, k)

    # Find contours. Indexing with [-2] works for both the two-value
    # (OpenCV 2/4) and three-value (OpenCV 3) return forms.
    cnts = cv.findContours(edges, cv.RETR_LIST, cv.CHAIN_APPROX_NONE)[-2]
    if len(cnts) == 0:
        return None

    # Assume the largest contour is our page.
    page = max(cnts, key=cv.contourArea).reshape(-1, 2)
    # Keep roughly 100 points per side. The step is clamped to 1 so short
    # contours (< 400 points) do not produce an invalid zero slice step.
    step = max(1, len(page) // (4 * 100))
    page = page[::step]

    lines = _fit_border_lines(page, dx, dy)
    if lines is None:
        return None

    # Intersect every pair of border lines; keep the points inside the image.
    corners = []
    for l1, l2 in itertools.combinations(lines, 2):
        pt = _intersect(l1, l2)
        if pt is None:
            continue
        x, y = pt
        # Written as a positive range test so NaN coordinates are rejected too.
        if not (0 <= x <= dx and 0 <= y <= dy):
            continue
        corners.append((x, y))
    c = np.array(corners, dtype=np.float64).reshape(-1, 2)

    # Sort the corners into image quadrants.
    left = c[:, 0] < dx // 2
    upper = c[:, 1] < dy // 2
    quads = [
        c[left & upper],    # Top-Left
        c[~left & upper],   # Top-Right
        c[~left & ~upper],  # Bottom-Right
        c[left & ~upper],   # Bottom-Left
    ]
    # getPerspectiveTransform needs exactly one point per corner.
    if any(len(q) != 1 for q in quads):
        return None
    return np.array([q[0] for q in quads])


for src in srcPath.glob('*.jpg'):
    dst = dstPath / src.with_suffix(".png").name
    print(src, '->', dst)

    # Uncomment to skip files that were already processed.
    # if dst.exists():
    #     continue

    img = cv.imread(str(src))
    if img is None:
        # cv.imread() signals failure by returning None rather than raising.
        print('  skipped: could not read image')
        continue
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

    quad = _find_corners(gray)
    if quad is None:
        print('  skipped: could not locate the four page corners')
        continue
    tl, tr, br, bl = quad

    # Calculate dimensions for the document (longest opposing edges).
    x = max(np.hypot(*(tr - tl)), np.hypot(*(br - bl)))
    y = max(np.hypot(*(tl - bl)), np.hypot(*(tr - br)))

    # Resample so the page is 4400 px tall, i.e. 400 dpi for an 11 in
    # (Letter-sized) page, keeping the measured aspect ratio.
    scale = 4400 / y
    x = int(x * scale)
    y = 4400

    # Calculate a perspective transform.
    m = cv.getPerspectiveTransform(
        quad.astype(np.float32),
        np.array([[0, 0], [x, 0], [x, y], [0, y]], dtype=np.float32)
    )

    # Apply the transform. The interpolation mode must be passed by keyword:
    # the fourth positional parameter of warpPerspective is `dst`, so a
    # positional flag is silently ignored and bilinear interpolation is used.
    warp = cv.warpPerspective(gray, m, (x, y), flags=cv.INTER_LANCZOS4)

    # Apply adaptive thresholding.
    thresh = cv.adaptiveThreshold(
        warp, 255,
        cv.ADAPTIVE_THRESH_MEAN_C,
        cv.THRESH_BINARY,
        255, 30
    )

    # Paint small speckles white.
    thresh, _ = cv.filterSpeckles(thresh, 255, 8, 64)

    # Erase edges.
    edgeMargin = 16
    thresh[:edgeMargin, :] = 255
    thresh[-edgeMargin:, :] = 255
    thresh[:, :edgeMargin] = 255
    thresh[:, -edgeMargin:] = 255

    # Write image to disk; imwrite reports failure via its return value.
    if not cv.imwrite(str(dst), thresh):
        print('  failed: could not write', dst)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment