Last active
October 5, 2023 16:00
-
-
Save mooz/333a5d97a06e685417d2c205bf29487a to your computer and use it in GitHub Desktop.
Extract a slide image from a photo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
# ============================================================ #
# Cut the slide region out of a photo and save it.
# A perspective transform is applied as well.
#
# Usage: python extract_slide.py YOUR_PHOTO
#
# ============================================================ #
import cv2 | |
import numpy as np | |
# ============================================================ #
# Image preparation
# ============================================================ #
import sys | |
# Take the photo path from the command line. Exit with the usage line instead
# of an IndexError traceback when the argument is missing.
if len(sys.argv) < 2:
    sys.exit("Usage: python extract_slide.py YOUR_PHOTO")
img_path = sys.argv[1]
# Load the image. cv2.imread reports a missing or unreadable file by returning
# None rather than raising, so check before passing the result to cvtColor.
img = cv2.imread(img_path)
if img is None:
    sys.exit("Could not read image: " + img_path)
# Convert from BGR to RGB; everything below works on the RGB image.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# ============================================================ #
# Image preprocessing
# ============================================================ #
# Grayscale + Gaussian blur.
# `img` was converted to RGB right after loading, so the RGB->gray conversion
# code is the correct one here; COLOR_BGR2GRAY would apply the red and blue
# luminance weights to the wrong channels.
img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
img_preprocessed = cv2.GaussianBlur(img_gray, (5, 5), 0)
# Binarize with Otsu's method. The computed threshold value (first return
# value) is not needed, only the binary image.
_, img_binary = cv2.threshold(img_preprocessed, 0, 255,
                              cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# ============================================================ #
# Contour extraction
# http://opencv.jp/opencv-2.1/cpp/structural_analysis_and_shape_descriptors.html
# http://docs.opencv.org/3.1.0/dd/d49/tutorial_py_contour_features.html
# ============================================================ #
# cv2.findContours returns (image, contours, hierarchy) on OpenCV 3.x but
# (contours, hierarchy) on 2.x and 4.x. The contour list is second from the
# end in every version, so index with [-2]; a fixed [1] picks the hierarchy
# on 2.x/4.x.
# The binary image is copied so that img_binary stays intact on OpenCV
# versions where findContours modifies its input.
contours = cv2.findContours(np.copy(img_binary),
                            cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)[-2]
# ============================================================ #
# From the contours, pick out only the ones that look like a slide
# ============================================================ #
def simplify_contour(contour, epsilon_ratio=0.1):
    """Approximate a contour by a coarse closed polygon.

    The contour is first replaced by its convex hull, and the hull is then
    simplified with the Douglas-Peucker algorithm (cv2.approxPolyDP).

    Args:
        contour: Contour points in a form accepted by cv2.convexHull.
        epsilon_ratio: Approximation tolerance expressed as a fraction of the
            hull's perimeter. Defaults to 0.1, the value that used to be
            hard-coded, so existing callers are unaffected.

    Returns:
        The simplified polygon as returned by cv2.approxPolyDP.
    """
    hull = cv2.convexHull(contour)
    # Scale the tolerance with the perimeter of the (closed) hull so the
    # amount of simplification does not depend on the contour's size.
    epsilon = epsilon_ratio * cv2.arcLength(hull, True)
    return cv2.approxPolyDP(hull, epsilon, True)
contours_simplified = [simplify_contour(contour) for contour in contours]
# Keep only the polygons made of exactly four points.
# This has to be a real list: on Python 3 filter() returns a lazy iterator
# that is always truthy (so the emptiness test below never fails), is used up
# after one pass, and cannot be indexed.
contours_rectangles = [contour for contour in contours_simplified
                       if len(contour) == 4]
if contours_rectangles:
    # Choose the single most slide-like region.
    def slide_likelihood(contour):
        """Score how much the given contour looks like a slide.

        The score is simply the contour's area, on the assumption that the
        largest quadrilateral in the photo is the slide.
        (cv2.contourArea reportedly computes the area via Green's theorem.)
        """
        return cv2.contourArea(contour)

    largest_contour_index = np.argmax([slide_likelihood(contour)
                                       for contour in contours_rectangles])
    # ============================================================ #
    # Crop the image
    # ============================================================ #
    # Bounding box of the most slide-like region.
    largest_contour = contours_rectangles[largest_contour_index]
    x, y, w, h = cv2.boundingRect(largest_contour)
def normalize_contour(contour):
    """Put the four corners of a quadrilateral into a fixed order.

    The points are sorted by x first: the two with the smallest x form the
    left side, the other two the right side. Each side is then sorted by y,
    the smaller y being the "top" corner (image coordinates).

    Args:
        contour: Four 2-D points. Accepts a (4, 2) array, an OpenCV-style
            (4, 1, 2) contour, or a nested sequence of four (x, y) pairs.

    Returns:
        A (4, 2) array ordered as
        [left_top, left_bottom, right_top, right_bottom].

    Raises:
        ValueError: If the input does not consist of exactly four 2-D points.
    """
    # Flatten to (N, 2) so OpenCV's (N, 1, 2) layout and plain lists work too.
    points = np.asarray(contour).reshape(-1, 2)
    if len(points) != 4:
        raise ValueError(
            "normalize_contour expects exactly 4 points, got %d" % len(points))
    # Sort by x once and split into the left and right pairs.
    by_x = points[np.argsort(points[:, 0])]
    left_part, right_part = by_x[:2], by_x[2:]
    # Within each pair, the point with the smaller y is the top corner.
    left_top, left_bottom = left_part[np.argsort(left_part[:, 1])]
    right_top, right_bottom = right_part[np.argsort(right_part[:, 1])]
    return np.array([left_top, left_bottom, right_top, right_bottom])
# Only attempt the extraction when a quadrilateral candidate was found.
if contours_rectangles:
    # Corner coordinates of the output image for the perspective transform:
    # an axis-aligned w x h rectangle anchored at the origin.
    slide_rect = np.array([[0, 0],
                           [0, h],
                           [w, 0],
                           [w, h]])
    # Both corner sets go through normalize_contour so that their points are
    # listed in the same order; getPerspectiveTransform pairs them by index
    # and requires float32 input.
    from_rect = normalize_contour(largest_contour.reshape(4, 2)).astype(np.float32)
    to_rect = normalize_contour(slide_rect).astype(np.float32)
    # Apply the perspective transform.
    M = cv2.getPerspectiveTransform(from_rect, to_rect)
    warped_img = cv2.warpPerspective(img, M, (w, h))
    # Plain bounding-box crop, for comparison with the warped result.
    cropped_img = img[y:y+h, x:x+w]
    # Save the cropped and the warped image next to the input file.
    # `img` is RGB, so convert back to BGR, the order cv2.imwrite expects.
    cv2.imwrite(img_path + ".cropped.jpg",
                cv2.cvtColor(cropped_img, cv2.COLOR_RGB2BGR))
    cv2.imwrite(img_path + ".warped.jpg",
                cv2.cvtColor(warped_img, cv2.COLOR_RGB2BGR))
    # A space was missing between "processing" and the path.
    print("Finished processing " + img_path + ". Saved the cropped/warped image")
else:
    print("Slide not found in " + img_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment