Created
July 2, 2024 07:30
-
-
Save Marken-Foo/9478b1b48120b80da752d7af897ab7f6 to your computer and use it in GitHub Desktop.
Python script to convert shogi board image to SFEN (needs tsumemi)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import cv2 | |
import numpy as np | |
import pytesseract | |
from pytesseract import Output | |
from basetypes import Koma, Side, KTYPE_FROM_KANJI | |
from position import Position | |
from square import Square | |
import sys | |
sys.stdout.reconfigure(encoding="utf-8") | |
# Single characters that OCR output must match to be accepted as a piece.
KOMA_CHARS = {
    "成",
    "歩", "香", "桂", "銀", "金", "角", "飛", "玉",
    "と", "龍", "竜", "馬", "全", "圭", "杏",
}
def main(image_path="input.png"):
    """Read a shogi board image and print the recognised position as SFEN.

    The image is loaded as grayscale, the board grid is located, each of
    the 81 cells is cropped and passed to OCR, and every recognised piece
    is placed on a ``Position`` whose SFEN is printed at the end.

    Args:
        image_path: Path of the board image to read. Defaults to
            ``"input.png"``.

    Returns:
        None. Failures (unreadable image, grid not found) are reported with
        a printed message and an early return.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        print("image failed to open")
        return

    # Grid lines are located on the un-thresholded grayscale image.
    xs, ys = find_board_lines(img)
    if len(xs) != 10 or len(ys) != 10:
        # A 9x9 board is bounded by exactly 10 vertical and 10 horizontal lines.
        print("Did not identify a 9x9 grid")
        return

    # Binarise only once the grid is known to be usable; the cells handed
    # to OCR are cut from this thresholded image.
    img = apply_threshold(img)

    # Build the position one square at a time.
    pos = Position()
    for row_idx in range(9):
        for col_idx in range(9):
            # Each grid line is a (first, second) coordinate pair. A cell
            # spans from the second coordinate of its leading line to the
            # first coordinate of the following line.
            x = xs[col_idx][1]
            y = ys[row_idx][1]
            w = xs[col_idx + 1][0] - x
            h = ys[row_idx + 1][0] - y
            sq_img = crop_image(img, x, y, w, h, trim=1)

            # Add a white border around the cell for better recognition.
            sq_img = cv2.copyMakeBorder(
                sq_img, 15, 15, 15, 15,
                cv2.BORDER_CONSTANT, value=(255, 255, 255),
            )

            # Read the koma character and whether it is upside down.
            koma_str, is_upside_down = read_koma_characters(sq_img)
            if not koma_str:
                continue

            # NOTE(review): assumes every character accepted by
            # read_koma_characters is a key of KTYPE_FROM_KANJI - confirm,
            # otherwise this lookup raises KeyError.
            koma = Koma.make(
                Side.GOTE if is_upside_down else Side.SENTE,
                KTYPE_FROM_KANJI[koma_str]
            )
            print(f"{row_idx+1},{col_idx+1}: {'v' if is_upside_down else ''}{koma_str} = {str(koma)}")
            sq = Square.from_cr(*xynum_to_cr(col_idx, row_idx))
            pos.set_koma(koma, sq)
    print(pos.to_sfen())
    return
def find_board_lines(img):
    """Locate the vertical and horizontal grid lines of the board.

    Runs edge detection followed by a Hough line transform, keeps the
    lines that are close to vertical or horizontal, and groups their
    integer rho values into coordinate pairs with ``to_pair_list``.

    Args:
        img: Grayscale board image.

    Returns:
        ``(vertical_lines, horizontal_lines)``, each a list of
        ``(first, second)`` integer coordinate pairs in ascending order.
        Both lists are empty when no line is detected at all.
    """
    detected = find_lines(detect_edges(img))
    if detected is None:
        # cv2.HoughLines yields None (not an empty array) when no line
        # reaches the vote threshold; report "no lines" instead of
        # failing while iterating None.
        return [], []

    # Each detected entry wraps a single (rho, theta) pair.
    lines = [(line[0][0], line[0][1]) for line in detected]

    # No pre-sorting needed: to_pair_list sorts its input itself.
    horizontal_lines = to_pair_list(
        [int(r) for r, theta in lines if is_line_horizontal(r, theta)]
    )
    vertical_lines = to_pair_list(
        [int(r) for r, theta in lines if is_line_vertical(r, theta)]
    )
    return vertical_lines, horizontal_lines
def detect_edges(img):
    """Return the Canny edge map of ``img``.

    Uses hysteresis thresholds 50 and 200 with a Sobel aperture of 3.
    """
    edge_map = cv2.Canny(
        img,
        threshold1=50,
        threshold2=200,
        edges=None,
        apertureSize=3,
    )
    return edge_map
def find_lines(img):
    """Run the standard Hough line transform on an edge image.

    The accumulator uses a rho step of 1 and a theta step of one degree,
    and a line needs at least 300 votes to be reported.

    Returns:
        Whatever ``cv2.HoughLines`` returns for these settings.
    """
    one_degree = math.pi / 180
    hough_settings = {
        "rho": 1,
        "theta": one_degree,
        "threshold": 300,
        "lines": None,
        "srn": 0,
        "stn": 0,
    }
    return cv2.HoughLines(img, **hough_settings)
def is_line_horizontal(r, theta, threshold=3*math.pi/180):
    """Return True when ``theta`` is strictly within ``threshold`` of pi/2.

    ``r`` is accepted for a uniform signature but is not used.
    """
    deviation = theta - math.pi/2
    return abs(deviation) < threshold
def is_line_vertical(r, theta, threshold=3*math.pi/180):
    """Return True when ``theta`` is strictly within ``threshold`` of 0.

    ``r`` is accepted for a uniform signature but is not used.
    """
    deviation = theta - 0
    return abs(deviation) < threshold
def to_pair_list(xs, threshold=5):
    """Group coordinates into ascending ``(first, second)`` pairs.

    After sorting, a value is paired with its immediate successor when the
    successor is at most ``threshold`` greater; otherwise the value is
    paired with itself. A value consumed as the second member of a pair is
    never reused as the first member of another.

    Args:
        xs: Iterable of coordinates.
        threshold: Largest gap for two neighbours to count as one pair.

    Returns:
        List of 2-tuples in ascending order.
    """
    ordered = sorted(xs)
    count = len(ordered)
    pairs = []
    idx = 0
    while idx < count:
        first = ordered[idx]
        has_partner = idx + 1 < count and not ordered[idx + 1] > first + threshold
        if has_partner:
            pairs.append((first, ordered[idx + 1]))
            idx += 2
        else:
            # Lone coordinate: duplicate it so every entry is a pair.
            pairs.append((first, first))
            idx += 1
    return pairs
def crop_image(img, x, y, w, h, trim=0):
    """Return the ``w`` x ``h`` region of ``img`` at ``(x, y)``, minus a margin.

    Args:
        img: Image array indexed as ``img[row, column]``.
        x: Column of the region's left edge.
        y: Row of the region's top edge.
        w: Region width in pixels.
        h: Region height in pixels.
        trim: Width in pixels of the strip to remove from each of the four
            edges of the region.

    Returns:
        The slice ``img[y + trim : y + h - trim, x + trim : x + w - trim]``.
    """
    # Remove exactly `trim` pixels per side: the start moves in by `trim`
    # and the (exclusive) end moves in by `trim` as well. The result is
    # therefore 2 * trim smaller in each dimension.
    x1 = x + trim
    x2 = x + w - trim
    y1 = y + trim
    y2 = y + h - trim
    return img[y1:y2, x1:x2]
def char_from_data(data):
    """Pick the first OCR entry whose confidence is positive.

    Args:
        data: Mapping with parallel ``"conf"`` and ``"text"`` sequences.

    Returns:
        ``(text, conf)`` of the first entry with ``conf > 0``, or
        ``("", -1)`` when there is none.
    """
    confidences = data["conf"]
    hit = next(
        (idx for idx, score in enumerate(confidences) if score > 0),
        None,
    )
    if hit is None:
        return "", -1
    return data["text"][hit], confidences[hit]
def read_koma_characters(img):
    """OCR a single board cell, upright and flipped, and pick the piece.

    The cell is recognised twice as a single Japanese character: once as
    given and once after ``np.flip``. A reading only counts when it is a
    member of ``KOMA_CHARS``.

    Returns:
        ``(character, is_upside_down)``. When only one orientation gives a
        piece character, that one is used. When both do, the upright
        reading wins only if its confidence is strictly higher. When
        neither does, ``("", False)`` is returned.
    """
    ocr_config = '-l jpn --psm 10'
    flipped = np.flip(img)

    upright_data = pytesseract.image_to_data(
        img, config=ocr_config, output_type=Output.DICT
    )
    flipped_data = pytesseract.image_to_data(
        flipped, config=ocr_config, output_type=Output.DICT
    )

    upright_char, upright_conf = char_from_data(upright_data)
    flipped_char, flipped_conf = char_from_data(flipped_data)

    upright_ok = upright_char in KOMA_CHARS
    flipped_ok = flipped_char in KOMA_CHARS

    if not (upright_ok or flipped_ok):
        return "", False
    if upright_ok and flipped_ok:
        # Both orientations look like a piece: trust the more confident one
        # (ties go to the flipped reading).
        if upright_conf > flipped_conf:
            return upright_char, False
        return flipped_char, True
    if upright_ok:
        return upright_char, False
    return flipped_char, True
def _find_two_points(r, theta):
    """Return two integer points on the line given by ``(r, theta)``.

    ``(r*cos(theta), r*sin(theta))`` is taken as the anchor point, and the
    two results lie 1000 units away from it in opposite directions along
    ``(-sin(theta), cos(theta))``.

    Returns:
        ``(pt1, pt2)``, each an ``(x, y)`` tuple of ints.
    """
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    anchor_x = cos_t * r
    anchor_y = sin_t * r
    step_x = 1000 * sin_t
    step_y = 1000 * cos_t
    first = (int(anchor_x - step_x), int(anchor_y + step_y))
    second = (int(anchor_x + step_x), int(anchor_y - step_y))
    return (first, second)
def xynum_to_cr(x_num, y_num):
    """Convert zero-based image grid indices to a ``(9 - x, y + 1)`` pair.

    The first element counts down from 9 as ``x_num`` grows; the second is
    ``y_num`` shifted to start at 1.
    """
    first = 9 - x_num
    second = y_num + 1
    return first, second
# Might need to apply threshold with different values in order to correctly
# read the board character; pass `thresh` to experiment.
def apply_threshold(img, thresh=165):
    """Binarise ``img`` with a fixed global threshold.

    Args:
        img: Grayscale image.
        thresh: Cut-off passed to ``cv2.threshold``. Defaults to 165.

    Returns:
        The thresholded image (the second element of ``cv2.threshold``'s
        result); pixels map to 0 or 255 under ``cv2.THRESH_BINARY``.
    """
    # The fifth positional parameter of cv2.threshold is the output array,
    # not a second flag, so THRESH_OTSU must not be passed there. If Otsu's
    # method is wanted it has to be OR-ed into the type flag
    # (cv2.THRESH_BINARY | cv2.THRESH_OTSU), in which case `thresh` is
    # ignored. A fixed threshold is used here.
    _, binary = cv2.threshold(img, thresh, 255, cv2.THRESH_BINARY)
    return binary
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment