"""Convert a shogi board image to SFEN (requires the tsumemi package).

Created July 2, 2024 07:30
"""
import math
import cv2
import numpy as np
import pytesseract
from pytesseract import Output
from basetypes import Koma, Side, KTYPE_FROM_KANJI
from position import Position
from square import Square
import sys
# Single kanji/kana characters that are accepted as OCR results for a piece.
KOMA_CHARS = {
    "成", "歩", "香", "桂", "銀", "金", "角", "飛",
    "玉", "と", "龍", "竜", "馬", "全", "圭", "杏",
}
def main():
    """Read ``input.png``, OCR every board square and print the position.

    The image is loaded as grayscale, the grid lines are located, the image
    is binarised, and each of the 81 squares is cropped and passed to
    Tesseract. Recognised pieces are placed on a ``Position``.

    Exits with status 1 if the image cannot be opened or if the detected
    grid does not consist of exactly 10 vertical and 10 horizontal lines.
    """
    img = cv2.imread("input.png", cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals failure by returning None; nothing else can run.
        print("image failed to open")
        sys.exit(1)

    xs, ys = find_board_lines(img)
    if len(xs) != 10 or len(ys) != 10:
        # The loops below index xs/ys from 0 to 9, so anything other than
        # ten lines per direction would crash or mis-crop.
        print("Did not identify a 9x9 grid")
        sys.exit(1)

    img = apply_threshold(img)

    # Build the position one square at a time.
    pos = Position()
    for row_idx in range(9):
        for col_idx in range(9):
            # Each grid line is a (low, high) coordinate pair: the square
            # starts at the high side of one line and ends at the low side
            # of the next.
            x = xs[col_idx][1]
            y = ys[row_idx][1]
            w = xs[col_idx + 1][0] - x
            h = ys[row_idx + 1][0] - y
            sq_img = crop_image(img, x, y, w, h, trim=1)
            # Add a white border around the image for better recognition.
            sq_img = cv2.copyMakeBorder(
                sq_img, 15, 15, 15, 15,
                cv2.BORDER_CONSTANT, value=(255, 255, 255),
            )
            # For debugging: cv2.imshow(f"{row_idx}{col_idx}", sq_img)

            # Read the koma character and whether it was upside down.
            koma_str, is_upside_down = read_koma_characters(sq_img)
            if not koma_str:
                # No piece character recognised on this square.
                continue

            # NOTE(review): assumes Koma.make(side, ktype) and that
            # KTYPE_FROM_KANJI has an entry for every character in
            # KOMA_CHARS -- confirm against basetypes.
            koma = Koma.make(
                Side.GOTE if is_upside_down else Side.SENTE,
                KTYPE_FROM_KANJI[koma_str],
            )
            print(f"{row_idx+1},{col_idx+1}: {'v' if is_upside_down else ''}{koma_str} = {str(koma)}")
            sq = Square.from_cr(*xynum_to_cr(col_idx, row_idx))
            pos.set_koma(koma, sq)

    # NOTE(review): assumes Position exposes to_sfen(); confirm against
    # position.py.
    print(pos.to_sfen())
def find_board_lines(img):
    """Locate the board's grid lines in a grayscale image.

    Runs edge detection and a Hough transform, keeps the lines accepted by
    ``is_line_vertical`` / ``is_line_horizontal``, and groups their integer
    rho values with ``to_pair_list``.

    Returns:
        ``(vertical_lines, horizontal_lines)``: two lists of coordinate
        pairs as produced by ``to_pair_list``. Both lists are empty when the
        Hough transform finds no lines.
    """
    edges = detect_edges(img)
    detected = find_lines(edges)
    if detected is None:
        # cv2.HoughLines gives None instead of an empty array when nothing
        # reaches the accumulator threshold.
        return [], []

    # Each Hough result is wrapped as [[rho, theta]]; unwrap it.
    lines = [(line[0][0], line[0][1]) for line in detected]

    # to_pair_list sorts its input, so no pre-sorting is needed here.
    horizontal_lines = to_pair_list(
        int(r) for r, theta in lines if is_line_horizontal(r, theta)
    )
    vertical_lines = to_pair_list(
        int(r) for r, theta in lines if is_line_vertical(r, theta)
    )
    return vertical_lines, horizontal_lines
def detect_edges(img):
    """Return the Canny edge map of *img*."""
    low_threshold, high_threshold, aperture_size = 50, 200, 3
    return cv2.Canny(img, low_threshold, high_threshold, None, aperture_size)
def find_lines(img, threshold=300):
    """Run the standard Hough line transform on an edge image.

    Args:
        img: Edge map, e.g. the output of ``detect_edges``.
        threshold: Accumulator threshold passed to ``cv2.HoughLines``;
            lower it if grid lines are missed, raise it to drop spurious
            lines.

    Returns:
        Whatever ``cv2.HoughLines`` returns: an array whose entries are
        ``[[rho, theta]]``, or ``None`` when no line is found.
    """
    return cv2.HoughLines(
        img, rho=1, theta=math.pi / 180, threshold=threshold,
        lines=None, srn=0, stn=0,
    )
def is_line_horizontal(r, theta, threshold=3*math.pi/180):
    """Return whether *theta* is strictly within *threshold* radians of pi/2.

    *r* is accepted for signature symmetry with the Hough output but is not
    used.
    """
    deviation = theta - math.pi/2
    return abs(deviation) < threshold
def is_line_vertical(r, theta, threshold=3*math.pi/180):
    """Return whether *theta* is strictly within *threshold* radians of 0.

    *r* is not used. Only angles near 0 are accepted; angles near pi are not
    treated as vertical by this check.
    """
    deviation = theta - 0
    return abs(deviation) < threshold
def to_pair_list(xs, threshold=5):
    """Group coordinates into ``(low, high)`` pairs, in ascending order.

    The values are sorted and walked once. Two consecutive values that are
    at most *threshold* apart form one pair. A value with no such partner is
    duplicated into ``(x, x)`` so that every entry of the result is a pair.

    Args:
        xs: Iterable of coordinates, in any order.
        threshold: Largest gap for two values to count as the same pair.

    Returns:
        List of 2-tuples sorted by coordinate.
    """
    res = []
    prev_x = None  # first half of a pair still waiting for its partner
    for x in sorted(xs):
        if prev_x is None:
            prev_x = x
        elif x > prev_x + threshold:
            # Too far away to be the partner: close the pending value on
            # its own and start a new pair with x.
            res.append((prev_x, prev_x))
            prev_x = x
        else:
            res.append((prev_x, x))
            prev_x = None
    if prev_x is not None:
        # Trailing value without a partner.
        res.append((prev_x, prev_x))
    return res
def crop_image(img, x, y, w, h, trim=0):
    """Return the ``w`` x ``h`` region of *img* at ``(x, y)``, minus a margin.

    Args:
        img: Image array indexed as ``img[row, column]``.
        x, y: Top-left corner of the region.
        w, h: Width and height of the region.
        trim: Width in pixels of the strip removed from each of the four
            edges, so the result is ``w - 2*trim`` by ``h - 2*trim``.

    Returns:
        The slice ``img[y+trim : y+h-trim, x+trim : x+w-trim]``.
    """
    # The near edges move inward by `trim` and the far edges move inward by
    # `trim` as well; subtracting 2*trim on the far side would cut twice as
    # much from the right/bottom as from the left/top.
    x1 = x + trim
    x2 = x + w - trim
    y1 = y + trim
    y2 = y + h - trim
    return img[y1:y2, x1:x2]
def char_from_data(data):
    """Return the first recognised text with a positive confidence.

    Args:
        data: Dict with parallel ``"text"`` and ``"conf"`` lists, as returned
            by ``pytesseract.image_to_data(..., output_type=Output.DICT)``.

    Returns:
        ``(text, confidence)`` for the first entry whose confidence is
        greater than 0, with the confidence as a float; ``("", -1)`` when
        there is no such entry.
    """
    for text, raw_conf in zip(data["text"], data["conf"]):
        # Depending on the pytesseract/Tesseract versions the confidence can
        # arrive as an int, a float or a string such as "-1" or "96.5";
        # comparing a string with 0 would raise TypeError.
        try:
            conf = float(raw_conf)
        except (TypeError, ValueError):
            continue
        if conf > 0:
            return text, conf
    return "", -1
def read_koma_characters(img):
    """OCR a single square and decide which way up the piece is.

    The image is recognised twice, once as given and once flipped along
    every axis (a 180-degree rotation for a 2-D image).

    Returns:
        ``(character, is_upside_down)``. ``character`` is a member of
        ``KOMA_CHARS``, or ``""`` if neither orientation produced one (in
        which case ``is_upside_down`` is ``False``). When both orientations
        produce a valid character the one with the strictly higher
        confidence wins, and ties go to the flipped reading.
    """
    # --psm 10 tells Tesseract to treat the image as a single character.
    tess_config = '-l jpn --psm 10'
    img180 = np.flip(img)

    koma_data = pytesseract.image_to_data(
        img, config=tess_config, output_type=Output.DICT
    )
    koma180_data = pytesseract.image_to_data(
        img180, config=tess_config, output_type=Output.DICT
    )
    ch, conf = char_from_data(koma_data)
    ch180, conf180 = char_from_data(koma180_data)

    upright_ok = ch in KOMA_CHARS
    flipped_ok = ch180 in KOMA_CHARS
    if upright_ok and flipped_ok:
        return (ch, False) if conf > conf180 else (ch180, True)
    if upright_ok:
        return ch, False
    if flipped_ok:
        return ch180, True
    return "", False
def _find_two_points(r, theta):
a = math.cos(theta)
b = math.sin(theta)
x0 = a * r
y0 = b * r
pt1 = (int(x0 - 1000 * b), int(y0 + 1000 * a))
pt2 = (int(x0 + 1000 * b), int(y0 - 1000 * a))
return (pt1, pt2)
def xynum_to_cr(x_num, y_num):
    """Map 0-based grid indices to the pair given to ``Square.from_cr``.

    The x index is reversed (0 -> 9, 8 -> 1) and the y index is shifted to
    start at 1.
    """
    first = 9 - x_num
    second = y_num + 1
    return first, second
def apply_threshold(img, thresh=165, *, use_otsu=False):
    """Binarise a grayscale image and return the thresholded image.

    Different boards may need different threshold values for the piece
    characters to be read correctly, hence the parameter.

    Args:
        img: Grayscale image.
        thresh: Fixed threshold; pixels above it become 255, others 0.
        use_otsu: When true, add ``cv2.THRESH_OTSU`` to the threshold type
            so that OpenCV chooses the threshold itself (per the OpenCV
            documentation *thresh* is then not used).

    Returns:
        The binary image (second element of ``cv2.threshold``'s result).
    """
    # THRESH_OTSU is a flag that must be OR-ed into the type argument; given
    # as a separate positional argument it lands in the `dst` parameter and
    # does not select Otsu's method.
    mode = cv2.THRESH_BINARY
    if use_otsu:
        mode |= cv2.THRESH_OTSU
    return cv2.threshold(img, thresh, 255, mode)[1]
# Run the conversion only when executed as a script, not on import.
if __name__ == "__main__":
    main()