import time
import mss
from trigger_key import (
    PressKey,
    ReleaseKey,
    VK_UP_ARROW_KEY,
    VK_DOWN_ARROW_KEY,
    VK_LEFT_ARROW_KEY,
    VK_RIGHT_ARROW_KEY,
)
from PIL import Image, ImageStat
import cv2 as cv
import numpy as np

# DEBUG = True
DEBUG = False

# When doing the Kiryu singing parts, this should be True;
# for the accompaniment songs (with the pink needle), set it
# to False
IS_NEEDLE_BLUE = True
# IS_NEEDLE_BLUE = False

# Note: there are plenty of hard-coded pixel values in this code, so you're
# best off matching these dimensions as closely as you can (maybe adjust the
# top if necessary)
GAME_WINDOW_DIMENSIONS = {"top": 45, "left": 0, "width": 1280, "height": 720}

KEYPRESS_DURATION_SEC = 0.02
DRUM_DOWNTIME_SEC = 0.1
ROW_HEIGHT = int(GAME_WINDOW_DIMENSIONS["height"] * 15 / 72)
TEMPLATE_MATCH_THRESHOLD = 0.80
MAX_PIXELS_BEFORE_PLAYING_NOTE = 18
MIN_PIXELS_BEFORE_PLAYING_NOTE = 5
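# With the default 720 px window height, ROW_HEIGHT works out to
# int(720 * 15 / 72) = 150 px per lyric row. The two *_PIXELS_BEFORE_PLAYING_NOTE
# values bound the "strike zone" that karaoke_singer() recomputes every frame.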

notes_info = {
    "down": {"x": 5, "key": VK_DOWN_ARROW_KEY},
    "right": {"x": 47, "key": VK_RIGHT_ARROW_KEY},
    "up": {"x": 88, "key": VK_UP_ARROW_KEY},
    "left": {"x": 130, "key": VK_LEFT_ARROW_KEY},
}
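
# notes.png is expected to be a small grayscale reference sheet with the four
# arrow glyphs drawn at the x offsets listed in notes_info (starting at y = 9),
# so each note gets a 13x19 template for its left half and a second template
# offset 12 px over for its right half.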
note_templates = cv.imread("notes.png", 0)
TEMPLATE_WIDTH = 13
TEMPLATE_HEIGHT = 19
RIGHT_TEMPLATE_OFFSET = 12
for note_info in notes_info.values():
    x = note_info["x"]
    y = 9
    note_info["template_left"] = note_templates[
        y : y + TEMPLATE_HEIGHT, x : x + TEMPLATE_WIDTH
    ]
    note_info["template_right"] = note_templates[
        y : y + TEMPLATE_HEIGHT,
        x + RIGHT_TEMPLATE_OFFSET : x + RIGHT_TEMPLATE_OFFSET + TEMPLATE_WIDTH,
    ]
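

# One shared mss screen grabber, reused for every capture below rather than
# creating a new instance each frame.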
sct = mss.mss()


def get_left_side_image():
    """Screenshot the left side of the screen, so we can
    search for the needle after it changes rows
    """
    return np.asarray(
        sct.grab(
            {
                **GAME_WINDOW_DIMENSIONS,
                "width": int(GAME_WINDOW_DIMENSIONS["width"] / 4),
            }
        )
    )


def get_row_image(y_offset):
    """Screenshot a row of notes and lyrics"""
    return np.asarray(
        sct.grab(
            {
                **GAME_WINDOW_DIMENSIONS,
                "top": int(GAME_WINDOW_DIMENSIONS["top"] + max(0, y_offset)),
                "height": ROW_HEIGHT,
            }
        )
    )
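
# Note: sct.grab() returns raw BGRA pixel data, so np.asarray() here yields an
# (H, W, 4) uint8 array, which is why find_notes() below converts to grayscale
# with COLOR_BGRA2GRAY.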

def extract_blue_needle_region(img_hsv):
    low_H = 92
    high_H = 115
    low_S = 118
    high_S = 255
    low_V = 71
    high_V = 255
    mask = cv.inRange(img_hsv, (low_H, low_S, low_V), (high_H, high_S, high_V))

    return mask


def extract_pink_needle_region(img_hsv):
    low_H = 158
    high_H = 169
    low_S = 143
    high_S = 186
    low_V = 107
    high_V = 223
    mask = cv.inRange(img_hsv, (low_H, low_S, low_V), (high_H, high_S, high_V))

    return mask
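

# The hue bounds above are on OpenCV's 0-179 scale (degrees halved), so 92-115
# covers the blue needle (~184-230 degrees) and 158-169 the pink one
# (~316-338 degrees). Below is a rough, optional helper for eyeballing a mask
# while re-tuning those bounds against a saved screenshot; it isn't called
# anywhere, and "row.png" is only a placeholder filename.
def preview_needle_mask(path="row.png", blue=True):
    img = cv.imread(path)
    img_hsv = cv.cvtColor(img, cv.COLOR_BGR2HSV)
    mask = (
        extract_blue_needle_region(img_hsv)
        if blue
        else extract_pink_needle_region(img_hsv)
    )
    cv.imshow("needle mask", mask)
    cv.waitKey(0)
    cv.destroyAllWindows()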


def get_needle_location(img_hsv, min_x=None):
    """Locate the needle using its color and line detection"""

    # If a min_x has been set, we narrow the search down to the only area
    # where we'd expect the needle to be.
    # Also, it's a bit hacky, but the bottom 1/8 of the image
    # (containing the notes) has been trimmed off to make it less likely
    # to detect lines in the row below
    if min_x is not None:
        img_hsv = img_hsv[: int(img_hsv.shape[0] * 7 / 8), min_x : min_x + 120]
    else:
        img_hsv = img_hsv[: int(img_hsv.shape[0] * 7 / 8)]

    # Extract the region of the image with the needle by its hue, targeted to the
    # needle color we want. In retrospect, maybe I could have looked into
    # template matching (as I did to identify notes) here as well, although
    # it might not have been very resilient to the explosions of color
    # the notes make when played. Anyway. Follow up with Canny edge detection
    # to get nice outlines for the Hough transform to use for line detection.
    global IS_NEEDLE_BLUE
    if IS_NEEDLE_BLUE:
        img_outlines = cv.Canny(extract_blue_needle_region(img_hsv), 70, 200, None, 3)
    else:
        img_outlines = cv.Canny(extract_pink_needle_region(img_hsv), 70, 200, None, 3)

    lines = cv.HoughLinesP(img_outlines, 1, np.pi, 30, None, 30, 10)
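    # (With the theta step set to np.pi there is effectively a single angle bin,
    # so the transform should only report near-vertical segments, which is what
    # the needle looks like.)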

    if DEBUG and img_hsv.shape[0] <= ROW_HEIGHT:
        cv.imshow("outline", img_outlines)
        cv.moveWindow("outline", 10, 720 + ROW_HEIGHT)
        if cv.waitKey(25) & 0xFF == ord("q"):
            cv.destroyAllWindows()

    if lines is None:
        return 0, 0, False

    # Account for the x offset that would have occurred by working with a trimmed image
    if min_x is not None:
        lines[:, :, 0] += min_x
        lines[:, :, 2] += min_x

    # Very rough approximation of the center point of the needle, using
    # averages of the x's and y's in the lines detected.
    # It's very weak to noise in the form of other things incorrectly
    # identified as lines, but my general approach was to eliminate the
    # noise itself before worrying about how I calculated this.
    mean = lambda index: np.mean([l[0][index] for l in lines])
    avg_x = int(mean(0))
    avg_y = int((mean(1) + mean(3)) / 2)

    return avg_x, avg_y, True


def get_template_match(img, template):
    """Returns an array of x's and y's corresponding to points where
    a template image was most likely to be found (using a threshold)
    """
    res = cv.matchTemplate(img, template, cv.TM_CCOEFF_NORMED)
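    # np.where on the thresholded result gives (row_indices, col_indices), i.e.
    # (ys, xs), which is why callers read index 1 for x and index 0 for y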
    return np.where(res > TEMPLATE_MATCH_THRESHOLD)


def find_notes(row_img):
    """Locates the notes in the image, and identifies if they should be held or drummed"""

    # Only look at the very bottom part of the image, which contains the notes
    img_bottom = row_img[int(row_img.shape[0] * 3 / 4) :, :]

    # Get an HSV representation of the image so we can check hue values
    # on the right side of the notes to determine if they should
    # be drummed or held
    img_hsv = cv.cvtColor(img_bottom, cv.COLOR_BGR2HSV)

    # Get a grayscale representation of the image, necessary for template matching
    img = cv.cvtColor(img_bottom, cv.COLOR_BGRA2GRAY)

    notes = []
    for note_name, note_info in notes_info.items():
        # To avoid missing notes that are overlapped on either
        # their right or left sides, we match on both sides independently,
        # and then combine the results
        left_side_matches = get_template_match(img, note_info["template_left"])
        right_loc = get_template_match(img, note_info["template_right"])

        # Put the located x's and y's into a nicer format to work with
        xs = np.sort(
            np.concatenate(
                [
                    # Since the template match finds the left side of the note,
                    # here we add TEMPLATE_WIDTH to record the x at roughly the
                    # center of the note
                    left_side_matches[1] + TEMPLATE_WIDTH,
                    # and for the right side, we first eliminate the offset so
                    # the location lines up with the left-side matches
                    right_loc[1] - RIGHT_TEMPLATE_OFFSET + TEMPLATE_WIDTH,
                ]
            )
        )
        ys = np.sort(np.concatenate([left_side_matches[0], right_loc[0]]))

        # If there were any template matches for this note, process them
        if len(xs) > 0:
            y = ys[0]

            last_x = -100
            for x in xs:
                if x - last_x < TEMPLATE_WIDTH:
                    # Skip repeated hits of the same template in the same space
                    # Pretty hacky, but it works
                    continue
                last_x = x

                # Using a single pixel slightly to the right of the note, we
                # check if the color looks close to what would appear if the
                # note has a HOLD or DRUM bar after it. Surprisingly reliable.
                check_pixel = (x + 35, int(y + TEMPLATE_HEIGHT / 2))
                [hue, saturation, value] = img_hsv[check_pixel[1], check_pixel[0]]
                has_drum = value > 20 and saturation > 20 and 147 < hue < 167
                has_hold = value > 20 and saturation > 20 and 82 < hue < 102
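                # (On the same 0-179 hue scale, 147-167 sits in the pink/magenta
                # range used by DRUM bars and 82-102 in the cyan/blue range used
                # by HOLD bars.)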

                notes.append((x, note_name, has_drum, has_hold))

    # Sort the notes from left to right
    notes.sort(key=lambda n: n[0])

    # Print out the notes detected
    print(
        "new line",
        ",".join(
            f"{f[1]}{'-drum' if f[2] else ''}{'-hold' if f[3] else ''}" for f in notes
        ),
    )

    return notes


def karaoke_singer():
    last_known_row_y = 0
    notes = None

    fps = 0
    last_time = time.time()
    last_start_x = None
    last_needle_x = None
    frames_since_row_change = 0
    last_drum_time = time.time()
    drum_key = None
    hold_key = None
    missed_frames = 0

    # The main karaoke-playing loop
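    # Each iteration: screenshot the current row, locate the needle by its
    # color, re-find the row and rescan its notes whenever the needle jumps
    # to a new line, and press / hold / drum the matching arrow key once the
    # needle gets close enough to a note.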
    while True:
        frames_since_row_change += 1
        img = get_row_image(last_known_row_y)
        img_hsv = cv.cvtColor(img, cv.COLOR_BGR2HSV)
        needle_x, needle_y, needle_was_found = get_needle_location(
            img_hsv, last_needle_x
        )

        if not needle_was_found:
            # needle was not found at the row we expected.
            # try to find it at the beginning of another row

            # Allow the needle to go missing just for a bit before
            # searching other rows
            if missed_frames < 1:
                missed_frames += 1
                continue

            last_needle_x = None
            img = get_left_side_image()
            img_hsv = cv.cvtColor(img, cv.COLOR_BGR2HSV)
            needle_x, needle_y, needle_was_found = get_needle_location(img_hsv, 80)

            if not needle_was_found:
                # if we still couldn't find it, give up
                continue

            # Reset the notes we identified on the previous row
            notes = None
            last_start_x = needle_x
            frames_since_row_change = 0

            # Snap the needle y value up to the closest row y
            screen_top_padding = int(ROW_HEIGHT * 90 / 300)
            last_known_row_y = (
                (needle_y - screen_top_padding) // ROW_HEIGHT
            ) * ROW_HEIGHT + screen_top_padding
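            # (With the defaults, screen_top_padding is int(150 * 90 / 300) = 45 px,
            # and the floor division snaps needle_y to the top of whichever row
            # it landed in.)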

            # Prepare the row image for note-finding
            img = get_row_image(last_known_row_y)

        # Reset some stuff since we found the needle
        missed_frames = 0
        last_needle_x = needle_x

        # When the needle is at the beginning of a row,
        # scope out all the notes coming up
        if notes is None:
            notes = find_notes(img)

        # Get a very rough approximation of the speed per frame of the needle
        # This is super finicky, and I'm still not sure if it made a difference
        # in the long run.
        needle_speed = (
            (needle_x - last_start_x) / frames_since_row_change
            if frames_since_row_change != 0 and last_start_x is not None
            else 10
        )

        # Maximum distance the needle has to be from a note before
        # we attempt to play it
        strike_zone = min(
            MAX_PIXELS_BEFORE_PLAYING_NOTE,
            max(needle_speed * 3, MIN_PIXELS_BEFORE_PLAYING_NOTE),
        )
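        # (Roughly three frames of needle travel, clamped between
        # MIN_PIXELS_BEFORE_PLAYING_NOTE and MAX_PIXELS_BEFORE_PLAYING_NOTE.)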

        # Rapid-fire key presses when drummed notes are active
        if drum_key is not None and last_drum_time < time.time() - DRUM_DOWNTIME_SEC:
            PressKey(drum_key)
            time.sleep(KEYPRESS_DURATION_SEC)
            ReleaseKey(drum_key)
            last_drum_time = time.time()

        for index, note in enumerate(notes):
            x, note_name, has_drum, has_hold = note

            # If we're close enough to a note, play it.
            if abs(x - needle_x) < strike_zone:
                print("you're welcome", note_name)
                notes.pop(index)

                if drum_key is not None:
                    # just stop drumming
                    drum_key = None
                    break

                if hold_key is not None:
                    # release the held key
                    ReleaseKey(hold_key)
                    hold_key = None
                    break

                PressKey(notes_info[note_name]["key"])
                if not has_hold:
                    time.sleep(KEYPRESS_DURATION_SEC)
                    ReleaseKey(notes_info[note_name]["key"])

                hold_key = notes_info[note_name]["key"] if has_hold else None
                drum_key = notes_info[note_name]["key"] if has_drum else None
                break

        # Keep track of FPS for the detection, sometimes handy for debugging
        fps += 1
        if time.time() - last_time >= 1:
            last_time = time.time()
            # print(f"fps: {fps}")
            fps = 0

        # If debugging, draw the current row with the perceived location of the
        # needle drawn as a green line
        if DEBUG:
            cv.line(
                img,
                (needle_x - int(strike_zone), ROW_HEIGHT - 10),
                (needle_x + int(strike_zone), ROW_HEIGHT - 10),
                (0, 255, 0),
                5,
            )
            cv.line(
                img,
                (needle_x, ROW_HEIGHT - 30),
                (needle_x, ROW_HEIGHT - 10),
                (0, 255, 0),
                5,
            )
            window_title = "Test"
            cv.imshow(window_title, img)
            cv.moveWindow(window_title, 10, 720)
            if cv.waitKey(25) & 0xFF == ord("q"):
                cv.destroyAllWindows()
                break


karaoke_singer()
Images for README: