Ivorforce · August 11, 2024 20:41
diff --git a/Steamboat-Willie-Renderer.py b/Steamboat-Willie-Renderer.py
 #%%
 from PIL import Image, ImageDraw, ImageFont
 import numpy as np
 import matplotlib.pyplot as plt
 from einops import rearrange, reduce, repeat
 import unicodedata
 import string
 import sys
 from pathlib import Path

 def mirror_last2_axes(array):
     """Extend ``array`` with mirrored copies along its last two axes.

     The last axis is doubled by appending a reversed copy of the data, then
     the second-to-last axis is doubled the same way, so both trailing
     dimensions come out twice as long as they went in.
     """
     tiled = array
     # Order matters only for readability: last axis first, then the one before.
     for axis in (-1, -2):
         tiled = np.concatenate((tiled, np.flip(tiled, axis=axis)), axis=axis)
     return tiled

 import cv2
 #%%
 # Build the glyph atlas: every candidate character rendered into a cell of
 # identical size, stacked into one array.
 visible_chars = string.digits + string.ascii_letters + string.punctuation + " "
 #visible_chars = [chr(i) for i in range(32, 128)]
 print(f"Visible chars: {len(visible_chars)}")

 font = ImageFont.truetype("/System/Library/Fonts/Monaco.ttf", 10)
 # One bounding box per character; the union of all of them defines a cell
 # that is large enough for every glyph.
 all_bboxes = np.array(tuple(font.getbbox(char) for char in visible_chars))
 bbox = np.array((
     np.min(all_bboxes[:, 0]),
     np.min(all_bboxes[:, 1]),
     np.max(all_bboxes[:, 2]),
     np.max(all_bboxes[:, 3]),
 ))
 # 'Emulate' line spacing of a normal text editor, i.e. 1.08
 bbox[3] = int(bbox[3] + (bbox[3] - bbox[1]) * 0.08)
 char_w = bbox[2] - bbox[0]
 char_h = bbox[3] - bbox[1]

 # Float canvas filled with 1; glyphs are drawn onto it with value 0.
 canvas = Image.new('F', [char_w, char_h], 1)
 draw = ImageDraw.Draw(canvas)

 # NOTE: `ascii` shadows the builtin of the same name; the name is kept
 # because the following cells refer to it.
 ascii = []
 ascii_chars = []
 ascii_set = set()
 for char in visible_chars:
     # Shift by the union box origin so every glyph lands inside the cell.
     draw.text((-bbox[0], -bbox[1]), char, font=font, fill=0)

     array = np.array(canvas)
     # De-duplicate on the pixel bytes themselves rather than on their hash,
     # so two different glyphs can never be merged by a hash collision.
     glyph_key = array.tobytes()
     if glyph_key not in ascii_set:
         ascii_set.add(glyph_key)
         ascii.append(array)
         ascii_chars.append(char)
     # Wipe the canvas for the next character.
     draw.rectangle((0, 0, char_w, char_h), fill=1)

 ascii = np.array(ascii)
 # Mirror each glyph along both image axes (doubles height and width).
 ascii = mirror_last2_axes(ascii)
 print(ascii.shape, bbox)

 #plt.imshow(rearrange(ascii[32:48], 'c w h -> (c w) h'))

 # Phase is meaningless for the 'average brightness pixel'?
 #ascii_fft[:, :1, :] = np.abs(ascii_fft[:, :1, :])
 #ascii_fft[:, :, :1] = np.abs(ascii_fft[:, :, :1])
 #%%
 # Per-frequency weights for comparing tiles in Fourier space.
 # The FFT sample frequencies are taken along the width (axis 2) and the
 # height (axis 1) of the mirrored glyph tiles.
 freq_x = np.fft.fftfreq(ascii.shape[2])
 freq_y = np.fft.fftfreq(ascii.shape[1])
 freq_grid = np.stack(np.meshgrid(freq_x, freq_y), axis=2)
 # Length of the 2-D frequency vector of every FFT bin.
 freq_len = np.linalg.norm(freq_grid, axis=-1)

 # The weight is 1 at zero frequency and shrinks as the frequency grows.
 # Alternatives that were tried and are currently disabled:
 #   (1 + 500 * freq_len ** 2) / (1 + 500 * freq_len ** 4)
 #   (1 + 1 * freq_len ** 1)
 #   a constant 1 for every bin
 #   normalising ascii_fft to unit magnitude before weighting
 freq_weights = np.power(1 - freq_len, 12)

 plt.imshow(freq_weights)
 print(freq_weights.min(), freq_weights.max())

 # Weighted spectrum of every glyph, computed once up front.
 ascii_fft = np.fft.fft2(ascii, axes=(-2, -1))
 ascii_fft_weighted = freq_weights * ascii_fft
 ascii_fft_magn = np.abs(ascii_fft_weighted)
 #%%
 # Convert the source video, frame by frame, into a grayscale video in which
 # every character-sized cell is replaced by the best matching glyph.
 capture = cv2.VideoCapture("/Users/lukas/Downloads/willie-1080.webm")

 width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
 height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
 fps = capture.get(cv2.CAP_PROP_FPS)

 print(width, height, fps)

 # Single-channel writer (last argument False) at the source resolution.
 out = cv2.VideoWriter()
 if not out.open('willie.mov', cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height), False):
     # Do not leak the already opened capture when the writer is unusable.
     capture.release()
     raise IOError("could not open 'willie.mov' for writing")

 # To start from a later frame while debugging:
 # capture.set(cv2.CAP_PROP_POS_FRAMES, 2000)

 # Character grid of one frame. It is derived from the video and glyph size
 # instead of a hard-coded cell count, so other resolutions and fonts work.
 img_rows = height // char_h
 img_cols = width // char_w
 n_cells = img_rows * img_cols
 cell_range = np.arange(n_cells)

 # diff[c, g]: weighted spectral distance between cell c and glyph g.
 diff = np.zeros((n_cells, ascii.shape[0]), dtype=np.float32)
 # idxs[c]: glyph index chosen for cell c in the previous frame.
 idxs = np.zeros(n_cells, dtype=np.int32)
 # Previous (already blended) frame, cropped to whole character cells.
 img_old = np.zeros((img_rows * char_h, img_cols * char_w))

 # The darkest ascii characters aren't nearly as dark as the video dark bits.
 # To make use of the full spectrum of characters (i.e. not clip),
 # we use the rescale factor to decrease the image darkness such that our
 # characters can match it properly.
 rescale = 2.0

 # Frame counter for convenience.
 # NOTE: it is incremented before the end-of-stream check, so after the loop
 # it is one larger than the number of frames that were actually decoded.
 frame_n = 0
 try:
     while capture.isOpened():
         ret, img_src = capture.read()
         frame_n += 1

         if frame_n % 10 == 0:
             print(frame_n)

         # To limit the run while debugging:
         # if frame_n > 150:
         #     break

         if not ret:
             break

         img = img_src / 255
         # Rescale image brightness, see above explanation
         img = (1 - (1 - img) / rescale)

         # Resolution fix: drop the pixels that do not fill a whole cell.
         img = img[:img_rows * char_h, :img_cols * char_w]

         # It's already black and white, but it can't hurt to be sure about it.
         # OpenCV delivers frames in B, G, R order, so red is channel 2.
         img = img[:, :, 2] * 0.299 + img[:, :, 1] * 0.587 + img[:, :, 0] * 0.114
         # Slight motion blur to reduce noise
         img = img * 0.8 + img_old * 0.2
         img_old = img

         # Cut the frame into (rows * cols) tiles of char_h x char_w pixels and
         # mirror them exactly like the glyphs were mirrored.
         img = rearrange(img, '(w1 w2) (h1 h2) -> (w1 h1) w2 h2', w2=char_h, h2=char_w)
         img = mirror_last2_axes(img)

         img_fft = np.fft.fft2(img, axes=(-2, -1))
         img_fft_weighted = img_fft * freq_weights
         img_fft_magn = np.abs(img_fft_weighted)

         # One glyph at a time: the temporary is a single stack of tiles
         # instead of a (cells x glyphs) stack of tiles.
         for i in range(ascii_fft.shape[0]):
             diff[:, i] = np.linalg.norm(np.abs(img_fft_weighted - ascii_fft_weighted[None, i]), axis=(-1, -2))

         # Weigh the previous frame's choice a bit better, so that cells do not
         # flicker between glyphs of nearly equal distance.
         diff[cell_range, idxs] *= 0.95

         idxs[:] = np.argmin(diff, axis=-1)

         # Look up the chosen glyph for every cell and undo the mirroring.
         img_ascii = ascii[idxs].astype(img.dtype)
         img_ascii = img_ascii[..., :char_h, :char_w]

         # Reassemble the tiles into one image and pad it back to the size the
         # writer was opened with.
         img_ascii = rearrange(img_ascii, '(w1 h1) w2 h2 -> (w1 w2) (h1 h2)', w1=img_rows, h1=img_cols)
         if img_ascii.shape[-2] < height or img_ascii.shape[-1] < width:
             img_ascii = np.pad(img_ascii, ((0, height - img_ascii.shape[-2]), (0, width - img_ascii.shape[-1])), 'constant', constant_values=0)

         out.write((img_ascii * 255).astype(np.uint8))
 finally:
     # Always close both files, also when a frame fails half way through.
     capture.release()
     out.release()
     cv2.destroyAllWindows()
 #%%
 # Sanity check: reopen the video written by the conversion cell, read its
 # first frame, dump the raw pixel values and show the frame.
 cap = cv2.VideoCapture("willie.mov")
 # `ret` reports whether a frame could be read; it is not checked here.
 ret, img2 = cap.read()
 print(img2)
 plt.imshow(img2)
 cap.release()
 #%%
 # Compare the size of the last rendered frame with the video size.
 print(img_ascii.shape)
 print(width, height)
 #%%
 # Show the last processed input frame: drop the mirrored part of every tile
 # and stitch the tiles back together into one image.
 plt.figure(figsize=(20, 20))
 plt.imshow(rearrange(img[..., :char_h, :char_w], '(w1 h1) w2 h2 -> (w1 w2) (h1 h2)', w1=img_rows, h1=img_cols), cmap="gray")
 #%%
 # Show one glyph (sixth from the end) after frequency weighting, transformed
 # back to image space and cropped to the unmirrored cell.
 plt.imshow(np.abs(np.fft.ifft2(ascii_fft_weighted[-6], axes=(-2, -1))[..., :char_h, :char_w]), cmap="gray")
 #%%
 # Estimate a brightness rescale factor by comparing the range of average
 # tile values of the last frame with that of the glyphs, both measured on the
 # frequency-weighted data transformed back to image space.
 img_weighted = np.abs(np.fft.ifft2(img_fft_weighted, axes=(-2, -1)))
 ascii_weighted = np.abs(np.fft.ifft2(ascii_fft_weighted, axes=(-2, -1)))

 # Mean value of every tile, computed once per set.
 img_tile_means = np.mean(img_weighted, axis=(1, 2))
 ascii_tile_means = np.mean(ascii_weighted, axis=(1, 2))

 img_min, img_max = np.min(img_tile_means), np.max(img_tile_means)
 ascii_min, ascii_max = np.min(ascii_tile_means), np.max(ascii_tile_means)
 print(img_min, img_max)
 print(ascii_min, ascii_max)

 # Ratio of the two value ranges.
 rescale = (img_max - img_min) / (ascii_max - ascii_min)
 print(rescale)
 #%%
	#%%
	from PIL import Image, ImageDraw, ImageFont
	import numpy as np
	import matplotlib.pyplot as plt
	from einops import rearrange, reduce, repeat
	import unicodedata
	import string
	import sys
	from pathlib import Path

	def mirror_last2_axes(array):
	    """Extend ``array`` with mirrored copies along its last two axes.

	    The last axis is doubled by appending a reversed copy of the data, then
	    the second-to-last axis is doubled the same way, so both trailing
	    dimensions come out twice as long as they went in.
	    """
	    array = np.concatenate((array, np.flip(array, axis=-1)), axis=-1)
	    array = np.concatenate((array, np.flip(array, axis=-2)), axis=-2)
	    return array

	import cv2
	#%%
	# Build the glyph atlas: every candidate character rendered into a cell of
	# identical size, stacked into one array.
	visible_chars = string.digits + string.ascii_letters + string.punctuation + " "
	#visible_chars = [chr(i) for i in range(32, 128)]
	print(f"Visible chars: {len(visible_chars)}")

	font = ImageFont.truetype("/System/Library/Fonts/Monaco.ttf", 10)
	# One bounding box per character; the union of all of them defines a cell
	# that is large enough for every glyph.
	all_bboxes = np.array(tuple(font.getbbox(char) for char in visible_chars))
	bbox = np.array((
	    np.min(all_bboxes[:, 0]),
	    np.min(all_bboxes[:, 1]),
	    np.max(all_bboxes[:, 2]),
	    np.max(all_bboxes[:, 3]),
	))
	# 'Emulate' line spacing of a normal text editor, i.e. 1.08
	bbox[3] = int(bbox[3] + (bbox[3] - bbox[1]) * 0.08)
	char_w = bbox[2] - bbox[0]
	char_h = bbox[3] - bbox[1]

	# Float canvas filled with 1; glyphs are drawn onto it with value 0.
	canvas = Image.new('F', [char_w, char_h], 1)
	draw = ImageDraw.Draw(canvas)

	# NOTE: `ascii` shadows the builtin of the same name; the name is kept
	# because the following cells refer to it.
	ascii = []
	ascii_chars = []
	ascii_set = set()
	for char in visible_chars:
	    # Shift by the union box origin so every glyph lands inside the cell.
	    draw.text((-bbox[0], -bbox[1]), char, font=font, fill=0)

	    array = np.array(canvas)
	    # De-duplicate on the pixel bytes themselves rather than on their hash,
	    # so two different glyphs can never be merged by a hash collision.
	    glyph_key = array.tobytes()
	    if glyph_key not in ascii_set:
	        ascii_set.add(glyph_key)
	        ascii.append(array)
	        ascii_chars.append(char)
	    # Wipe the canvas for the next character.
	    draw.rectangle((0, 0, char_w, char_h), fill=1)

	ascii = np.array(ascii)
	# Mirror each glyph along both image axes (doubles height and width).
	ascii = mirror_last2_axes(ascii)
	print(ascii.shape, bbox)

	#plt.imshow(rearrange(ascii[32:48], 'c w h -> (c w) h'))

	# Phase is meaningless for the 'average brightness pixel'?
	#ascii_fft[:, :1, :] = np.abs(ascii_fft[:, :1, :])
	#ascii_fft[:, :, :1] = np.abs(ascii_fft[:, :, :1])
	#%%
	# Per-frequency weights for comparing tiles in Fourier space.
	# The FFT sample frequencies are taken along the width (axis 2) and the
	# height (axis 1) of the mirrored glyph tiles.
	freq_x = np.fft.fftfreq(ascii.shape[2])
	freq_y = np.fft.fftfreq(ascii.shape[1])
	freq_grid = np.stack(np.meshgrid(freq_x, freq_y), axis=2)
	# Length of the 2-D frequency vector of every FFT bin.
	freq_len = np.linalg.norm(freq_grid, axis=-1)

	# The weight is 1 at zero frequency and shrinks as the frequency grows.
	# Alternatives that were tried and are currently disabled:
	#   (1 + 500 * freq_len ** 2) / (1 + 500 * freq_len ** 4)
	#   (1 + 1 * freq_len ** 1)
	#   a constant 1 for every bin
	#   normalising ascii_fft to unit magnitude before weighting
	freq_weights = np.power(1 - freq_len, 12)

	plt.imshow(freq_weights)
	print(freq_weights.min(), freq_weights.max())

	# Weighted spectrum of every glyph, computed once up front.
	ascii_fft = np.fft.fft2(ascii, axes=(-2, -1))
	ascii_fft_weighted = freq_weights * ascii_fft
	ascii_fft_magn = np.abs(ascii_fft_weighted)
	#%%
	# Convert the source video, frame by frame, into a grayscale video in which
	# every character-sized cell is replaced by the best matching glyph.
	capture = cv2.VideoCapture("/Users/lukas/Downloads/willie-1080.webm")

	width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
	height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
	fps = capture.get(cv2.CAP_PROP_FPS)

	print(width, height, fps)

	# Single-channel writer (last argument False) at the source resolution.
	out = cv2.VideoWriter()
	if not out.open('willie.mov', cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height), False):
	    # Do not leak the already opened capture when the writer is unusable.
	    capture.release()
	    raise IOError("could not open 'willie.mov' for writing")

	# To start from a later frame while debugging:
	# capture.set(cv2.CAP_PROP_POS_FRAMES, 2000)

	# Character grid of one frame. It is derived from the video and glyph size
	# instead of a hard-coded cell count, so other resolutions and fonts work.
	img_rows = height // char_h
	img_cols = width // char_w
	n_cells = img_rows * img_cols
	cell_range = np.arange(n_cells)

	# diff[c, g]: weighted spectral distance between cell c and glyph g.
	diff = np.zeros((n_cells, ascii.shape[0]), dtype=np.float32)
	# idxs[c]: glyph index chosen for cell c in the previous frame.
	idxs = np.zeros(n_cells, dtype=np.int32)
	# Previous (already blended) frame, cropped to whole character cells.
	img_old = np.zeros((img_rows * char_h, img_cols * char_w))

	# The darkest ascii characters aren't nearly as dark as the video dark bits.
	# To make use of the full spectrum of characters (i.e. not clip),
	# we use the rescale factor to decrease the image darkness such that our
	# characters can match it properly.
	rescale = 2.0

	# Frame counter for convenience.
	# NOTE: it is incremented before the end-of-stream check, so after the loop
	# it is one larger than the number of frames that were actually decoded.
	frame_n = 0
	try:
	    while capture.isOpened():
	        ret, img_src = capture.read()
	        frame_n += 1

	        if frame_n % 10 == 0:
	            print(frame_n)

	        # To limit the run while debugging:
	        # if frame_n > 150:
	        #     break

	        if not ret:
	            break

	        img = img_src / 255
	        # Rescale image brightness, see above explanation
	        img = (1 - (1 - img) / rescale)

	        # Resolution fix: drop the pixels that do not fill a whole cell.
	        img = img[:img_rows * char_h, :img_cols * char_w]

	        # It's already black and white, but it can't hurt to be sure about it.
	        # OpenCV delivers frames in B, G, R order, so red is channel 2.
	        img = img[:, :, 2] * 0.299 + img[:, :, 1] * 0.587 + img[:, :, 0] * 0.114
	        # Slight motion blur to reduce noise
	        img = img * 0.8 + img_old * 0.2
	        img_old = img

	        # Cut the frame into (rows * cols) tiles of char_h x char_w pixels and
	        # mirror them exactly like the glyphs were mirrored.
	        img = rearrange(img, '(w1 w2) (h1 h2) -> (w1 h1) w2 h2', w2=char_h, h2=char_w)
	        img = mirror_last2_axes(img)

	        img_fft = np.fft.fft2(img, axes=(-2, -1))
	        img_fft_weighted = img_fft * freq_weights
	        img_fft_magn = np.abs(img_fft_weighted)

	        # One glyph at a time: the temporary is a single stack of tiles
	        # instead of a (cells x glyphs) stack of tiles.
	        for i in range(ascii_fft.shape[0]):
	            diff[:, i] = np.linalg.norm(np.abs(img_fft_weighted - ascii_fft_weighted[None, i]), axis=(-1, -2))

	        # Weigh the previous frame's choice a bit better, so that cells do not
	        # flicker between glyphs of nearly equal distance.
	        diff[cell_range, idxs] *= 0.95

	        idxs[:] = np.argmin(diff, axis=-1)

	        # Look up the chosen glyph for every cell and undo the mirroring.
	        img_ascii = ascii[idxs].astype(img.dtype)
	        img_ascii = img_ascii[..., :char_h, :char_w]

	        # Reassemble the tiles into one image and pad it back to the size the
	        # writer was opened with.
	        img_ascii = rearrange(img_ascii, '(w1 h1) w2 h2 -> (w1 w2) (h1 h2)', w1=img_rows, h1=img_cols)
	        if img_ascii.shape[-2] < height or img_ascii.shape[-1] < width:
	            img_ascii = np.pad(img_ascii, ((0, height - img_ascii.shape[-2]), (0, width - img_ascii.shape[-1])), 'constant', constant_values=0)

	        out.write((img_ascii * 255).astype(np.uint8))
	finally:
	    # Always close both files, also when a frame fails half way through.
	    capture.release()
	    out.release()
	    cv2.destroyAllWindows()
	#%%
	# Sanity check: reopen the video written by the conversion cell, read its
	# first frame, dump the raw pixel values and show the frame.
	cap = cv2.VideoCapture("willie.mov")
	# `ret` reports whether a frame could be read; it is not checked here.
	ret, img2 = cap.read()
	print(img2)
	plt.imshow(img2)
	cap.release()
	#%%
	# Compare the size of the last rendered frame with the video size.
	print(img_ascii.shape)
	print(width, height)
	#%%
	# Show the last processed input frame: drop the mirrored part of every tile
	# and stitch the tiles back together into one image.
	plt.figure(figsize=(20, 20))
	plt.imshow(rearrange(img[..., :char_h, :char_w], '(w1 h1) w2 h2 -> (w1 w2) (h1 h2)', w1=img_rows, h1=img_cols), cmap="gray")
	#%%
	# Show one glyph (sixth from the end) after frequency weighting, transformed
	# back to image space and cropped to the unmirrored cell.
	plt.imshow(np.abs(np.fft.ifft2(ascii_fft_weighted[-6], axes=(-2, -1))[..., :char_h, :char_w]), cmap="gray")
	#%%
	# Estimate a brightness rescale factor by comparing the range of average
	# tile values of the last frame with that of the glyphs, both measured on the
	# frequency-weighted data transformed back to image space.
	img_weighted = np.abs(np.fft.ifft2(img_fft_weighted, axes=(-2, -1)))
	ascii_weighted = np.abs(np.fft.ifft2(ascii_fft_weighted, axes=(-2, -1)))

	# Mean value of every tile, computed once per set.
	img_tile_means = np.mean(img_weighted, axis=(1, 2))
	ascii_tile_means = np.mean(ascii_weighted, axis=(1, 2))

	img_min, img_max = np.min(img_tile_means), np.max(img_tile_means)
	ascii_min, ascii_max = np.min(ascii_tile_means), np.max(ascii_tile_means)
	print(img_min, img_max)
	print(ascii_min, ascii_max)

	# Ratio of the two value ranges.
	rescale = (img_max - img_min) / (ascii_max - ascii_min)
	print(rescale)
	#%%