Created
August 11, 2024 20:41
-
-
Save Ivorforce/75a335a48e7660fc39d12ebae00194b9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#%% | |
from PIL import Image, ImageDraw, ImageFont | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from einops import rearrange, reduce, repeat | |
import unicodedata | |
import string | |
import sys | |
from pathlib import Path | |
def mirror_last2_axes(array):
    """Return *array* extended with its mirror image along the last two axes.

    The last axis is doubled by appending its reversed copy, then the
    second-to-last axis is doubled the same way, so an input of shape
    ``(..., h, w)`` yields ``(..., 2 * h, 2 * w)``. The original data sits in
    the ``[..., :h, :w]`` corner of the result. Leading axes are untouched.
    """
    # Mirror left-to-right first ...
    array = np.concatenate((array, np.flip(array, axis=-1)), axis=-1)
    # ... then mirror the already-widened result top-to-bottom.
    array = np.concatenate((array, np.flip(array, axis=-2)), axis=-2)
    return array
import cv2 | |
#%%
# Build the glyph atlas: render every candidate character into a fixed-size
# float tile (background 1, ink 0), drop characters that render identically,
# and mirror each tile along both image axes.
visible_chars = string.digits + string.ascii_letters + string.punctuation + " "
#visible_chars = [chr(i) for i in range(32, 128)]
print(f"Visible chars: {len(visible_chars)}")

font = ImageFont.truetype("/System/Library/Fonts/Monaco.ttf", 10)

# One (left, top, right, bottom) box per character; `bbox` is their union.
all_bboxes = np.array(tuple(font.getbbox(char) for char in visible_chars))
bbox = np.array((
    np.min(all_bboxes[:, 0]),
    np.min(all_bboxes[:, 1]),
    np.max(all_bboxes[:, 2]),
    np.max(all_bboxes[:, 3]),
))
# 'Emulate' line spacing of a normal text editor, i.e. 1.08
bbox[3] = int(bbox[3] + (bbox[3] - bbox[1]) * 0.08)

# Plain Python ints: these are handed to PIL and used as tile sizes later on.
char_w = int(bbox[2] - bbox[0])
char_h = int(bbox[3] - bbox[1])

canvas = Image.new('F', (char_w, char_h), 1)
draw = ImageDraw.Draw(canvas)
# Shift every glyph so the union box starts at the canvas origin.
text_origin = (-int(bbox[0]), -int(bbox[1]))

ascii = []        # rendered tiles (NOTE: shadows the builtin; later cells rely on this name)
ascii_chars = []  # the character each tile was rendered from
ascii_set = set() # raw pixel bytes of every tile kept so far
for char in visible_chars:
    draw.text(text_origin, char, font=font, fill=0)
    array = np.array(canvas)
    # Compare the pixel bytes themselves rather than their hash(), so a hash
    # collision can never make two different glyphs look like duplicates.
    glyph_key = array.tobytes()
    if glyph_key not in ascii_set:
        ascii_set.add(glyph_key)
        ascii.append(array)
        ascii_chars.append(char)
    # Wipe the canvas back to the background value for the next character.
    draw.rectangle((0, 0, char_w, char_h), fill=1)

ascii = np.array(ascii)
# Mirrored tiles are what gets Fourier-transformed later (presumably to avoid
# wrap-around discontinuities at the tile edges - TODO confirm intent).
ascii = mirror_last2_axes(ascii)
print(ascii.shape, bbox)
#plt.imshow(rearrange(ascii[32:48], 'c w h -> (c w) h'))
# Phase is meaningless for the 'average brightness pixel'?
#ascii_fft[:, :1, :] = np.abs(ascii_fft[:, :1, :])
#ascii_fft[:, :, :1] = np.abs(ascii_fft[:, :, :1])
#%%
# Frequency weighting for the (mirrored) glyph tiles.
# `freq_len` is the magnitude of the 2-D sample frequency of every FFT bin;
# the weight is 1 at zero frequency and shrinks as that magnitude grows.
freq_x = np.fft.fftfreq(ascii.shape[2])
freq_y = np.fft.fftfreq(ascii.shape[1])
freq_grid = np.stack(np.meshgrid(freq_x, freq_y), axis=2)
freq_len = np.linalg.norm(freq_grid, axis=-1)

# Alternative weightings that were tried:
#freq_weights = (1 + 500 * freq_len ** 2) / (1 + 500 * freq_len ** 4)
#freq_weights = (1 + 1 * freq_len ** 1)
freq_weights = (1 - freq_len) ** 12
#freq_weights[:] = 1
#ascii_fft = (ascii_fft / np.maximum(0.001, np.abs(ascii_fft))) * freq_weights

plt.imshow(freq_weights)
print(np.min(freq_weights), np.max(freq_weights))

# Weight the font
ascii_fft = np.fft.fft2(ascii, axes=(-2, -1))
ascii_fft_weighted = ascii_fft * freq_weights
ascii_fft_magn = np.abs(ascii_fft_weighted)
#%%
# Convert the source video to ASCII art: cut every frame into glyph-sized
# cells, pick for each cell the glyph whose weighted spectrum is closest, and
# write the assembled glyph image to a grayscale output video.
capture = cv2.VideoCapture("/Users/lukas/Downloads/willie-1080.webm")
if not capture.isOpened():
    raise IOError("Could not open the input video")
width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = capture.get(cv2.CAP_PROP_FPS)
print(width, height, fps)

out = cv2.VideoWriter()
if not out.open('willie.mov', cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height), False):
    # Do not leak the already-opened capture when the writer cannot be created.
    capture.release()
    raise IOError("Could not open 'willie.mov' for writing")

# capture.set(cv2.CAP_PROP_POS_FRAMES, 2000)

# Grid of whole glyph cells that fit into one frame. Everything below is sized
# from this instead of a hard-coded cell count, so other resolutions and fonts
# work as well.
img_rows = height // char_h
img_cols = width // char_w
n_cells = img_rows * img_cols
cell_ids = np.arange(n_cells)

diff = np.zeros((n_cells, ascii.shape[0]), dtype=np.float32)  # cell x glyph distance
idxs = np.zeros(n_cells, dtype=np.int32)                      # chosen glyph per cell
img_old = np.zeros((img_rows * char_h, img_cols * char_w))    # previous (blurred) frame

# Un-mirrored glyph tiles used to paint the output; float64 to match the
# dtype of the processed frames.
glyph_tiles = ascii[:, :char_h, :char_w].astype(np.float64)

# The darkest ascii characters aren't nearly as dark as the video dark bits.
# To make use of the full spectrum of characters (i.e. not clip),
# we use the rescale factor to decrease the image darkness such that our
# characters can match it properly.
rescale = 2.0

# Frame counter for convenience (counts successfully decoded frames)
frame_n = 0
try:
    while capture.isOpened():
        ret, img_src = capture.read()
        if not ret:
            break
        frame_n += 1
        if frame_n % 10 == 0:
            print(frame_n)
        # if frame_n > 150:
        #     break
        first_frame = frame_n == 1

        img = img_src / 255
        # Rescale image brightness, see above explanation
        img = 1 - (1 - img) / rescale
        # Resolution fix: drop the pixels that do not fill a whole glyph cell
        img = img[:img_rows * char_h, :img_cols * char_w]
        # It's already black and white, but it can't hurt to be sure about it.
        # OpenCV decodes frames as BGR, so channel 2 is red and channel 0 is blue.
        img = img[:, :, 2] * 0.299 + img[:, :, 1] * 0.587 + img[:, :, 0] * 0.114
        # Slight motion blur to reduce noise. The very first frame has no
        # predecessor, so it is left as is rather than blended with black.
        if not first_frame:
            img = img * 0.8 + img_old * 0.2
        img_old = img

        # Split into one tile per cell. In the pattern, w1/h1 are the cell
        # row/column and w2/h2 the pixel row/column inside a cell.
        img = rearrange(img, '(w1 w2) (h1 h2) -> (w1 h1) w2 h2', w2=char_h, h2=char_w)
        img = mirror_last2_axes(img)
        img_fft = np.fft.fft2(img, axes=(-2, -1))
        img_fft_weighted = img_fft * freq_weights
        # Not used by the matching below; kept so it can be inspected afterwards.
        img_fft_magn = np.abs(img_fft_weighted)

        # Distance of every cell to every glyph, one glyph at a time to keep
        # the temporary arrays at (n_cells, tile_h, tile_w).
        for glyph_i, glyph_fft in enumerate(ascii_fft_weighted):
            diff[:, glyph_i] = np.linalg.norm(np.abs(img_fft_weighted - glyph_fft), axis=(-1, -2))

        # weigh prev frame a bit better (only once a previous choice exists)
        if not first_frame:
            diff[cell_ids, idxs] *= 0.95
        idxs[:] = np.argmin(diff, axis=-1)

        # Paint the chosen glyphs and stitch the tiles back into one image.
        img_ascii = glyph_tiles[idxs]
        img_ascii = rearrange(img_ascii, '(w1 h1) w2 h2 -> (w1 w2) (h1 h2)', w1=img_rows, h1=img_cols)
        # Pad the cropped right/bottom margin so the frame matches the writer size.
        if img_ascii.shape[-2] < height or img_ascii.shape[-1] < width:
            img_ascii = np.pad(
                img_ascii,
                ((0, height - img_ascii.shape[-2]), (0, width - img_ascii.shape[-1])),
                'constant',
                constant_values=0,
            )
        out.write((img_ascii * 255).astype(np.uint8))
finally:
    capture.release()
    out.release()
    cv2.destroyAllWindows()
#%%
# Sanity check: read the first frame back from the written video and show it.
cap = cv2.VideoCapture("willie.mov")
try:
    ret, img2 = cap.read()
finally:
    # Release the reader even if decoding raised.
    cap.release()
if not ret:
    # Fail with a clear message instead of an obscure error from imshow(None).
    raise IOError("Could not read a frame from 'willie.mov'")
print(img2)
plt.imshow(img2)
#%%
# Compare the size of the last written ASCII frame with the video size.
print(img_ascii.shape)
print(width, height)
#%%
# Show the last processed source frame, re-assembled from the un-mirrored
# corner of each tile.
plt.figure(figsize=(20, 20))
source_tiles = img[..., :char_h, :char_w]
source_frame = rearrange(source_tiles, '(w1 h1) w2 h2 -> (w1 w2) (h1 h2)', w1=img_rows, h1=img_cols)
plt.imshow(source_frame, cmap="gray")
#%%
# Show what one glyph (sixth from the end) looks like after frequency
# weighting, transformed back to pixel space.
glyph_spectrum = ascii_fft_weighted[-6]
glyph_pixels = np.abs(np.fft.ifft2(glyph_spectrum, axes=(-2, -1)))
plt.imshow(glyph_pixels[..., :char_h, :char_w], cmap="gray")
#%%
# Compare the range of mean tile brightness (after frequency weighting) between
# the last processed frame and the glyph set, and print the ratio of the two
# ranges. NOTE: this rebinds the module-level name `rescale`.
img_weighted = np.abs(np.fft.ifft2(img_fft_weighted, axes=(-2, -1)))
ascii_weighted = np.abs(np.fft.ifft2(ascii_fft_weighted, axes=(-2, -1)))

# Mean brightness of every tile / glyph.
img_tile_means = np.mean(img_weighted, axis=(1, 2))
ascii_tile_means = np.mean(ascii_weighted, axis=(1, 2))

img_min = np.min(img_tile_means)
ascii_min = np.min(ascii_tile_means)
img_max = np.max(img_tile_means)
ascii_max = np.max(ascii_tile_means)
print(img_min, img_max)
print(ascii_min, ascii_max)

rescale = (img_max - img_min) / (ascii_max - ascii_min)
print(rescale)
#%%
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment