Last active
January 11, 2024 08:45
-
-
Save DavidBuchanan314/acae2aab38953759aacc114b417ed0b9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
""" | |
This (pure!) python script streams a gzip-compressed YUV4MPEG video to stdout. | |
It easily runs at 1080p60fps on my machine. | |
Pipe it into a media player like this: | |
python3 gzip_swar_life.py | mbuffer | gunzip - | mpv - | |
(Note: mbuffer is used so that gzip and python don't block on each other) | |
The implementation works by storing each cell as a 4-bit value, all packed into | |
a single python bigint. Operations are performed on every cell in parallel, using | |
SWAR techniques ("SIMD Within A Register", where "register" is a bigint. SWAB?). | |
https://en.wikipedia.org/wiki/SWAR | |
4-bits-per-cell allows us to count the number of neighbors without overflow. | |
However, YUV4MPEG only supports 8-bit pixel formats. | |
We avoid performing pixel format conversion in python, by offloading the | |
problem to gzip. We generate a gzip file with a custom DEFLATE huffman tree, | |
which uses 4-bit symbols. Gzip will expand these 4-bit symbols into 8-bit bytes | |
for us, automagically. | |
# native res config for 14" M1 MBP (minus notch) | |
WIDTH = 3024 | |
HEIGHT = 1890 | |
FPS = 24 | |
""" | |
WIDTH = 1920 | |
HEIGHT = 1080 | |
FPS = 60 # Reduce if your media player is buffering. | |
GZIP_HEADER = b"\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\x03" | |
YUV_HEADER = f"YUV4MPEG2 W{WIDTH} H{HEIGHT} F{FPS}:1 Cmono\n".encode() | |
# This tree could be encoded more concisely, but I'm lazy | |
MAGIC_HUFFMAN_TREE = \ | |
bytes.fromhex(f"6ce30990244992244908fc{'f'*60}00000020{'2'*14}0000") | |
STATE_BYTE_LENGTH = (WIDTH * HEIGHT) // 2 | |
COLSHIFT = WIDTH * 4 | |
WRAPSHIFT = WIDTH * HEIGHT * 4 | |
BIAS = (WIDTH + 1) * 4 | |
MASK_1 = int.from_bytes(b"\x11" * STATE_BYTE_LENGTH, "little") << BIAS | |
MASK_NOT_3 = MASK_1 * (15 ^ 3) | |
MASK_NOT_4 = MASK_1 * (15 ^ 4) | |
WRAP_MASK = int.from_bytes(b"\x11" * (BIAS//2), "little") << BIAS | |
BLIT_MASK_1 = int.from_bytes(b"\x01" * WIDTH * HEIGHT, "little") | |
def deflate_verbatim(data): | |
return len(data).to_bytes(2, "little") + ((len(data) ^ 0xffff).to_bytes(2, "little")) + data | |
sys.stdout.buffer.write(GZIP_HEADER) | |
sys.stdout.buffer.write(b"\x00" + deflate_verbatim(YUV_HEADER) + b"\x00") | |
state = int.from_bytes(os.urandom(STATE_BYTE_LENGTH), "little") & MASK_1 | |
while True: | |
""" | |
if we include ourself as a neighbor: | |
alive = (exactly 3 neighbors) or (alive and 4 neighbors) | |
""" | |
# implement wraparound | |
state |= (state >> WRAPSHIFT) + ((state & WRAP_MASK) << WRAPSHIFT) | |
# count neighbors | |
summed = state | |
summed += (state >> 4) + (state << 4) | |
summed += (summed >> COLSHIFT) + (summed << COLSHIFT) | |
# check if there are exactly 3 neighbors | |
has_3_neighbors = summed ^ MASK_NOT_3 # at this point, a value of all 1s means it was initially 3 | |
has_3_neighbors &= has_3_neighbors >> 2 # fold in half | |
has_3_neighbors &= has_3_neighbors >> 1 # fold in half again | |
# check if there are exactly 4 neighbors | |
has_4_neighbors = summed ^ MASK_NOT_4 # at this point, a value of all 1s means it was initially 4 | |
has_4_neighbors &= has_4_neighbors >> 2 # fold in half | |
has_4_neighbors &= has_4_neighbors >> 1 # fold in half again | |
# apply game-of-life rules | |
state &= has_4_neighbors | |
state |= has_3_neighbors | |
state &= MASK_1 | |
# output a (gzip-compressed!) YUV4MPEG frame | |
sys.stdout.buffer.write(deflate_verbatim(b"FRAME\n")) | |
sys.stdout.buffer.write(MAGIC_HUFFMAN_TREE) | |
sys.stdout.buffer.write((state>>(BIAS-3)).to_bytes((WIDTH*HEIGHT)//2, "little")) | |
sys.stdout.buffer.write(b"\x04") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Just for fun!