Skip to content

Instantly share code, notes, and snippets.

@jasondyoungberg
Created April 13, 2022 19:34
Show Gist options
  • Save jasondyoungberg/0322a9a1078e37d6724d8bbfae4ff412 to your computer and use it in GitHub Desktop.
Save jasondyoungberg/0322a9a1078e37d6724d8bbfae4ff412 to your computer and use it in GitHub Desktop.
Scripts for r/place 2022 data
#6d001a #be0039 #ff4500 #ffa800
#ffd635 #fff8b8 #00a368 #00cc78
#7eed56 #00756f #009eaa #00ccc0
#2450a4 #3690ea #51e9f4 #493ac1
#6a5cff #94b3ff #811e9f #b44ac0
#e4abff #de107f #ff3881 #ff99aa
#6d482f #9c6926 #ffb470 #000000
#515252 #898d90 #d4d7d9 #ffffff
from dateutil import parser
# Input: the raw placement CSV. The commented-out line below selects a
# different input file ('data_short.csv') instead.
#raw = open('data_short.csv')
raw = open('data_raw.csv')
# Consume the first line (presumably the CSV header - confirm against the
# data file) so the main loop never sees it.
raw.readline()
# Outputs: one CSV of tile rows and one CSV mapping numeric user ids to hashes.
dataCanvas = open('data_canvas.csv','w')
dataUsers = open('data_users.csv','w')
# Pending output text for each file, seeded with that file's header row.
# update() writes these out and clears them every 10000 rows.
canvasBuffer = "id,time,user_id,color,x,y\n"
userBuffer = "id,hash\n"
# Maps each user hash string to the numeric id assigned by index().
users = {}
# Colour table as upper-case hex strings. A colour's position in this list is
# the integer code the script writes to the output in place of the hex string.
palette = list(map(str.upper, (
    "#6d001a", "#be0039", "#ff4500", "#ffa800",
    "#ffd635", "#fff8b8", "#00a368", "#00cc78",
    "#7eed56", "#00756f", "#009eaa", "#00ccc0",
    "#2450a4", "#3690ea", "#51e9f4", "#493ac1",
    "#6a5cff", "#94b3ff", "#811e9f", "#b44ac0",
    "#e4abff", "#de107f", "#ff3881", "#ff99aa",
    "#6d482f", "#9c6926", "#ffb470", "#000000",
    "#515252", "#898d90", "#d4d7d9", "#ffffff",
)))
def index(hash):
    """Return the numeric id for a user hash, assigning a new one if unseen.

    Ids are handed out sequentially starting at 0, in order of first
    appearance. The first time a hash is seen, an ``id,hash`` row is also
    appended to the module-level ``userBuffer`` so it ends up in the users
    CSV.

    Args:
        hash: The user hash string taken from the input line.

    Returns:
        The integer id associated with ``hash``.
    """
    global users, userBuffer
    if hash in users:
        return users[hash]
    # The current table size is the next free id, since ids start at 0.
    new_id = len(users)
    userBuffer += f'{new_id},{hash}\n'
    users[hash] = new_id
    return new_id
def update():
    """Count one emitted tile row and periodically flush the output buffers.

    Increments the module-level row counter ``i``. On every 10000th row it
    prints a progress percentage, writes ``canvasBuffer`` and ``userBuffer``
    to their output files, and resets both buffers to empty strings.
    """
    global userBuffer, canvasBuffer, i
    i += 1
    if i % 10000 != 0:
        return
    # Progress is input lines read (j) over a fixed total; 160353104 is
    # presumably the line count of the full raw data file - confirm.
    # The '\r' ending makes each update overwrite the previous one.
    print(f'{100*j/160353104:.2f}', end='%\r')
    dataCanvas.write(canvasBuffer)
    dataUsers.write(userBuffer)
    userBuffer = ''
    canvasBuffer = ''
# Convert every raw placement line into tile rows (plus user rows for hashes
# not seen before). Output timestamps are milliseconds relative to
# 2022-04-01 00:00:00 UTC.
apr1 = int(parser.parse('2022-04-01 00:00:00.0 UTC').timestamp() * 1000)
i = 0  # id of the next tile row; advanced by update()
j = 0  # input lines read so far; update() reads it for the progress display
try:
    for line in raw:
        j += 1
        # Lines read from a file keep their line ending, so a blank line is
        # never '' - test the stripped text instead.
        if not line.strip():
            continue
        # Drop the line ending up front so that a final line without '\n'
        # (or a CRLF file) parses the same as every other line.
        data = line.rstrip('\r\n').split(',')
        time = int(parser.parse(data[0]).timestamp() * 1000) - apr1
        userID = index(data[1])
        color = palette.index(data[2])
        if len(data) == 7:
            # Seven fields means the quoted coordinate field held four
            # numbers (x1,y1,x2,y2): emit one row per pixel of that
            # rectangle, bounds inclusive.
            x1 = int(data[3].replace('"', ''))
            y1 = int(data[4])
            x2 = int(data[5])
            y2 = int(data[6].replace('"', ''))
            for x in range(x1, x2 + 1):
                for y in range(y1, y2 + 1):
                    canvasBuffer += f'{i},{time},{userID},{color},{x},{y}\n'
                    update()
        else:
            # Single pixel: the quoted field held just "x,y".
            x = data[3].replace('"', '')
            y = data[4].replace('"', '')
            canvasBuffer += f'{i},{time},{userID},{color},{x},{y}\n'
            update()
    # Write whatever accumulated since the last periodic flush in update().
    dataCanvas.write(canvasBuffer)
    dataUsers.write(userBuffer)
finally:
    # Close explicitly so buffered output reaches disk and handles are released.
    for handle in (raw, dataCanvas, dataUsers):
        handle.close()
-- Recreate the tiles table from scratch, then bulk-load it from a CSV file.
-- Column order matches the CSV header id,time,user_id,color,x,y.
-- NOTE(review): the conversion script in this gist writes data_canvas.csv,
-- while the load below reads tiles.csv - presumably the file is renamed
-- first; confirm.
DROP TABLE IF EXISTS tiles;
CREATE TABLE tiles (
id INT PRIMARY KEY,
time INT NOT NULL, -- ms since april 1 2022
user_id INT NOT NULL,
color TINYINT NOT NULL, -- 0-31, instead of storing hex string in db
x SMALLINT NOT NULL,
y SMALLINT NOT NULL,
-- One single-column secondary index per queryable column.
INDEX(time),
INDEX(user_id),
INDEX(color),
INDEX(x),
INDEX(y));
-- IGNORE 1 ROWS skips the CSV header line.
LOAD DATA INFILE 'tiles.csv'
INTO TABLE tiles
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
IGNORE 1 ROWS;
-- ----------
-- Recreate the users table from scratch, then bulk-load it from a CSV file
-- whose rows are id,hash with the hash base64-encoded.
-- NOTE(review): the conversion script in this gist writes data_users.csv,
-- while the load below reads users.csv - presumably the file is renamed
-- first; confirm.
DROP TABLE IF EXISTS users;
CREATE TABLE users (
    id INT PRIMARY KEY,
    -- UNIQUE already creates an index on the column, so no separate
    -- INDEX(hash) / INDEX(name) is declared (it would only duplicate it).
    hash BINARY(64) NOT NULL UNIQUE,
    -- Not populated by the load below (only id and hash are read), so it
    -- stays NULL until filled in separately.
    name VARCHAR(20) UNIQUE);
-- IGNORE 1 ROWS skips the CSV header line. The hash text is read into a
-- user variable and decoded from base64 before being stored as binary.
LOAD DATA INFILE 'users.csv'
INTO TABLE users
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(id,@hash)
SET hash = FROM_BASE64(@hash);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment