690.0K raw-compressed
2.0M raw
2.4M html-dictionary
2.4M json-dictionary
3.6M json-compressed
3.9M json
9.3M html-compressed
14.0M html
Last active
November 22, 2021 17:31
-
-
Save Summertime/2837e6e904244b9d8c5559969b4e9dbe to your computer and use it in GitHub Desktop.
benchmarking HTML vs JSON
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Benchmark the on-disk size of IRC messages rendered as JSON vs. HTML:
# uncompressed, zstd-compressed per file, and zstd-compressed per file with a
# trained dictionary. Expects ckan_freenode_irc_logs_2014-2018.log and
# generator.py in the directory the script is started from.

DATADIR=$(pwd)

# TEMPDIR is defined (empty) before the trap is installed so the handler
# always sees the variable; the handler leaves the directory before removing it.
TEMPDIR=
trap 'cd ~;rm -fr -- "$TEMPDIR"' EXIT
TEMPDIR=$(mktemp -d)
cd -- "$TEMPDIR"

# One output directory per format and variant: html, html-compressed,
# html-dictionary, json, json-compressed, json-dictionary.
mkdir {html,json}{-compressed,-dictionary,}

# Keep only chat lines ("[HH:MM:SS] <nick> text"), rewrite them to
# "nick text", save that stream as `raw`, and pipe it to the generator, which
# writes one file per message into json/ and html/.
# The nick is matched with [^>]* rather than .*? because ERE has no lazy
# quantifiers: .*? is greedy and would run up to the last '>' on the line,
# dragging message text into the captured nick.
cat -- "$DATADIR"/ckan_freenode_irc_logs_2014-2018.log |
    sed -En 's/\[..:..:..\] <([^>]*)>/\1/gp' |
    tee raw |
    python "$DATADIR"/generator.py

# Baseline: the whole raw log compressed as a single stream.
zstd -q -z raw -o raw-compressed

for T in json html; do
    # Train a dictionary of at most 2**15 bytes on a random sample of up to
    # 3500 files; it is written to ./dictionary, which the next command reads.
    zstd -q --maxdict $((2**15)) --train $(find "$T" -type f | shuf -n 3500)
    # Compress every file individually, once with the dictionary and once without.
    zstd -q --output-dir-flat "$T"-dictionary -z -D dictionary "$T"/*
    zstd -q --output-dir-flat "$T"-compressed -z "$T"/*
    # Remove the dictionary so the next format trains its own.
    rm dictionary
done

# Report apparent sizes in human-readable units, smallest first.
du -sbh {json,html}{-compressed,-dictionary,} raw{-compressed,} | sort -h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys, random,json | |
def gendata():
    """Turn each "name message" line on stdin into one JSON and one HTML file.

    Blank lines are skipped. Every other line is split on its first run of
    whitespace into a user name and a message, wrapped in a record with
    random 8-digit ids, and written to ``json/<index>.json`` and
    ``html/<index>.html``. ``<index>`` is the position of the line on stdin,
    zero-padded to at least four characters, so skipped blank lines leave
    gaps in the numbering. The ``json`` and ``html`` directories are opened
    relative to the current working directory and are not created here.
    """
    for index, line in enumerate(sys.stdin):
        line = line.strip()
        if not line:
            continue
        # A line that holds only a name (empty message) splits into a single
        # field; keep the record with an empty message instead of raising
        # ValueError on unpacking.
        fields = line.split(maxsplit=1)
        name = fields[0]
        message = fields[1] if len(fields) > 1 else ""
        d = {
            "user": {
                "name": name,
                "id": random.randint(10000000, 99999999),
            },
            "message": message,
            "id": random.randint(10000000, 99999999),
        }
        with open(f'json/{index:0>4}.json', 'w') as f:
            f.write(tojson(d))
        with open(f'html/{index:0>4}.html', 'w') as f:
            f.write(tohtml(d))
def tojson(j):
    """Serialize *j* to JSON text with no whitespace after ',' or ':'."""
    compact_separators = (',', ':')
    return json.dumps(j, separators=compact_separators)
def tohtml(j):
    """Render one message record as an HTML fragment.

    *j* is a mapping with the keys ``"id"``, ``"message"`` and ``"user"``,
    where ``"user"`` is itself a mapping with ``"name"`` and ``"id"``.
    Returns a single string with no whitespace between the elements.

    NOTE(review): the name and message are inserted verbatim, without HTML
    escaping; escape them before using this markup outside the benchmark.
    """
    return (
        f'<div class=message data-id={j["id"]}>'
        f'<div class=avatar>'
        # The user id must be wrapped in braces to be interpolated; without
        # them the literal text j["user"]["id"] ended up in every avatar URL.
        f'<img href=://imageserver/avatars/{j["user"]["id"]}.webp>'
        f'<svg version="1.1" width="300" height="200" xmlns="http://www.w3.org/2000/svg"><rect width="100%" height="100%" fill="red" /><circle cx="150" cy="100" r="80" fill="green" /><text x="150" y="125" font-size="60" text-anchor="middle" fill="white">SVG</text></svg>'
        f'</div>'
        f'<div class=name>'
        f'{j["user"]["name"]}'
        f'</div>'
        f'<div class=message-content onclick=achildcrys()>'
        f'{j["message"]}'
        f'</div>'
        f'</div>'
    )
# Script entry point: consume stdin and write the json/ and html/ files.
gendata()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment