Created
October 8, 2024 13:20
-
-
Save ashiklom/04c30d9e07099ec396a0ca84ec9f85ac to your computer and use it in GitHub Desktop.
Usage of numcodecs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Compare compressed sizes of float64 data with numcodecs Zlib and Shuffle.

The script compresses an array of random normal values, and the same values
floored to one decimal place, with zlib alone and with a byte shuffle applied
before zlib, then prints the resulting sizes in bytes.
"""
import numpy as np
import numcodecs

# generate some random data
dat = np.random.randn(10_000)
dat_bytes = dat.tobytes()

# Shuffle has to be told the element width. Its default is 4 bytes, but the
# data here is 8 bytes per float, so pass the array's own item size.
shuf = numcodecs.Shuffle(elementsize=dat.dtype.itemsize)
zlib = numcodecs.Zlib(level=9)

# The length of a `bytes` object is a direct proxy for the object size. So, we
# don't necessarily have to write the object to disk.
# Sizes are printed explicitly: a bare `len(...)` expression only shows up in
# an interactive session, not when the file is run as a script.
# 80,000 bytes (80 kB): 8 bytes per float x 10,000 floats
print(f"raw:                    {len(dat_bytes):>7,} bytes")
# Roughly 77 kB was observed for zlib alone; the exact value varies per run
# because the input is random.
print(f"zlib:                   {len(zlib.encode(dat_bytes)):>7,} bytes")
# NOTE(review): ~75 kB was measured with the default 4-byte shuffle;
# re-measure now that the shuffle matches the 8-byte element size.
print(f"shuffle + zlib:         {len(zlib.encode(shuf.encode(dat_bytes))):>7,} bytes")

# Floor every value to one decimal place so the data carries far less
# information and compresses better.
dat_rounded = np.floor(dat * 10) / 10
dat_rounded_bytes = dat_rounded.tobytes()

# About 12 kB was observed for zlib alone on the rounded data; much smaller!
print(f"rounded zlib:           {len(zlib.encode(dat_rounded_bytes)):>7,} bytes")
# NOTE(review): 24 kB (worse than zlib alone) was measured with the default
# 4-byte shuffle; confirm whether that still holds with the 8-byte shuffle.
print(
    f"rounded shuffle + zlib: "
    f"{len(zlib.encode(shuf.encode(dat_rounded_bytes))):>7,} bytes"
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment