Created
August 10, 2023 04:41
-
-
Save CharlZKP/5eb3d12d282dffa6b3af59e760d3ae1b to your computer and use it in GitHub Desktop.
Faster NumPy array compression and decompression, with slightly smaller files, using Zstandard.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This code is licensed under the terms of the MIT license | |
import numpy as np | |
import zstandard as zstd | |
import json | |
def np_save_compressed(filename, array, compression_level=5):
    """
    Save a NumPy array as a compressed file using Zstandard compression.

    File layout, in order:
      1. a 4-byte little-endian signed integer: the length of the metadata,
      2. the metadata as UTF-8 encoded JSON with keys "shape" and "dtype",
      3. one Zstandard frame holding the array's raw bytes in C order.

    Args:
        filename (str): The name of the output compressed file.
        array (numpy.ndarray): The NumPy array (or array-like) to be
            compressed and saved.
        compression_level (int, optional): The compression level (1 to 22).
            Higher values provide better compression. Defaults to 5.

    Raises:
        TypeError: If the array's dtype contains Python objects. Their raw
            bytes are memory addresses, which cannot be restored on load.
    """
    array = np.asarray(array)
    dtype = array.dtype
    if dtype.hasobject:
        raise TypeError(
            "cannot save an array with object dtype %r: its raw bytes are "
            "pointers and cannot be reloaded" % (str(dtype),)
        )

    # dtype.str spells out the byte order (e.g. '<f8'), so the file does not
    # depend on the endianness of the machine that reads it. Structured
    # dtypes keep the str() form because dtype.str would collapse them to an
    # opaque void type and drop the field layout.
    dtype_text = dtype.str if dtype.names is None else str(dtype)
    metadata = {
        "shape": array.shape,
        "dtype": dtype_text,
    }
    metadata_bytes = json.dumps(metadata).encode('utf-8')

    cctx = zstd.ZstdCompressor(level=compression_level)
    # tobytes() always emits C-order data, matching the reshape done on load.
    compressed_data = cctx.compress(array.tobytes())

    with open(filename, 'wb') as f:
        # Explicit little-endian header; byte-identical to the previous
        # native int32 write on little-endian hosts.
        f.write(len(metadata_bytes).to_bytes(4, "little", signed=True))
        f.write(metadata_bytes)
        f.write(compressed_data)
def np_load_compressed(filename):
    """
    Load a compressed NumPy array using Zstandard decompression.

    Expects the layout written by np_save_compressed: a 4-byte little-endian
    signed metadata length, UTF-8 JSON metadata with "shape" and "dtype"
    keys, then the Zstandard-compressed raw array bytes.

    Args:
        filename (str): The name of the compressed file to load.

    Returns:
        numpy.ndarray: The decompressed NumPy array. It owns its data and is
        writable.

    Raises:
        ValueError: If the file is too short to contain the length header or
            the metadata it announces, or if the announced length is
            negative.
    """
    header_size = 4  # size of the int32 length prefix
    with open(filename, 'rb') as f:
        header = f.read(header_size)
        if len(header) != header_size:
            raise ValueError(
                "%r is too short to contain a metadata length header"
                % (filename,)
            )
        metadata_size = int.from_bytes(header, "little", signed=True)
        if metadata_size < 0:
            # A negative size would make f.read() swallow the whole file.
            raise ValueError(
                "%r has an invalid metadata length: %d"
                % (filename, metadata_size)
            )
        metadata_bytes = f.read(metadata_size)
        if len(metadata_bytes) != metadata_size:
            raise ValueError("%r has truncated metadata" % (filename,))
        compressed_data = f.read()

    metadata_dict = json.loads(metadata_bytes.decode("utf-8"))
    dtype = np.dtype(metadata_dict["dtype"])
    shape = metadata_dict["shape"]

    dctx = zstd.ZstdDecompressor()
    decompressed_data = dctx.decompress(compressed_data)
    # np.frombuffer over immutable bytes yields a read-only view; copy so
    # callers can modify the result in place.
    return np.frombuffer(decompressed_data, dtype=dtype).reshape(shape).copy()
# Example usage:
# np_save_compressed('array1.np.zstd', array1)
# np_save_compressed('array2.np.zstd', array2)
# loaded_array1 = np_load_compressed('array1.np.zstd')
# loaded_array2 = np_load_compressed('array2.np.zstd')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment