"""Compare the on-disk size of a random 100x100 tensor saved with torch.save,
safetensors, and MessagePack, uncompressed and after gzip/lzma compression."""
import io
import gzip
import lzma

import msgpack
import torch
from safetensors.torch import save

x = torch.rand(100, 100)
# float 16
y = x.to(torch.float16)

# Serialize with torch.save into an in-memory buffer to get the baseline size,
# and also write the raw .pt file to disk for reference.
buffer = io.BytesIO()
torch.save(y, buffer)
uncompressed_size = buffer.tell()
torch.save(y, 'tensor_float16.pt')

# Serialize with safetensors and compress that payload with gzip and lzma.
safetensor_data = save({'tensor': y})
compressed_data_st_gzip = gzip.compress(safetensor_data)
compressed_size_st_gzip = len(compressed_data_st_gzip)
with open('tensor_float16.safetensors.gz', 'wb') as f:
    f.write(compressed_data_st_gzip)
compressed_data_st_lzma = lzma.compress(safetensor_data)
compressed_size_st_lzma = len(compressed_data_st_lzma)
with open('tensor_float16.safetensors.xz', 'wb') as f:
    f.write(compressed_data_st_lzma)

# Compress with gzip
buffer.seek(0)
compressed_data_gzip = gzip.compress(buffer.read())
compressed_size_gzip = len(compressed_data_gzip)
with open('tensor_float16.pt.gz', 'wb') as f:
    f.write(compressed_data_gzip)

# Compress with lzma
buffer.seek(0)
compressed_data_lzma = lzma.compress(buffer.read())
compressed_size_lzma = len(compressed_data_lzma)
with open('tensor_float16.pt.xz', 'wb') as f:
    f.write(compressed_data_lzma)

print(f"Float 16 uncompressed: {uncompressed_size} bytes")
print(f"Float 16 gzip compressed: {compressed_size_gzip} bytes")
print(f"Float 16 gzip compression ratio: {compressed_size_gzip/uncompressed_size:.2%}")
print(f"Float 16 lzma compressed: {compressed_size_lzma} bytes")
print(f"Float 16 lzma compression ratio: {compressed_size_lzma/uncompressed_size:.2%}")
print(f"Float 16 safetensors gzip compressed: {compressed_size_st_gzip} bytes")
print(f"Float 16 safetensors gzip compression ratio: {compressed_size_st_gzip/uncompressed_size:.2%}")
print(f"Float 16 safetensors lzma compressed: {compressed_size_st_lzma} bytes")
print(f"Float 16 safetensors lzma compression ratio: {compressed_size_st_lzma/uncompressed_size:.2%}")
print()

# float 32
y = x.to(torch.float32)
buffer = io.BytesIO()
torch.save(y, buffer)
uncompressed_size = buffer.tell()
torch.save(y, 'tensor_float32.pt')

safetensor_data = save({'tensor': y})
compressed_data_st_gzip = gzip.compress(safetensor_data)
compressed_size_st_gzip = len(compressed_data_st_gzip)
with open('tensor_float32.safetensors.gz', 'wb') as f:
    f.write(compressed_data_st_gzip)
compressed_data_st_lzma = lzma.compress(safetensor_data)
compressed_size_st_lzma = len(compressed_data_st_lzma)
with open('tensor_float32.safetensors.xz', 'wb') as f:
    f.write(compressed_data_st_lzma)

# Compress with gzip
buffer.seek(0)
compressed_data_gzip = gzip.compress(buffer.read())
compressed_size_gzip = len(compressed_data_gzip)
with open('tensor_float32.pt.gz', 'wb') as f:
    f.write(compressed_data_gzip)

# Compress with lzma
buffer.seek(0)
compressed_data_lzma = lzma.compress(buffer.read())
compressed_size_lzma = len(compressed_data_lzma)
with open('tensor_float32.pt.xz', 'wb') as f:
    f.write(compressed_data_lzma)

print(f"Float 32 uncompressed: {uncompressed_size} bytes")
print(f"Float 32 gzip compressed: {compressed_size_gzip} bytes")
print(f"Float 32 gzip compression ratio: {compressed_size_gzip/uncompressed_size:.2%}")
print(f"Float 32 lzma compressed: {compressed_size_lzma} bytes")
print(f"Float 32 lzma compression ratio: {compressed_size_lzma/uncompressed_size:.2%}")
print(f"Float 32 safetensors gzip compressed: {compressed_size_st_gzip} bytes")
print(f"Float 32 safetensors gzip compression ratio: {compressed_size_st_gzip/uncompressed_size:.2%}")
print(f"Float 32 safetensors lzma compressed: {compressed_size_st_lzma} bytes")
print(f"Float 32 safetensors lzma compression ratio: {compressed_size_st_lzma/uncompressed_size:.2%}")

print()
# bfloat 16
y = x.to(torch.bfloat16)
buffer = io.BytesIO()
torch.save(y, buffer)
uncompressed_size = buffer.tell()
torch.save(y, 'tensor_bfloat16.pt')

safetensor_data = save({'tensor': y})
compressed_data_st_gzip = gzip.compress(safetensor_data)
compressed_size_st_gzip = len(compressed_data_st_gzip)
with open('tensor_bfloat16.safetensors.gz', 'wb') as f:
    f.write(compressed_data_st_gzip)
compressed_data_st_lzma = lzma.compress(safetensor_data)
compressed_size_st_lzma = len(compressed_data_st_lzma)
with open('tensor_bfloat16.safetensors.xz', 'wb') as f:
    f.write(compressed_data_st_lzma)

# Compress with gzip
buffer.seek(0)
compressed_data_gzip = gzip.compress(buffer.read())
compressed_size_gzip = len(compressed_data_gzip)
with open('tensor_bfloat16.pt.gz', 'wb') as f:
    f.write(compressed_data_gzip)

# Compress with lzma
buffer.seek(0)
compressed_data_lzma = lzma.compress(buffer.read())
compressed_size_lzma = len(compressed_data_lzma)
with open('tensor_bfloat16.pt.xz', 'wb') as f:
    f.write(compressed_data_lzma)

print(f"Bfloat 16 uncompressed: {uncompressed_size} bytes")
print(f"Bfloat 16 gzip compressed: {compressed_size_gzip} bytes")
print(f"Bfloat 16 gzip compression ratio: {compressed_size_gzip/uncompressed_size:.2%}")
print(f"Bfloat 16 lzma compressed: {compressed_size_lzma} bytes")
print(f"Bfloat 16 lzma compression ratio: {compressed_size_lzma/uncompressed_size:.2%}")
print(f"Bfloat 16 safetensors gzip compressed: {compressed_size_st_gzip} bytes")
print(f"Bfloat 16 safetensors gzip compression ratio: {compressed_size_st_gzip/uncompressed_size:.2%}")
print(f"Bfloat 16 safetensors lzma compressed: {compressed_size_st_lzma} bytes")
print(f"Bfloat 16 safetensors lzma compression ratio: {compressed_size_st_lzma/uncompressed_size:.2%}")

print()
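# Added aside (not in the original gist): torch.rand fills the mantissa bits
# with effectively random values, which byte-oriented compressors cannot do
# much with, so the ratios above stay high. A low-entropy tensor, by contrast,
# compresses almost entirely away:
zeros = torch.zeros(100, 100, dtype=torch.float16)
zeros_st = save({'tensor': zeros})
print(f"Constant float16 safetensors gzip ratio: {len(gzip.compress(zeros_st))/len(zeros_st):.2%}")
print()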
# Messagepack
# Note: .tolist() converts the bfloat16 values to Python floats, so msgpack
# stores them as 64-bit doubles rather than 2-byte values.
data = msgpack.packb(y.tolist())
uncompressed_size = len(data)
with open('tensor_bfloat16_messagepack.msgpack', 'wb') as f:
    f.write(data)

# Compress msgpack with gzip
compressed_data_gzip = gzip.compress(data)
compressed_size_gzip = len(compressed_data_gzip)
with open('tensor_bfloat16_messagepack.msgpack.gz', 'wb') as f:
    f.write(compressed_data_gzip)

# Compress msgpack with lzma
compressed_data_lzma = lzma.compress(data)
compressed_size_lzma = len(compressed_data_lzma)
with open('tensor_bfloat16_messagepack.msgpack.xz', 'wb') as f:
    f.write(compressed_data_lzma)

print(f"Messagepack uncompressed: {uncompressed_size} bytes")
print(f"Messagepack gzip compressed: {compressed_size_gzip} bytes")
print(f"Messagepack gzip compression ratio: {compressed_size_gzip/uncompressed_size:.2%}")
print(f"Messagepack lzma compressed: {compressed_size_lzma} bytes")
print(f"Messagepack lzma compression ratio: {compressed_size_lzma/uncompressed_size:.2%}")
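
# Added sketch (not in the original gist): the compressed artifacts round-trip
# by decompressing the bytes and handing them to the matching loader
# (safetensors.torch.load for the safetensors payload, torch.load on a
# BytesIO for the torch.save payload).
from safetensors.torch import load as st_load

with open('tensor_float16.safetensors.gz', 'rb') as f:
    restored_st = st_load(gzip.decompress(f.read()))['tensor']

with open('tensor_float16.pt.xz', 'rb') as f:
    restored_pt = torch.load(io.BytesIO(lzma.decompress(f.read())))

assert torch.equal(restored_st, restored_pt)
assert torch.equal(restored_pt, x.to(torch.float16))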