Created January 26, 2023 19:45
-
-
Save lucataco/9342b4a458849bbf6679f7529abac495 to your computer and use it in GitHub Desktop.
Safetensors speed comparison with bloom-560M
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import datetime

from huggingface_hub import hf_hub_download
from safetensors.torch import load_file
import torch

# Benchmark: load the same bloom-560m checkpoint in safetensors and
# pickle (pytorch_model.bin) formats, on CPU and then on GPU, and
# report the speed ratio. Downloads are cached by hf_hub_download, so
# the timings below measure deserialization only, not network transfer.
sf_filename = hf_hub_download("bigscience/bloom-560m", filename="model.safetensors")
pt_filename = hf_hub_download("bigscience/bloom-560m", filename="pytorch_model.bin")

# --- CPU comparison ---
start_st = datetime.datetime.now()
weights_st = load_file(sf_filename, device="cpu")
load_time_st = datetime.datetime.now() - start_st
print(f"Loaded safetensors {load_time_st}")

start_pt = datetime.datetime.now()
# weights_only=True: the .bin file is an untrusted pickle; restrict it to
# tensor data instead of allowing arbitrary code execution on load.
weights_pt = torch.load(pt_filename, map_location="cpu", weights_only=True)
load_time_pt = datetime.datetime.now() - start_pt
print(f"Loaded pytorch {load_time_pt}")
print(f"on CPU, safetensors is faster than pytorch by: {load_time_pt/load_time_st:.1f} X")

# Drop the CPU copies before the GPU pass so both state dicts are not
# held in host memory at once.
del weights_st, weights_pt

# This is required because this feature hasn't been fully verified yet, but
# it's been tested on many different environments
os.environ["SAFETENSORS_FAST_GPU"] = "1"

# Keep CUDA context startup out of the measurement.
torch.zeros((2, 2)).cuda()

# --- GPU comparison ---
start_st = datetime.datetime.now()
weights_st = load_file(sf_filename, device="cuda:0")
# CUDA work can be asynchronous; synchronize so the timer only stops
# once the device-side copies have actually completed.
torch.cuda.synchronize()
load_time_st = datetime.datetime.now() - start_st
print(f"Loaded safetensors {load_time_st}")

start_pt = datetime.datetime.now()
weights_pt = torch.load(pt_filename, map_location="cuda:0", weights_only=True)
torch.cuda.synchronize()
load_time_pt = datetime.datetime.now() - start_pt
print(f"Loaded pytorch {load_time_pt}")
print(f"on GPU, safetensors is faster than pytorch by: {load_time_pt/load_time_st:.1f} X")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.
HF Documentation: https://huggingface.co/docs/safetensors/speed#gpu-benchmark
CPU: Ryzen 7 5800X
GPU: RTX 4080
Loaded safetensors 0:00:00.020865
Loaded pytorch 0:00:00.235906
on CPU, safetensors is faster than pytorch by: 11.3 X
Loaded safetensors 0:00:00.110876
Loaded pytorch 0:00:00.256813
on GPU, safetensors is faster than pytorch by: 2.3 X