Created January 26, 2023 19:45
-
-
Save lucataco/9342b4a458849bbf6679f7529abac495 to your computer and use it in GitHub Desktop.
Safetensors speed comparison with bloom-560M
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import datetime

from huggingface_hub import hf_hub_download
from safetensors.torch import load_file
import torch

# Benchmark: load the same bloom-560m checkpoint in safetensors and
# pickle (pytorch_model.bin) formats, on CPU and then on GPU, and
# report the speed ratio. Downloads are cached by hf_hub_download, so
# the timings below measure deserialization only, not network transfer.
sf_filename = hf_hub_download("bigscience/bloom-560m", filename="model.safetensors")
pt_filename = hf_hub_download("bigscience/bloom-560m", filename="pytorch_model.bin")

# --- CPU comparison ---
start_st = datetime.datetime.now()
weights_st = load_file(sf_filename, device="cpu")
load_time_st = datetime.datetime.now() - start_st
print(f"Loaded safetensors {load_time_st}")

start_pt = datetime.datetime.now()
# weights_only=True: the .bin file is an untrusted pickle; restrict it to
# tensor data instead of allowing arbitrary code execution on load.
weights_pt = torch.load(pt_filename, map_location="cpu", weights_only=True)
load_time_pt = datetime.datetime.now() - start_pt
print(f"Loaded pytorch {load_time_pt}")
print(f"on CPU, safetensors is faster than pytorch by: {load_time_pt/load_time_st:.1f} X")

# Drop the CPU copies before the GPU pass so both state dicts are not
# held in host memory at once.
del weights_st, weights_pt

# This is required because this feature hasn't been fully verified yet, but
# it's been tested on many different environments
os.environ["SAFETENSORS_FAST_GPU"] = "1"

# Keep CUDA context startup out of the measurement.
torch.zeros((2, 2)).cuda()

# --- GPU comparison ---
start_st = datetime.datetime.now()
weights_st = load_file(sf_filename, device="cuda:0")
# CUDA work can be asynchronous; synchronize so the timer only stops
# once the device-side copies have actually completed.
torch.cuda.synchronize()
load_time_st = datetime.datetime.now() - start_st
print(f"Loaded safetensors {load_time_st}")

start_pt = datetime.datetime.now()
weights_pt = torch.load(pt_filename, map_location="cuda:0", weights_only=True)
torch.cuda.synchronize()
load_time_pt = datetime.datetime.now() - start_pt
print(f"Loaded pytorch {load_time_pt}")
print(f"on GPU, safetensors is faster than pytorch by: {load_time_pt/load_time_st:.1f} X")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.
HF Documentation: https://huggingface.co/docs/safetensors/speed#gpu-benchmark
CPU: Ryzen 7 5800X
GPU: RTX 4080
Loaded safetensors 0:00:00.020865
Loaded pytorch 0:00:00.235906
on CPU, safetensors is faster than pytorch by: 11.3 X
Loaded safetensors 0:00:00.110876
Loaded pytorch 0:00:00.256813
on GPU, safetensors is faster than pytorch by: 2.3 X