ebsmothers · June 4, 2024 01:34
diff --git a/gistfile1.txt b/gistfile1.txt
 from torch import nn
 from torchtune.utils import get_memory_stats, get_device
 from torchao.dtypes.nf4tensor import to_nf4
 from bitsandbytes.functional import quantize_nf4

 def main():
    """Report memory stats around NF4 quantization of one large weight.

    Allocates a bias-free linear layer on the device returned by
    ``get_device('cuda')``, prints the result of ``get_memory_stats``,
    quantizes the layer's weight with torchao's ``to_nf4``, and prints
    the stats a second time.
    """
    cuda = get_device('cuda')

    # Dimensions of the Llama3-8B output projection weight
    in_dim, out_dim = 4096, 128256
    projection = nn.Linear(
        in_features=in_dim,
        out_features=out_dim,
        bias=False,
        device=cuda,
    )
    memory_stats = get_memory_stats(device=cuda)
    print(f"before quantize: {memory_stats}")

    # Quantize with ao; the result stays bound to a local so it is still
    # referenced when the second set of stats is collected.
    nf4_weight = to_nf4(projection.weight)
    memory_stats = get_memory_stats(device=cuda)
    print(f"after ao quant: {memory_stats}")



 # Run the memory comparison only when this file is executed as a script,
 # not when it is imported as a module.
 if __name__ == "__main__":
    main()
	from torch import nn
	from torchtune.utils import get_memory_stats, get_device
	from torchao.dtypes.nf4tensor import to_nf4
	from bitsandbytes.functional import quantize_nf4

	def main():
	    """Print memory stats before and after NF4-quantizing a large weight.

	    Creates a bias-free ``nn.Linear`` on the device returned by
	    ``get_device('cuda')``, prints ``get_memory_stats`` for that device,
	    quantizes the layer's weight with torchao's ``to_nf4``, and prints
	    the stats again.
	    """
	    device = get_device('cuda')

	    # Size of Llama3-8B output projection weight
	    big_linear = nn.Linear(in_features=4096, out_features=128256, bias=False, device=device)
	    memory_stats = get_memory_stats(device=device)
	    print(f"before quantize: {memory_stats}")

	    # Quantize with ao; keep the result bound so it is still referenced
	    # when the second set of stats is collected.
	    ao_quant = to_nf4(big_linear.weight)
	    memory_stats = get_memory_stats(device=device)
	    print(f"after ao quant: {memory_stats}")



	# Run the memory comparison only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()