mem_eff_attention_deterministic_algorithms_warn.py
import torch
import xformers
import xformers.ops

device = 'cuda'
dtype = torch.float16

# (batch, sequence length, number of heads, head dim)
shape = (1, 1024, 16, 16)

torch.manual_seed(0)
q = torch.rand(shape, device=device, dtype=dtype, requires_grad=True)
k = torch.rand(shape, device=device, dtype=dtype, requires_grad=True)
v = torch.rand(shape, device=device, dtype=dtype, requires_grad=True)

# Ask PyTorch to warn (rather than raise) whenever an op without a
# deterministic implementation is used.
torch.use_deterministic_algorithms(True, warn_only=True)

# Force the CUTLASS-based memory-efficient attention implementation.
op = xformers.ops.MemoryEfficientAttentionCutlassOp
r = xformers.ops.memory_efficient_attention(q, k, v, attn_bias=None, op=op)

# Both the forward and the backward kernels should emit a warning.
r.backward(torch.ones_like(r))
'''
The result should look something like this:
UserWarning: efficient_attention_forward_cutlass does not have a deterministic
implementation, but you set 'torch.use_deterministic_algorithms(True, warn_only=True)'.
You can file an issue at https://github.com/pytorch/pytorch/issues to help us
prioritize adding deterministic support for this operation.
(Triggered internally at /.../ATen/Context.cpp:82.)
UserWarning: mem_efficient_attention_backward_cutlass does not have a deterministic
implementation, but you set 'torch.use_deterministic_algorithms(True, warn_only=True)'.
You can file an issue at https://github.com/pytorch/pytorch/issues to help us
prioritize adding deterministic support for this operation.
(Triggered internally at /.../ATen/Context.cpp:82.)
'''
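
# A minimal sketch (an assumption, not part of the original gist): continuing
# from the script above, the same warnings can be captured programmatically
# with the standard-library warnings module, e.g. to assert in a test that the
# op is still reported as non-deterministic.
import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    r = xformers.ops.memory_efficient_attention(q, k, v, attn_bias=None, op=op)
    r.backward(torch.ones_like(r))

# Expect at least one UserWarning mentioning the missing deterministic
# implementation.
assert any('deterministic' in str(w.message) for w in caught)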