@leslie-fang-intel
Created April 5, 2025 00:09
import time
import torch
from torch import nn

class M(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(1024, 512)
        self.relu = torch.nn.ReLU()

    def forward(self, attn_weights):
        attn_weights = self.linear(attn_weights)
        return self.relu(attn_weights)

def main(layer, input):
    print(torch.cuda.is_available(), flush=True)
    # Compile the module and run it repeatedly so compilation is triggered.
    layer = torch.compile(layer)
    for i in range(100):
        layer(input)

if __name__ == "__main__":
    input = torch.rand((2, 16), device="cpu")
    layer = nn.Linear(16, 16, device="cpu")
    # 1. first run with compile on CPU
    main(layer, input)
    # 2. second run with compile on CPU
    input2 = torch.randn(2, 1024)
    main(M(), input2)
    # 3. third run with compile on CUDA (guarded so the script
    #    still completes on CPU-only machines)
    if torch.cuda.is_available():
        main(M().to("cuda"), input2.to("cuda"))
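
The gist imports time but never uses it, and the 100-iteration loop in main looks like the skeleton of a micro-benchmark. Below is a minimal timing sketch along those lines, reusing the M module defined above; the bench helper, the warm-up count, and the iteration count are assumptions for illustration, not part of the original.

def bench(layer, x, iters=100):
    # Warm up so torch.compile's one-time compilation cost
    # is excluded from the measurement.
    for _ in range(10):
        layer(x)
    start = time.perf_counter()
    for _ in range(iters):
        layer(x)
    return (time.perf_counter() - start) / iters

model = M()
x = torch.randn(2, 1024)
eager_t = bench(model, x)
compiled_t = bench(torch.compile(model), x)
print(f"eager: {eager_t * 1e6:.1f} us/iter, compiled: {compiled_t * 1e6:.1f} us/iter")

To see when the compiler produces a new graph across the three runs (different modules, then a device change), PyTorch's TORCH_LOGS environment variable can help, for example TORCH_LOGS="recompiles".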