@leslie-fang-intel
Created April 5, 2025 00:09
import time
import torch
from torch import nn

class M(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(1024, 512)
        self.relu = torch.nn.ReLU()

    def forward(self, attn_weights):
        attn_weights = self.linear(attn_weights)
        return self.relu(attn_weights)

def main(layer, input):
    print(torch.cuda.is_available(), flush=True)
    # Compile the module and run it repeatedly so compilation is triggered.
    layer = torch.compile(layer)
    for i in range(100):
        layer(input)

if __name__ == "__main__":
    input = torch.rand((2, 16), device="cpu")
    layer = nn.Linear(16, 16, device="cpu")
    # 1. first run with compile on CPU
    main(layer, input)
    # 2. second run with compile on CPU
    input2 = torch.randn(2, 1024)
    main(M(), input2)
    # 3. third run with compile on CUDA (guarded so the script
    #    still completes on CPU-only machines)
    if torch.cuda.is_available():
        main(M().to("cuda"), input2.to("cuda"))
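
The gist imports time but never uses it, and the 100-iteration loop in main looks like the skeleton of a micro-benchmark. Below is a minimal timing sketch along those lines, reusing the M module defined above; the bench helper, the warm-up count, and the iteration count are assumptions for illustration, not part of the original.

def bench(layer, x, iters=100):
    # Warm up so torch.compile's one-time compilation cost
    # is excluded from the measurement.
    for _ in range(10):
        layer(x)
    start = time.perf_counter()
    for _ in range(iters):
        layer(x)
    return (time.perf_counter() - start) / iters

model = M()
x = torch.randn(2, 1024)
eager_t = bench(model, x)
compiled_t = bench(torch.compile(model), x)
print(f"eager: {eager_t * 1e6:.1f} us/iter, compiled: {compiled_t * 1e6:.1f} us/iter")

To see when the compiler produces a new graph across the three runs (different modules, then a device change), PyTorch's TORCH_LOGS environment variable can help, for example TORCH_LOGS="recompiles".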