Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save leslie-fang-intel/69c3f1b693620dc9b0c8f964018d0b75 to your computer and use it in GitHub Desktop.
# Dependencies for the autoquant repro script below.
# NOTE(review): several of these (requests, nn, os, pickle, np, gc, time,
# psutil, refcycle, dynamo_config, ALL_AUTOQUANT_CLASS_LIST) are not used by
# the visible code -- presumably leftovers from a larger experiment; confirm
# before pruning.
import requests
import torch
# Print the torch version up front so logs identify the build under test.
print(torch.__version__, flush=True)
import torch.nn as nn
import os, pickle
import numpy as np
import torch._inductor.config as config
import torch._dynamo.config as dynamo_config
import gc
import time
import psutil
import refcycle
import torchao
from torchao import autoquant
from torchao.quantization import ALL_AUTOQUANT_CLASS_LIST
config.freezing = True
config.max_autotune = True
output_channels = 1024
dtype = torch.bfloat16
class M(torch.nn.Module):
    """A single bias-free Linear projection: 1024 -> output_channels, cast to dtype."""

    def __init__(self, output_channels, dtype):
        super().__init__()
        # bias=False keeps the module a pure matmul; .to(dtype) casts the weight.
        self.lin = torch.nn.Linear(1024, output_channels, bias=False).to(dtype)

    def forward(self, attn_weights):
        # Project the last dimension from 1024 to output_channels.
        return self.lin(attn_weights)
if __name__ == "__main__":
with torch.no_grad():
model = M(output_channels, dtype).eval()
## Optional: invoke torch.compile
model = torch.compile(model)
model = torchao.autoquant(model, manual=True)
x = torch.randn(2, 1024).to(dtype)
model(x)
model.finalize_autoquant()
# Do we need to invoke model = torch.compile(model)
model(x)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment