Leslie Fang leslie-fang-intel

from setuptools import setup, find_packages, Extension, Command import glob import os import pybind11 import shutil

import torch from torch.utils.cpp_extension import ( CUDA_HOME, IS_WINDOWS,

	# Run with: clear && mpiexec -n 2 python test_mpi4.py
	from mpi4py.futures import MPIPoolExecutor

	def work(x):
	    """Print a progress message for ``x`` and return ``x`` multiplied by itself."""
	    print(f"Working on: {x}")
	    return x * x

	# Script entry point: hand a single ``work(5)`` call to an MPIPoolExecutor.
	if __name__ == "__main__":
	    with MPIPoolExecutor() as executor:
	        # NOTE(review): the returned future is not consumed in the visible
	        # lines -- confirm whether the result is read further down.
	        future = executor.submit(work, 5)

	import torch
	import sglang
	import sgl_kernel

	if __name__ == "__main__":
	a = torch.randn((1, 1024), dtype=torch.float32).to("xpu")

	ref_res = a + a
	res3 = torch.ops.sgl_kernel.sgl_test_sycl(a, a)

	import torch
	import torch._inductor.config as config
	# config.realize_opcount_threshold = 1

	class SimpleModel(torch.nn.Module):
	    """Minimal module whose forward pass computes ``(x0 + x1) * x2``."""

	    def forward(self, x0, x1, x2):
	        """Add ``x0`` and ``x1``, multiply the sum by ``x2``, and return the product."""
	        tmp = x0 + x1
	        tmp2 = tmp * x2
	        return tmp2

	# TORCHINDUCTOR_FREEZING=1 TORCH_LOGS="+output_code" numactl -C 56-111 -m 1 python test.py

	import torch
	import time
	import random
	import numpy as np

	local_seed= 2024

	torch.manual_seed(local_seed) # Set PyTorch seed

	import requests
	import torch
	print(torch.__version__, flush=True)
	import torch.nn as nn
	import os, pickle
	import numpy as np
	import torch._inductor.config as config
	import torch._dynamo.config as dynamo_config
	import gc
	import time

	import torch
	from transformers import TorchAoConfig, AutoModelForCausalLM, AutoTokenizer
	from torchao.quantization import int4_weight_only
	from torchao.dtypes import Int4CPULayout
	import torch._inductor.config as config

	config.freezing = True
	# config.max_autotune = True

	with torch.no_grad():