# Leslie Fang (leslie-fang-intel), INTC, Shanghai
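The snippets below are truncated gist previews. This first one is the import preamble that TorchInductor emits at the top of its generated wrapper code when run with `TORCH_LOGS="+output_code"`; the `# AOT ID: ['0_inference']` header tags the ahead-of-time-compiled inference graph.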
```
# AOT ID: ['0_inference']
from ctypes import c_void_p, c_long, c_int
import torch
import math
import random
import os
import tempfile
from math import inf, nan
from torch._inductor.hooks import run_intermediate_hooks
from torch._inductor.utils import maybe_profile
```
## All shapes

* input tokens 1024; output tokens 128; BS 1

```
AUTOTUNE _weight_int8pack_mm(4096x4096, 4096x4096, 4096)
  cpp_packed_gemm_0 10.8958 ms 100.0%
  _weight_int8pack_mm 50.9464 ms 21.4%
SingleProcess AUTOTUNE benchmarking takes 1.0826 seconds and 1.8839 seconds precompiling
AUTOTUNE _weight_int8pack_mm(4096x4096, 11008x4096, 11008)
  cpp_packed_gemm_4 24.0196 ms 100.0%
  _weight_int8pack_mm 119.4106 ms 20.1%
```
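These AUTOTUNE lines are what Inductor's max-autotune GEMM path prints while benchmarking its C++ packed-GEMM template against the `_weight_int8pack_mm` ATen fallback; the 4096x4096 and 11008x4096 weight shapes match a Llama-7B attention/MLP layer. A minimal sketch that should trigger the same comparison on CPU, assuming torchao's current int8 weight-only API (`quantize_`, `int8_weight_only`) rather than the exact script that produced this log:

```
import torch
import torch._inductor.config as config
from torchao.quantization import quantize_, int8_weight_only

config.freezing = True       # in-process equivalent of TORCHINDUCTOR_FREEZING=1
config.max_autotune = True   # enables the AUTOTUNE benchmarking shown above

# Llama-7B-shaped MLP projection: 1024 input tokens at BS 1 -> a 1024x4096 GEMM.
m = torch.nn.Sequential(torch.nn.Linear(4096, 11008, bias=False)).eval()
quantize_(m, int8_weight_only())  # weights become int8; matmuls lower to _weight_int8pack_mm

with torch.no_grad():
    cm = torch.compile(m)
    cm(torch.randn(1024, 4096))  # first call compiles, autotunes, and prints the table
```

On older torchao releases the tensor-subclass model may also need `unwrap_tensor_subclass` (imported in the quantization snippet below) before `torch.compile`.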
```
import torch
import torch._inductor.config as config
from torchao.quantization import quant_api
from torchao.utils import unwrap_tensor_subclass
import copy
import time
import intel_extension_for_pytorch as ipex
from intel_extension_for_pytorch.quantization import (
    prepare,
    convert,
)
```
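The preview cuts off inside the IPEX import. For context, a minimal IPEX static-quantization flow built on `prepare`/`convert`, assuming the current `ipex.quantization` API; the model, qconfig name, and calibration data here are illustrative, not from the gist:

```
import torch
import intel_extension_for_pytorch as ipex
from intel_extension_for_pytorch.quantization import prepare, convert

model = torch.nn.Linear(4096, 4096).eval()
example_input = torch.randn(8, 4096)

# Assumed qconfig from current IPEX docs, not taken from the gist.
qconfig = ipex.quantization.default_static_qconfig_mapping

prepared = prepare(model, qconfig, example_inputs=example_input, inplace=False)
with torch.no_grad():
    prepared(example_input)    # calibration pass: observers record activation ranges
converted = convert(prepared)  # swap in quantized ops
```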
```
# TORCHINDUCTOR_FREEZING=1 TORCH_LOGS="+output_code" numactl -C 56-111 -m 1 python test_softmax.py
import torch
import time
import random
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch._inductor.config
```
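The softmax gist also stops after its imports; a benchmark consistent with them and with the `numactl` command line might look like the following (shapes, softmax dim, and iteration count are guesses):

```
import time
import torch
import torch.nn.functional as F
import torch._inductor.config
torch._inductor.config.freezing = True  # in-process equivalent of TORCHINDUCTOR_FREEZING=1

x = torch.randn(16, 1024, 1024)
fn = torch.compile(lambda t: F.softmax(t, dim=-1))

with torch.no_grad():
    fn(x)  # warm-up: compiles and, under TORCH_LOGS="+output_code", dumps the kernel
    start = time.time()
    for _ in range(100):
        fn(x)
print(f"{(time.time() - start) / 100 * 1e3:.3f} ms/iter")
```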
```
[2024-07-12T21:28:58.846-07:00] Stderr:
clang: /data/sandcastle/boxes/trunk-grepo-llvm-c2-grepo/external/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:2096: virtual bool llvm::AArch64TargetLowering::targetShrinkDemandedConstant(SDValue, const APInt &, const APInt &, TargetLoweringOpt &) const: Assertion `(Size == 32 || Size == 64) && "i32 or i64 is expected after legalization."' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace, preprocessed source, and associated run script.
Stack dump:
0. Program arguments: ..././resources/usr/bin/clang -o .../__objects__/aten/src/ATen/native/cpu/BinaryOpsKernel.cpp.pic.o -fPIC .../.cpp.argsfile -c xplat/caffe2/aten/src/ATen/native/cpu/BinaryOpsKernel.
```
```
import requests
import torch
print(torch.__version__)
import torch.nn as nn
import os, pickle
import numpy as np
import torch._inductor.config as config
config.freezing = True
config.max_autotune = True
```
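For reference, `config.freezing` and `config.max_autotune` are the in-process equivalents of `TORCHINDUCTOR_FREEZING=1` and `TORCHINDUCTOR_MAX_AUTOTUNE=1`: freezing folds parameters into constants so Inductor can prepack weights, and max-autotune enables the candidate benchmarking shown in the AUTOTUNE logs above. A hedged continuation of the snippet, reusing its imports (model and shapes are illustrative):

```
model = nn.Sequential(nn.Linear(4096, 4096), nn.ReLU()).eval()

with torch.no_grad():
    compiled = torch.compile(model)       # freezing applies to inference graphs
    out = compiled(torch.randn(4, 4096))  # first call compiles and autotunes
print(out.shape)  # torch.Size([4, 4096])
```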