# Leslie Fang (leslie-fang-intel), INTC, Shanghai
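The snippets below are truncated gist previews. This first one is the import preamble that TorchInductor emits at the top of its generated wrapper code when run with `TORCH_LOGS="+output_code"`; the `# AOT ID: ['0_inference']` header tags the ahead-of-time-compiled inference graph.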
```
# AOT ID: ['0_inference']
from ctypes import c_void_p, c_long, c_int
import torch
import math
import random
import os
import tempfile
from math import inf, nan
from torch._inductor.hooks import run_intermediate_hooks
from torch._inductor.utils import maybe_profile
```
## All shapes

* input tokens 1024; output tokens 128; BS 1

```
AUTOTUNE _weight_int8pack_mm(4096x4096, 4096x4096, 4096)
  cpp_packed_gemm_0 10.8958 ms 100.0%
  _weight_int8pack_mm 50.9464 ms 21.4%
SingleProcess AUTOTUNE benchmarking takes 1.0826 seconds and 1.8839 seconds precompiling
AUTOTUNE _weight_int8pack_mm(4096x4096, 11008x4096, 11008)
  cpp_packed_gemm_4 24.0196 ms 100.0%
  _weight_int8pack_mm 119.4106 ms 20.1%
```
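These AUTOTUNE lines are what Inductor's max-autotune GEMM path prints while benchmarking its C++ packed-GEMM template against the `_weight_int8pack_mm` ATen fallback; the 4096x4096 and 11008x4096 weight shapes match a Llama-7B attention/MLP layer. A minimal sketch that should trigger the same comparison on CPU, assuming torchao's current int8 weight-only API (`quantize_`, `int8_weight_only`) rather than the exact script that produced this log:

```
import torch
import torch._inductor.config as config
from torchao.quantization import quantize_, int8_weight_only

config.freezing = True       # in-process equivalent of TORCHINDUCTOR_FREEZING=1
config.max_autotune = True   # enables the AUTOTUNE benchmarking shown above

# Llama-7B-shaped MLP projection: 1024 input tokens at BS 1 -> a 1024x4096 GEMM.
m = torch.nn.Sequential(torch.nn.Linear(4096, 11008, bias=False)).eval()
quantize_(m, int8_weight_only())  # weights become int8; matmuls lower to _weight_int8pack_mm

with torch.no_grad():
    cm = torch.compile(m)
    cm(torch.randn(1024, 4096))  # first call compiles, autotunes, and prints the table
```

On older torchao releases the tensor-subclass model may also need `unwrap_tensor_subclass` (imported in the quantization snippet below) before `torch.compile`.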
```
import torch
import torch._inductor.config as config
from torchao.quantization import quant_api
from torchao.utils import unwrap_tensor_subclass
import copy
import time
import intel_extension_for_pytorch as ipex
from intel_extension_for_pytorch.quantization import (
    prepare,
    convert,
)
```
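The preview cuts off inside the IPEX import. For context, a minimal IPEX static-quantization flow built on `prepare`/`convert`, assuming the current `ipex.quantization` API; the model, qconfig name, and calibration data here are illustrative, not from the gist:

```
import torch
import intel_extension_for_pytorch as ipex
from intel_extension_for_pytorch.quantization import prepare, convert

model = torch.nn.Linear(4096, 4096).eval()
example_input = torch.randn(8, 4096)

# Assumed qconfig from current IPEX docs, not taken from the gist.
qconfig = ipex.quantization.default_static_qconfig_mapping

prepared = prepare(model, qconfig, example_inputs=example_input, inplace=False)
with torch.no_grad():
    prepared(example_input)    # calibration pass: observers record activation ranges
converted = convert(prepared)  # swap in quantized ops
```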
```
# TORCHINDUCTOR_FREEZING=1 TORCH_LOGS="+output_code" numactl -C 56-111 -m 1 python test_softmax.py
import torch
import time
import random
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch._inductor.config
```
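The softmax gist also stops after its imports; a benchmark consistent with them and with the `numactl` command line might look like the following (shapes, softmax dim, and iteration count are guesses):

```
import time
import torch
import torch.nn.functional as F
import torch._inductor.config
torch._inductor.config.freezing = True  # in-process equivalent of TORCHINDUCTOR_FREEZING=1

x = torch.randn(16, 1024, 1024)
fn = torch.compile(lambda t: F.softmax(t, dim=-1))

with torch.no_grad():
    fn(x)  # warm-up: compiles and, under TORCH_LOGS="+output_code", dumps the kernel
    start = time.time()
    for _ in range(100):
        fn(x)
print(f"{(time.time() - start) / 100 * 1e3:.3f} ms/iter")
```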
```
[2024-07-12T21:28:58.846-07:00] Stderr:
clang: /data/sandcastle/boxes/trunk-grepo-llvm-c2-grepo/external/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:2096: virtual bool llvm::AArch64TargetLowering::targetShrinkDemandedConstant(SDValue, const APInt &, const APInt &, TargetLoweringOpt &) const: Assertion `(Size == 32 || Size == 64) && "i32 or i64 is expected after legalization."' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace, preprocessed source, and associated run script.
Stack dump:
0. Program arguments: ..././resources/usr/bin/clang -o .../__objects__/aten/src/ATen/native/cpu/BinaryOpsKernel.cpp.pic.o -fPIC .../.cpp.argsfile -c xplat/caffe2/aten/src/ATen/native/cpu/BinaryOpsKernel.
```
```
import requests
import torch
print(torch.__version__)
import torch.nn as nn
import os, pickle
import numpy as np
import torch._inductor.config as config
config.freezing = True
config.max_autotune = True
```
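For reference, `config.freezing` and `config.max_autotune` are the in-process equivalents of `TORCHINDUCTOR_FREEZING=1` and `TORCHINDUCTOR_MAX_AUTOTUNE=1`: freezing folds parameters into constants so Inductor can prepack weights, and max-autotune enables the candidate benchmarking shown in the AUTOTUNE logs above. A hedged continuation of the snippet, reusing its imports (model and shapes are illustrative):

```
model = nn.Sequential(nn.Linear(4096, 4096), nn.ReLU()).eval()

with torch.no_grad():
    compiled = torch.compile(model)       # freezing applies to inference graphs
    out = compiled(torch.randn(4, 4096))  # first call compiles and autotunes
print(out.shape)  # torch.Size([4, 4096])
```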