module {
func.func @test_reduce_prod_default_axes_keepdims_random(%arg0: !torch.vtensor<[3,2,2],f32>) -> !torch.vtensor<[1,1,1],f32> attributes {torch.onnx_meta.ir_version = 8 : si64, torch.onnx_meta.opset_version = 18 : si64, torch.onnx_meta.producer_name = "backend-test", torch.onnx_meta.producer_version = ""} {
%int0 = torch.constant.int 0
%int0_0 = torch.constant.int 0
%int1 = torch.constant.int 1
%int2 = torch.constant.int 2
%0 = torch.aten.dim %arg0 : !torch.vtensor<[3,2,2],f32> -> !torch.int
%1 = torch.aten.lt.int %int0_0, %int0 : !torch.int, !torch.int -> !torch.bool
%2 = torch.aten.Int.bool %1 : !torch.bool -> !torch.int
%3 = torch.aten.mul.int %2, %0 : !torch.int, !torch.int -> !torch.int
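The torch-dialect module above (the ONNX reduce_prod test after import, cut off mid-function) can be poked at further with torch-mlir-opt, in the same way as the llama example at the end of these notes; reduce_prod.mlir is a placeholder name for wherever the snippet is saved:
torch-mlir-opt reduce_prod.mlir -convert-torch-to-linalg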
# Ubuntu 22 LTS
sudo apt install htop
sudo apt install curl
sudo apt install wget
sudo apt install git
sudo apt install clang-format
sudo apt install zsh
sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)"
# PYTHON
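# The notes stop at this header. Below is a guessed sketch of the Python setup,
# inferred only from the shark.venv / python3.11 paths in the transcripts further
# down (not the author's actual steps; python3.11 may need the deadsnakes PPA
# depending on the Ubuntu release):
sudo apt install python3.11 python3.11-venv python3-pip
python3.11 -m venv shark.venv
source shark.venv/bin/activate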
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "haswell", cpu_features = "-prfchw,-cldemote,+avx,+aes,+sahf,+pclmul,-xop,+crc32,-xsaves,-avx512fp16,-usermsr,-sm4,+sse4.1,-avx512ifma,+xsave,-avx512pf,+sse4.2,-tsxldtrk,-ptwrite,-widekl,-sm3,+invpcid,+64bit,-xsavec,-avx10.1-512,-avx512vpopcntdq,+cmov,-avx512vp2intersect,-avx512cd,+movbe,-avxvnniint8,-avx512er,-amx-int8,-kl,-avx10.1-256,-sha512,-avxvnni,-rtm,-adx,+avx2,-hreset,-movdiri,-serialize,-vpclmulqdq,-avx512vl,-uintr,-clflushopt,-raoint,-cmpccxadd,+bmi,-amx-tile,+sse,-gfni,-avxvnniint16,-amx-fp16,+xsaveopt,+rdrnd,-avx512f,-amx-bf16,-avx512bf16,-avx512vnni,+cx8,-avx512bw,+sse3,-pku,+fsgsbase,-clzero,-mwaitx,-lwp,+lzcnt,-sha,-movdir64b,-wbnoinvd,-enqcmd,-prefetchwt1,-avxneconvert,-tbm,-pconfig,-amx-complex,+ssse3,+cx16,+bmi2,+fma,+popcnt,-avxifma,+f16c,-avx512bitalg,-rdpru,-clwb,+mmx,+sse2,-rdseed,-avx512vbmi2,-prefetchi,-rdpid,-fma4,-avx512vbmi,-shstk,-vaes,-waitpkg,-sgx,+fxsr,-avx512dq,-sse4a", d
(shark.venv) ➜ SHARK git:(main) ✗ iree-compile chatglm-6b-int4.mlir --iree-input-type=tm_tensor --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=llvm-cpu --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --mlir-pass-pipeline-crash-reproducer=/nodclouddata/chi/src/SHARK/nan/dispatch/2/tmp/core-reproducer.mlir --iree-llvmcpu-target-cpu-features=host --iree-llvmcpu-target-triple=x86_64-linux-gnu --iree-llvmcpu-enable-ukernels --iree-llvmcpu-stack-allocation-limit=256000 --iree-global-opt-enable-quantized-matmul-reassociation --iree-stream-resource-max-allocation-size=4294967295 --iree-vm-bytecode-module-strip-source-map=true --iree-util-zero-fill-elided-attrs --iree-opt-strip-assertions=false --verify=true --iree-flow-break-dispatch=@forward:9 -o /tmp/chatglm9.vmfb
<eval_with_key>.5:38:41: warning: skipping consteval initializer: unsupported type for current jit configuration: 'tensor<4608x64x64xi4>'
<eval_with_key>.5:173:43: warning: skipping consteval initializer:
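For reference, a trimmed variant of the same compile command, keeping only the flags that determine the produced artifact; the crash-reproducer path, dispatch breaking, IR printing, and source-map flags in the transcript above read as one-off debugging aids (my interpretation, not stated in the transcript), and the output name here is a placeholder:
iree-compile chatglm-6b-int4.mlir \
  --iree-input-type=tm_tensor \
  --iree-hal-target-backends=llvm-cpu \
  --iree-llvmcpu-target-triple=x86_64-linux-gnu \
  --iree-llvmcpu-target-cpu-features=host \
  --iree-llvmcpu-enable-ukernels \
  -o /tmp/chatglm.vmfb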
(shark.venv) ➜ SHARK git:(main) ✗ python nan/qwen_compile.py
shark_tank local cache is located at /home/chi/.local/shark_tank/ . You may change this by setting the --local_tank_cache= flag
tokenizer_config.json: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 174/174 [00:00<00:00, 641kB/s]
tokenization_qwen.py: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.62k/9.62k [00:00<00:00, 36.3MB/s]
A new version of the following files was downloaded from https://huggingface.co/Qwen/Qwen-7B-Chat:
- tokenization_qwen.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
qwen.tiktoken: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2.56M/2.56M [00:00<00:00, 13.8MB/s]
[DEBUG] generating mlir o
(shark.venv) ➜ SHARK git:(main) ✗ python nan/chatglm.py
shark_tank local cache is located at /home/chi/.local/shark_tank/ . You may change this by setting the --local_tank_cache= flag
[DEBUG] generating mlir on device
/nodclouddata/chi/src/SHARK/nan/chatglm.py:103: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
input_ids = torch.tensor(input_ids)
/nodclouddata/chi/src/SHARK/shark.venv/lib/python3.11/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.
torch.utils._pytree._register_pytree_node(
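The pytree deprecation warnings above come from transformers/torch and do not stop the run; if the goal is just a cleaner log, one option (a suggestion, not part of the original workflow) is to filter them at the interpreter level:
PYTHONWARNINGS="ignore::UserWarning" python nan/chatglm.py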
(shark.venv) ➜ SHARK git:(main) ✗ iree-compile chatglm-6b-int4.mlir --iree-input-type=tm_tensor --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=llvm-cpu --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --mlir-pass-pipeline-crash-reproducer=/nodclouddata/chi/src/SHARK/nan/dispatch/2/tmp/core-reproducer.mlir --iree-llvmcpu-target-cpu-features=host --iree-llvmcpu-target-triple=x86_64-linux-gnu --iree-llvmcpu-enable-ukernels --iree-llvmcpu-stack-allocation-limit=256000 --iree-global-opt-enable-quantized-matmul-reassociation --iree-stream-resource-max-allocation-size=4294967295 --iree-vm-bytecode-module-strip-source-map=true --iree-util-zero-fill-elided-attrs --iree-opt-strip-assertions=false --verify=true --iree-flow-break-dispatch=@forward:9 --iree-flow-trace-dispatch-tensors -mlir-print-ir-after=iree-flow-annotate-dispatches -mlir-elide-elementsattrs-if-larger=4 -o /tmp/chatglm9.vmfb
// -----// IR Dump After AnnotateDispatches (iree-flow-annotate-dispatches) //----- //
hal.executable public @forward_dispatch_9 {
hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {cpu = "haswell", cpu_features = "-prfchw,-cldemote,+avx,+aes,+sahf,+pclmul,-xop,+crc32,-xsaves,-avx512fp16,-usermsr,-sm4,+sse4.1,-avx512ifma,+xsave,-avx512pf,+sse4.2,-tsxldtrk,-ptwrite,-widekl,-sm3,+invpcid,+64bit,-xsavec,-avx10.1-512,-avx512vpopcntdq,+cmov,-avx512vp2intersect,-avx512cd,+movbe,-avxvnniint8,-avx512er,-amx-int8,-kl,-avx10.1-256,-sha512,-avxvnni,-rtm,-adx,+avx2,-hreset,-movdiri,-serialize,-vpclmulqdq,-avx512vl,-uintr,-clflushopt,-raoint,-cmpccxadd,+bmi,-amx-tile,+sse,-gfni,-avxvnniint16,-amx-fp16,+xsaveopt,+rdrnd,-avx512f,-amx-bf16,-avx512bf16,-avx512vnni,+cx8,-avx512bw,+sse3,-pku,+fsgsbase,-clzero,-mwaitx,-lwp,+lzcnt,-sha,-movdir64b,-wbnoinvd,-enqcmd,-prefetchwt1,-avxneconvert,-tbm,-pconfig,-amx-complex,+ssse3,+cx16,+bmi2,+fma,+popcnt,-avxifma,+f16c,-avx512bitalg,-rdpru,-clwb,+mmx,+sse2,-rdseed,-avx512vbmi2,-prefetchi,-rdpid,-fma4,-avx512vbmi,-shstk,-vaes,
(shark.venv) ➜ SHARK git:(main) ✗ iree-run-module --help
# ============================================================================
# 👻 IREE: iree-run-module
# ============================================================================
Runs a function within a compiled IREE module and handles I/O parsing
and optional expected value verification/output processing. Modules
can be provided by file path (`--module=file.vmfb`) or read from stdin
(`--module=-`) and the function to execute matches the original name
provided to the compiler (`--function=foo` for `func.func @foo`).
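Putting that help text to use on the module compiled earlier, a concrete invocation might look like the following; the function name matches the @forward seen in the break-dispatch flag, but the --input shape and value are purely placeholders since the real signature is not shown in these notes:
iree-run-module \
  --module=/tmp/chatglm9.vmfb \
  --device=local-task \
  --function=forward \
  --input="1x32xi64=1"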
AmosLewis / llama_torch2linalg.mlir
Created December 5, 2023 05:16
(turbine_venv) ➜ SHARK-Turbine git:(bump-iree) ✗ torch-mlir-opt tests/dynamo/llama_test.mlir -convert-torch-to-linalg
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map3 = affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>
#map4 = affine_map<(d0, d1, d2) -> (d0, d1, 0)>
#map5 = affine_map<(d0, d1, d2) -> (d2)>
#map6 = affine_map<(d0, d1) -> (d0, d1)>
#map7 = affine_map<(d0, d1) -> (d1, d0)>
#map8 = affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d3)>
#map9 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3)>