Build torch-mlir together with LLVM (Debug build):
cmake -GNinja -Bbuild \
  -DCMAKE_BUILD_TYPE=Debug \
  -DCMAKE_C_COMPILER=clang \
  -DCMAKE_CXX_COMPILER=clang++ \
  -DPython3_FIND_VIRTUALENV=ONLY \
  -DLLVM_ENABLE_PROJECTS=mlir \
  -DLLVM_EXTERNAL_PROJECTS="torch-mlir;torch-mlir-dialects" \
  -DLLVM_EXTERNAL_TORCH_MLIR_SOURCE_DIR=`pwd` \
  -DLLVM_EXTERNAL_TORCH_MLIR_DIALECTS_SOURCE_DIR=`pwd`/externals/llvm-external-projects/torch-mlir-dialects \
  -DMLIR_ENABLE_BINDINGS_PYTHON=ON \
  -DLLVM_TARGETS_TO_BUILD=host \
  externals/llvm-project/llvm
cmake --build build --target tools/torch-mlir/all
Same build with PyTorch extensions enabled (-DTORCH_MLIR_ENABLE_PYTORCH_EXTENSIONS=ON):
cmake -GNinja -Bbuild \
  -DCMAKE_BUILD_TYPE=Debug \
  -DCMAKE_C_COMPILER=clang \
  -DCMAKE_CXX_COMPILER=clang++ \
  -DPython3_FIND_VIRTUALENV=ONLY \
  -DLLVM_ENABLE_PROJECTS=mlir \
  -DLLVM_EXTERNAL_PROJECTS="torch-mlir;torch-mlir-dialects" \
  -DLLVM_EXTERNAL_TORCH_MLIR_SOURCE_DIR=`pwd` \
  -DLLVM_EXTERNAL_TORCH_MLIR_DIALECTS_SOURCE_DIR=`pwd`/externals/llvm-external-projects/torch-mlir-dialects \
  -DMLIR_ENABLE_BINDINGS_PYTHON=ON \
  -DTORCH_MLIR_ENABLE_PYTORCH_EXTENSIONS=ON \
  -DLLVM_TARGETS_TO_BUILD=host \
  externals/llvm-project/llvm
cmake --build build --target tools/torch-mlir/all
git submodule update --init --progress
git add -u
git commit --amend --no-edit
git reset --hard HEAD~1
git push origin as_stride --force
pip3 install clang-format
git clang-format HEAD~1
torch-mlir-opt -convert-torch-to-tosa /tmp/index.mlir | externals/llvm-project/mlir/utils/generate-test-checks.py
--convert-torch-to-linalg
--torch-backend-to-linalg-on-tensors-backend-pipeline
torch-mlir-opt --convert-torch-onnx-to-torch --torch-decompose-complex-ops --cse --canonicalize --convert-torch-to-linalg reshape.default.onnx.mlir --debug
torch-mlir-opt --convert-torch-onnx-to-torch --torch-lower-to-backend-contract --torch-scalarize-shapes --torch-shape-refinement-pipeline --torch-backend-to-linalg-on-tensors-backend-pipeline onnx.mlir
torch-mlir-opt -convert-torch-to-tosa /tmp/index.mlir -mlir-print-ir-after-all -mlir-disable-threading --mlir-print-ir-before-all --debug
torch-mlir-opt --mlir-elide-elementsattrs-if-larger=400 --mlir-elide-resource-strings-if-larger=400 model.mlir > model.elide.mlir
grep -r "AveragePool" Inception_v4_vaiq_int8.default.torch-onnx.mlir
iree-compile --iree-vm-bytecode-module-output-format=flatbuffer-binary --dump-compilation-phases-to=./model-phases-rocm-Reshape_3/ /proj/gdba/shark/chi/src/SHARK-TestSuite/alt_e2eshark/test-run/mygpt4_trunc_Reshape_3/model.torch_onnx.mlir -o model_direct_Reshape_3.vmfb
iree-run-module --trace_execution=true --print_statistics=true --module=compiled_model.vmfb --function=tf2onnx --input="1x4xsi32=1"
torch-mlir-opt -pass-pipeline='builtin.module(torchscript-module-to-torch-backend-pipeline{backend-legal-ops=torch.aten.flatten.using_ints})' ./t5small_torchscript_0327_transformers4.26.0.mlir > t5small_torchbackend0420_torchscript_0327_transformers4.26.0.mlir
torch-mlir-opt -pass-pipeline='builtin.module(torch-backend-to-tosa-backend-pipeline)' t5small_torchbackend0420_torchscript_0327_transformers4.26.0.mlir > t5small_tosa_torchbackend0420_torchscript_0327_transformers4.26.0.mlir
torch-mlir-opt --mlir-elide-elementsattrs-if-larger=4 t5small_tosa_torchbackend0420_torchscript_0327_transformers4.26.0.mlir > t5small_tosa_torchbackend0420_torchscript_0327_transformers4.26.0_elide.mlir
./build/bin/torch-mlir-opt -pass-pipeline='builtin.module(torchscript-module-to-torch-backend-pipeline{backend-legal-ops=aten.flatten.using_ints})' /tmp/deberta_stablehlo_0605_transformers4.26.0.mlir --mlir-print-ir-before=torch-decompose-complex-ops
torch-mlir-opt -pass-pipeline='builtin.module(torchscript-module-to-torch-backend-pipeline{backend-legal-ops=torch.aten.flatten.using_ints})' t5large_torchscript_0306_transformers4.26.0.mlir > t5large_torchbackend0420_torchscript_0306_transformers4.26.0.mlir
torch-mlir-opt -pass-pipeline='builtin.module(torch-backend-to-tosa-backend-pipeline)' t5large_torchbackend0420_torchscript_0306_transformers4.26.0.mlir > t5large_tosa_torchbackend0420_torchscript_0306_transformers4.26.0.mlir
torch-mlir-opt --mlir-elide-elementsattrs-if-larger=4 t5large_tosa_torchbackend0420_torchscript_0306_transformers4.26.0.mlir > t5large_tosa_torchbackend0420_torchscript_0306_transformers4.26.0_elide.mlir
--convert-torch-to-stablehlo - Convert Torch ops to Stablehlo ops
  --enable-i32-index - Enable truncate index from i64 to i32 (unsafely)
  --enable-static-shape - Enable static shape conversion
torch-mlir-opt --convert-torch-to-stablehlo
--torch-backend-to-stablehlo-backend-pipeline
torch-mlir-opt -pass-pipeline='builtin.module(torch-backend-to-stablehlo-backend-pipeline)' /tmp/Net.mlir
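The two options above are pass options; a sketch of passing them inline with standard MLIR pass-option syntax (reusing /tmp/Net.mlir from the command above as a placeholder input):
torch-mlir-opt --convert-torch-to-stablehlo="enable-static-shape=true enable-i32-index=true" /tmp/Net.mlir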
https://github.com/llvm/torch-mlir/pull/1636/files
# Shape and dtype refinement functions for aten.sign (abstract interp lib):
def aten〇sign〡shape(self: List[int]) -> List[int]:
    return upstream_shape_functions.unary(self)

def aten〇sign〡dtype(self_rank_dtype: Tuple[int, int]) -> int:
    _, self_dtype = self_rank_dtype
    return self_dtype
./build_tools/update_abstract_interp_lib.sh
git clang-format HEAD~1
ssh -i /Users/chi/.ssh/google_compute_engine [email protected]
PYTHON=python3.11 VENV_DIR=shark.venv IMPORTER=1 ./setup_venv.sh
openvpn3 session-start --config /etc/openvpn/client.conf
openvpn3 sessions-list
openvpn3 session-manage --session-path /net/openvpn/v3/sessions/452dada3sec59s4b26s8ec8s19846aebc4c4 --disconnect
https://www.cherryservers.com/blog/how-to-install-and-start-using-docker-on-ubuntu-20-04
docker create \
--name cliu-ubuntu \
-e HOST_IP=$(ifconfig eth0 | awk '/ *inet /{print $2}') \
-t -i \
--mount type=bind,source=/disk1/cliu/,target=/disk1/cliu/ \
ubuntu:20.04 /bin/bash
docker container ls -a
docker container start cliu-ubuntu
docker exec -it cliu-ubuntu /bin/bash
docker container stop cliu-ubuntu
Anaconda with python3.10:
wget -P /tmp https://repo.anaconda.com/archive/Anaconda3-2023.03-1-Linux-x86_64.sh
bash /tmp/Anaconda3-2023.03-1-Linux-x86_64.sh
eval "$(/root/anaconda3/bin/conda shell.bash hook)"
IREE env and build steps:
python -m venv iree_venv
source iree_venv/bin/activate
source /nodclouddata/chi/src/SHARK-Turbine/turbine_venv/bin/activate
export PYTHONPATH=/nodclouddata/chi/src/iree-build/compiler/bindings/python:/nodclouddata/chi/src/iree-build/runtime/bindings/python
python -m pip install -r runtime/bindings/python/iree/runtime/build_requirements.txt
cmake -G Ninja -B ../iree-build/ -S . \
-DCMAKE_BUILD_TYPE=Debug \
-DIREE_ENABLE_ASSERTIONS=ON \
-DIREE_ENABLE_SPLIT_DWARF=ON \
-DIREE_ENABLE_THIN_ARCHIVES=ON \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DIREE_BUILD_PYTHON_BINDINGS=ON \
-DPython3_EXECUTABLE="$(which python)" \
-DCMAKE_C_COMPILER=clang \
-DCMAKE_CXX_COMPILER=clang++ \
-DIREE_ENABLE_LLD=ON
cmake --build ../iree-build/
source ../iree-build/.env && export PYTHONPATH
export IREE_SAVE_TEMPS="/nodclouddata/chi/src/SHARK/nan/dispatch/2/tmp"
iree-org/iree#14739
/nodclouddata/chi/src/SHARK/shark.venv/lib/python3.11/site-packages/iree/compiler/tools/../_mlir_libs/iree-compile chatglm-6b-int4.mlir --iree-input-type=tm_tensor --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=llvm-cpu --iree-llvmcpu-embedded-linker-path=/nodclouddata/chi/src/SHARK/shark.venv/lib/python3.11/site-packages/iree/compiler/tools/../_mlir_libs/iree-lld --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --mlir-pass-pipeline-crash-reproducer=/nodclouddata/chi/src/SHARK/nan/dispatch/2/tmp/core-reproducer.mlir --iree-llvmcpu-target-cpu-features=host --iree-llvmcpu-target-triple=x86_64-linux-gnu --iree-llvmcpu-enable-ukernels --iree-llvmcpu-stack-allocation-limit=256000 --iree-global-opt-enable-quantized-matmul-reassociation --iree-stream-resource-max-allocation-size=4294967295 --iree-vm-bytecode-module-strip-source-map=true --iree-util-zero-fill-elided-attrs --iree-opt-strip-assertions=false --verify=true --iree-hal-dump-executable-sources-to=/nodclouddata/chi/src/SHARK/nan/dispatch/2
IREE debug steps:
20240809:
iree-compile --iree-input-demote-i64-to-i32 --iree-hal-target-backends=llvm-cpu dpn68_vaiq.default.onnx.linalg.mlir > dpn68_vaiq.default.vmfb --iree-hal-dump-executable-sources-to=./dispatch
- To generate all of the dispatches
--iree-hal-dump-executable-sources-to=/nodclouddata/chi/src/SHARK/nan/dispatch/3
iree-compile chatglm-6b-int4.mlir --iree-input-type=tm_tensor --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=llvm-cpu --iree-llvmcpu-target-cpu-features=host --iree-llvmcpu-target-triple=x86_64-linux-gnu --iree-llvmcpu-enable-ukernels --iree-llvmcpu-stack-allocation-limit=256000 --iree-global-opt-enable-quantized-matmul-reassociation --iree-stream-resource-max-allocation-size=4294967295 --iree-vm-bytecode-module-strip-source-map=true --iree-util-zero-fill-elided-attrs --iree-hal-dump-executable-sources-to=/nodclouddata/chi/src/SHARK/nan/dispatch/3 -o /tmp/chatglm.vmfb
- To generate a vmfb that runs up to dispatch 9
--iree-flow-break-dispatch=@forward:9
iree-compile chatglm-6b-int4.mlir --iree-input-type=tm_tensor --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=llvm-cpu --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --mlir-pass-pipeline-crash-reproducer=/nodclouddata/chi/src/SHARK/nan/dispatch/2/tmp/core-reproducer.mlir --iree-llvmcpu-target-cpu-features=host --iree-llvmcpu-target-triple=x86_64-linux-gnu --iree-llvmcpu-enable-ukernels --iree-llvmcpu-stack-allocation-limit=256000 --iree-global-opt-enable-quantized-matmul-reassociation --iree-stream-resource-max-allocation-size=4294967295 --iree-vm-bytecode-module-strip-source-map=true --iree-util-zero-fill-elided-attrs --iree-opt-strip-assertions=false --verify=true --iree-flow-break-dispatch=@forward:9 -o /tmp/chatglm9.vmfb
Run step 5 to check whether dispatch 9 produces the NaN/bad value. If not, try the other dispatches generated in step 1 until you find the first dispatch that reproduces the NaN/bad values. Then use the following steps 2 and 3 to look deeper into the buggy dispatch.
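A minimal sketch of that bisection loop, assuming the entry function is forward and NUM_DISPATCHES is the dispatch count found in step 1 (flags trimmed to the essentials from the commands above):
NUM_DISPATCHES=20   # placeholder: total number of dispatches from step 1
for i in $(seq 1 "$NUM_DISPATCHES"); do
  iree-compile chatglm-6b-int4.mlir --iree-input-type=tm_tensor \
    --iree-hal-target-backends=llvm-cpu \
    --iree-flow-break-dispatch=@forward:$i -o /tmp/chatglm_$i.vmfb
  # stop at the first dispatch whose output contains NaN/Inf
  iree-run-module --device=local-task --module=/tmp/chatglm_$i.vmfb \
    --function=forward --input="1x4xi64=1" | grep -qiE "nan|inf" \
    && echo "first bad dispatch: $i" && break
done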
- To generate a vmfb that prints the inputs and outputs of each dispatch
--iree-flow-trace-dispatch-tensors
iree-org/iree#15661 (comment)
iree-compile chatglm-6b-int4.mlir --iree-input-type=tm_tensor --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=llvm-cpu --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --mlir-pass-pipeline-crash-reproducer=/nodclouddata/chi/src/SHARK/nan/dispatch/2/tmp/core-reproducer.mlir --iree-llvmcpu-target-cpu-features=host --iree-llvmcpu-target-triple=x86_64-linux-gnu --iree-llvmcpu-enable-ukernels --iree-llvmcpu-stack-allocation-limit=256000 --iree-global-opt-enable-quantized-matmul-reassociation --iree-stream-resource-max-allocation-size=4294967295 --iree-vm-bytecode-module-strip-source-map=true --iree-util-zero-fill-elided-attrs --iree-opt-strip-assertions=false --verify=true --iree-flow-break-dispatch=@forward:9 --iree-flow-trace-dispatch-tensors -o /tmp/chatglm9.vmfb > ./nan/dispatch/3/tmp/1218_chatglm_forward9-dispatch-tensors.txt
Then search for NaN/INF/other bad values in the dispatch-tensors.txt output:
grep -B 5 --max-count=1 -n INF ./1218_chatglm_forward9-dispatch-tensors.txt
- To add the annotations to the dispatches before the error:
-mlir-print-ir-after=iree-flow-annotate-dispatches -mlir-elide-elementsattrs-if-larger=4
iree-compile chatglm-6b-int4.mlir --iree-input-type=tm_tensor --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=llvm-cpu --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --mlir-pass-pipeline-crash-reproducer=/nodclouddata/chi/src/SHARK/nan/dispatch/2/tmp/core-reproducer.mlir --iree-llvmcpu-target-cpu-features=host --iree-llvmcpu-target-triple=x86_64-linux-gnu --iree-llvmcpu-enable-ukernels --iree-llvmcpu-stack-allocation-limit=256000 --iree-global-opt-enable-quantized-matmul-reassociation --iree-stream-resource-max-allocation-size=4294967295 --iree-vm-bytecode-module-strip-source-map=true --iree-util-zero-fill-elided-attrs --iree-opt-strip-assertions=false --verify=true --iree-flow-break-dispatch=@forward:9 --iree-flow-trace-dispatch-tensors -mlir-print-ir-after=iree-flow-annotate-dispatches -mlir-elide-elementsattrs-if-larger=4 -o /tmp/chatglm9.vmfb
Then copy the MLIR from the output into 1218_chatglm_forward9-dispatch-tensors-annotation.mlir. Then run iree-opt ./1218_chatglm_forward9-dispatch-tensors-annotation.mlir to elide the loc info.
- To run the vmfb after 2/3/4
iree-run-module \
--device=local-task \
--module="/tmp/chatglm9.vmfb" \
--function=forward \
--input="1x4xi64=1"
torch-mlir-opt -convert-torch-onnx-to-torch /tmp/node_test_add_model.mlir
Find the CPU info by running lscpu. Then use the CPU family and model numbers to figure out the microarchitecture: https://en.wikichip.org/wiki/intel/cpuid (searching family 6 model 63 on that page gives Haswell).
Then pass this flag to iree-compile:
--iree-llvmcpu-target-cpu=haswell
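For example, to pull the relevant fields out of lscpu directly (a quick sketch):
lscpu | grep -E 'CPU family|Model'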
az network bastion ssh --name "bastion-server-east1" --resource-group "pdue-nod-ai-rg" --target-ip-address "10.0.0.8" --auth-type "ssh-key" --username "chi" --ssh-key "C:\Users\chiliu12\chi-cpu_key.pem"
ONNX e2eshark test: add attributes to a node.
import onnx
from onnx import TensorProto
# Create an Add node
add_node = onnx.helper.make_node("Add", inputs=["A", "B"], outputs=["C"])
# Set an attribute value (e.g., alpha)
add_node.attribute.append(onnx.helper.make_attribute("alpha", 2.0))
# Declare graph inputs/outputs (example shapes; adjust as needed)
A = onnx.helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 4])
B = onnx.helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 4])
C = onnx.helper.make_tensor_value_info("C", TensorProto.FLOAT, [1, 4])
# Serialize the ONNX graph
graph = onnx.helper.make_graph([add_node], "add_graph", inputs=[A, B], outputs=[C])
model = onnx.helper.make_model(graph)
onnx.save(model, "add_model.onnx")
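To import the saved model into torch-mlir and start lowering it (a sketch, assuming a torch-mlir install that ships the ONNX importer; file names follow the script above):
python -m torch_mlir.tools.import_onnx add_model.onnx -o add_model.torch-onnx.mlir
torch-mlir-opt --convert-torch-onnx-to-torch add_model.torch-onnx.mlir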
SHARK-TestSuite/e2eshark useful cmds:
Run one onnx model:
python ./run.py --torchmlirbuild ../../torch-mlir/build --tolerance 0.001 0.001 --cachedir ./huggingface_cache --runupto torch-mlir --torchtolinalg --ireebuild ../../iree-build --tests onnx/models/retinanet_resnet50_fpn_vaiq_int8
Run all onnx models:
python ./run.py --torchmlirbuild ../../torch-mlir/build --tolerance 0.001 0.001 --cachedir ./huggingface_cache --runupto iree-compile --torchtolinalg --ireebuild ../../iree-build --report
Run one op:
python run.py -c ../../torch-mlir/build/ -i ../../iree-build/ -f onnx --tests onnx/operators/ReduceProdKeepdims0 --cachedir cachedir --report --runupto torch-mlir --torchtolinalg
Run all pytorch models:
python ./run.py --torchmlirbuild ../../torch-mlir/build --tolerance 0.001 0.001 --cachedir ./huggingface_cache --ireebuild ../../iree-build --runupto iree-compile -f pytorch -g models --mode onnx
Run one pytorch model:
python ./run.py --torchmlirbuild ../../torch-mlir/build --tolerance 0.001 0.001 --cachedir ./huggingface_cache --ireebuild ../../iree-build --runupto iree-compile -f pytorch -g models --mode onnx --tests onnx/models/retinanet_resnet50_fpn_vaiq_int8
Upload a big zip file from a VM to Azure storage:
az storage blob upload --account-name onnxstorage --container-name onnxstorage --name bugcases/torchtolinalgpipelineissue.zip --file torchtolinalgpipelineissue.zip --auth-mode key
pip install \
--find-links https://github.com/llvm/torch-mlir-release/releases/expanded_assets/dev-wheels \
--upgrade \
torch-mlir
pip install \
--find-links https://iree.dev/pip-release-links.html \
--upgrade \
iree-compiler \
iree-runtime
Run iree_tests:
pytest SHARK-TestSuite/iree_tests/onnx/ \
-rpfE \
--numprocesses 24 \
--timeout=30 \
--durations=20 \
--no-skip-tests-missing-files \
--config-files=/proj/gdba/shark/chi/src/iree/build_tools/pkgci/external_test_suite/onnx_cpu_llvm_sync.json \
--report-log=/proj/gdba/shark/chi/src/iree_log.txt
Search all subdirectories for a string:
grep -R "torch.aten.tensor" *
torch-mlir-opt -pass-pipeline='builtin.module(torchscript-module-to-torch-backend-pipeline{backend-legal-ops=torch.aten.flatten.using_ints})' ./distilgpt_torch_raw_transformers4.21.2.mlir > torchbackend.mlir
torch-mlir-opt -pass-pipeline='builtin.module(torch-backend-to-tosa-backend-pipeline)' torchbackend.mlir > tosabackend.mlir