Last active
March 27, 2025 21:54
-
-
Save AmosLewis/f9f71aec560170fcf58a48ec96a6e893 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ssh chi@SharkMi300X
# iree-3.4.0rc20250327
# build iree with tracy
# Check out the pinned release candidate, then configure and build IREE into
# ../iree-build-trace/ with runtime tracing (IREE_ENABLE_RUNTIME_TRACING) and
# the Tracy tools (IREE_BUILD_TRACY) enabled, targeting ROCm/HIP.
# Run from the root of the IREE source checkout: the configure step uses "-S .".
# The steps are chained with && so that a failed checkout or configure does not
# go on to build a stale or mismatched tree.
git checkout iree-3.4.0rc20250327 &&
  cmake -G Ninja -B ../iree-build-trace/ -S . \
    -DCMAKE_BUILD_TYPE=Release \
    -DIREE_ENABLE_ASSERTIONS=ON \
    -DIREE_ENABLE_SPLIT_DWARF=ON \
    -DIREE_ENABLE_THIN_ARCHIVES=ON \
    -DCMAKE_C_COMPILER_LAUNCHER=ccache \
    -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
    -DIREE_BUILD_PYTHON_BINDINGS=ON \
    -DPython3_EXECUTABLE="$(command -v python)" \
    -DCMAKE_C_COMPILER=clang \
    -DCMAKE_CXX_COMPILER=clang++ \
    -DIREE_ENABLE_RUNTIME_TRACING=ON \
    -DIREE_BUILD_TRACY=ON \
    -DIREE_TARGET_BACKEND_ROCM=ON \
    -DIREE_HAL_DRIVER_HIP=ON \
    -DIREE_ENABLE_LLD=ON &&
  cmake --build ../iree-build-trace/
###################################################################################################
# shark-ai 0327 export mlir
# shark-ai commit:
# 698bceab2de5705884d6fbde41ad4f908a7a00c2
# Change ShardedTensor.clone to error out on wrong args (#1179)
###################################################################################################
# python3 -m sharktank.examples.export_paged_llm_v1 --irpa-file=/sharedfile/attn/fp8_attn.irpa \
# --output-mlir=/sharedfile/attn/128/fp8_attn.mlir \
# --output-config=/sharedfile/attn/128/config_attn.json \
# --bs-prefill=4 --bs-decode=4 --attention-kernel sharktank \
# --attention-dtype=float8_e4m3fnuz --activation-dtype=bfloat16 --use-attention-mask --use-hf --kv-cache-dtype=float8_e4m3fnuz
# compile since iree-3.4.0rc20250327
# the irpa file:
# wget https://sharkblobs.blob.core.windows.net/chi/llama_8b_fp8_attn
# the mlir file that gets 31ms:
# wget https://sharkpublic.blob.core.windows.net/sharkpublic/chi/llama/atten/fp8_attn_0327.mlir
# the mlir file that gets 25.6ms:
# wget https://sharkpublic.blob.core.windows.net/sharkpublic/chi/llama/atten/fp8_attn_i907_0320.mlir
# Compile the exported fp8 attention MLIR into a VMFB for the HIP device
# (gfx942) using the iree-compile binary from the tracing-enabled build.
# --iree-hal-dump-executable-sources-to=dump uses a relative path, so the
# "dump" directory is created under the current working directory.
# NOTE(review): --iree-hal-executable-debug-level=3 is presumably here so the
# trace can carry executable source info; confirm against the IREE Tracy
# profiling docs.
/home/chi/src/iree-build-trace/tools/iree-compile \
  /sharedfile/attn/128/fp8_attn_0327.mlir \
  --iree-hip-target=gfx942 \
  -o=/sharedfile/attn/128/fp8_attn_tracy_iree0327_mlir0327.vmfb \
  --iree-hal-target-device=hip \
  --iree-opt-level=O3 \
  --iree-hal-indirect-command-buffers=true \
  --iree-stream-resource-memory-model=discrete \
  --iree-hal-memoization=true \
  --iree-hal-executable-debug-level=3 \
  --iree-hal-dump-executable-sources-to=dump
# all inputs in sharkpublic/chi/llama/input/
# Run the prefill_bs4 entry point of the compiled module on hip://4 with the
# tracing-enabled iree-run-module, feeding the four prefill inputs from .bin
# files. The two leading assignments apply to this one command only.
# NOTE(review): TRACY_NO_EXIT=1 presumably keeps the process alive until a
# Tracy capture has collected the trace, so this command is expected to block
# until iree-tracy-capture connects from another terminal; confirm against the
# Tracy manual.
# NOTE(review): the inputs are read from /sharedfile/128/prefill/ while the
# module lives under /sharedfile/attn/128/; verify the input path is intended.
TRACY_NO_EXIT=1 \
ROCR_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
/home/chi/src/iree-build-trace/tools/iree-run-module \
  --hip_use_streams=true \
  --module=/sharedfile/attn/128/fp8_attn_tracy_iree0327_mlir0327.vmfb \
  --parameters=model=/sharedfile/attn/fp8_attn.irpa \
  --device=hip://4 \
  --function=prefill_bs4 \
  --input=4x128xi64=@/sharedfile/128/prefill/prefill_token_ids_4x128xi64.bin \
  --input=4xi64=@/sharedfile/128/prefill/prefill_seq_lens_4xi64.bin \
  --input=4x4xi64=@/sharedfile/128/prefill/prefill_seq_block_ids_4x4xi64.bin \
  --input=261x2097152xf8E4M3FNUZ=@/sharedfile/128/prefill/prefill_cache_state_261x2097152xf8E4M3FNUZ.bin
# EXEC @prefill_bs4
# result[0]: hal.buffer_view
# 4x128x128256xf32=[[2.38743 1.13533 0.0316274 0.404722 -2.59714 -0.0234363 4.38464 4.32849 0.552629 0.267567 -2.72748 5.84252 2.77527 4.22566 2.19176 -2.30531 0.604199 0.899531 -1.10129 1.12569 0.869303 2.23187 -0.0645586 0.158095 -0.140919 -0.0600918 1.4782 0.74488 -0.94031 0.467049 1.19404 2.37265 -0.148914 1.62083 1.7 ...
# ...][...][...]]
# in another terminal, run the capture tool and get:
# Capture the trace with the iree-tracy-capture binary from the tracing build.
# Run this from a second terminal; the original note was pasted from a prompt
# whose working directory was "tracy" with a .venv active. The output path is
# relative, so the .tracy file lands in the current working directory.
# -o names the output file; -f presumably overwrites an existing one (confirm
# with the tool's usage text).
/home/chi/src/iree-build-trace/tracy/iree-tracy-capture -f -o 8b_fp8_prefill_bs4_128_iree0327_mlir0327.tracy
# Connecting to 127.0.0.1:8086...
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment