Last active
March 17, 2025 01:06
-
-
Save AmosLewis/00fdb4e9a96f29c188828e3ff4ea29ef to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compile the fp8 attention MLIR into a .vmfb module for the HIP device
# (target gfx942). The input MLIR can be downloaded with:
#   wget https://sharkpublic.blob.core.windows.net/sharkpublic/chi/llama/atten/fp8_attn.mlir
# NOTE(review): the command below reads it from /sharedfile/attn/128/ —
# presumably the download is placed there first; confirm.
#
# Each backslash must be the LAST character on its line; any trailing
# characters after it break the line continuation.
iree-compile \
  /sharedfile/attn/128/fp8_attn.mlir \
  --iree-hip-target=gfx942 \
  -o=/sharedfile/attn/128/fp8_attn.vmfb \
  --iree-hal-target-device=hip \
  --iree-dispatch-creation-enable-aggressive-fusion=true \
  --iree-global-opt-propagate-transposes=true \
  --iree-opt-aggressively-propagate-transposes=true \
  --iree-opt-data-tiling=false \
  --iree-preprocessing-pass-pipeline='builtin.module(util.func(iree-preprocessing-generalize-linalg-matmul-experimental))' \
  --iree-hal-indirect-command-buffers=true \
  --iree-stream-resource-memory-model=discrete \
  --iree-hal-memoization=true \
  --iree-opt-strip-assertions
# Benchmark the prefill_bs4 function of the compiled module on HIP device 4,
# with 3 benchmark repetitions.
# All the input .bin files can be found on the chi@SharkMi300X machine (via ssh)
# at the corresponding paths.
# NOTE(review): the module lives under /sharedfile/attn/128/, the parameters
# under /sharedfile/attn/, and the inputs under /sharedfile/128/prefill/ —
# confirm these differing roots are intentional.
#
# ROCR_VISIBLE_DEVICES is set only for this one command (env-prefix form).
ROCR_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
  iree-benchmark-module \
  --hip_use_streams=true \
  --module=/sharedfile/attn/128/fp8_attn.vmfb \
  --parameters=model=/sharedfile/attn/fp8_attn.irpa \
  --device=hip://4 \
  --function=prefill_bs4 \
  --input=4x128xi64=@/sharedfile/128/prefill/prefill_token_ids_4x128xi64.bin \
  --input=4xi64=@/sharedfile/128/prefill/prefill_seq_lens_4xi64.bin \
  --input=4x4xi64=@/sharedfile/128/prefill/prefill_seq_block_ids_4x4xi64.bin \
  --input=261x2097152xf8E4M3FNUZ=@/sharedfile/128/prefill/prefill_cache_state_261x2097152xf8E4M3FNUZ.bin \
  --benchmark_repetitions=3
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment