Created
June 27, 2025 04:44
-
-
Save AmosLewis/8f3e31db32a5bdb1ef64d7aa4f373dd5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Positional arguments (both optional):
#   $1 - iree_day:  tag embedded verbatim in every output file name
#                   (presumably the date of the IREE build; default "0624").
#   $2 - shark_day: tag embedded verbatim in every output file name
#                   (presumably the date of the shark-ai checkout; default "0626").
# "${N:-}" keeps the emptiness test valid even if the caller enables `set -u`.
if [ -z "${1:-}" ]; then
  iree_day="0624"
  echo "No flag provided. Using default iree_day $iree_day."
else
  iree_day="$1"
fi

if [ -z "${2:-}" ]; then
  shark_day="0626"
  echo "No flag provided. Using default shark_day $shark_day."
else
  shark_day="$2"
fi
# Model parameter archive handed to both the exporter and the benchmark runs.
# Alternative local copy, left disabled:
# irpa_path=/home/chi/tmp/native_fp8_e4m3fnuz_llama3_405b.irpa
irpa_path=/shark-dev/405b/fp8/attnf8/native_fp8_e4m3fnuz_llama3_405b.irpa

# Every artifact of the 128 run shares one stem that encodes both day tags,
# so results from different builds never overwrite each other.
stem_128="/sharedfile/attn/128/405b/fp8_attn_iree${iree_day}.shark${shark_day}"
mlir_path_128="${stem_128}.mlir"
config_128="${stem_128}.json"
vmfb_128="${stem_128}.prefill.vmfb"
vmfb_128_decode="${stem_128}.decode.vmfb"
benchmark_128_prefill="${stem_128}.prefill.txt"
benchmark_128_decode="${stem_128}.decode.txt"
#######################################
# Make sure a regular file exists at the given path, creating an empty one
# (and any missing parent directories) when it is absent.
# Arguments: $1 - path of the file to ensure
# Outputs:   one status line on stdout; a failure message on stderr
# Returns:   0 if the file already existed or was created, 1 if creation failed
#######################################
create_file_if_not_exists() {
  local file="${1:-}"

  if [ -f "$file" ]; then
    echo "File already exists: $file"
    return 0
  fi

  # touch alone fails when the output directory has not been created yet,
  # so build the parent first and only report success if both steps worked.
  if ! mkdir -p -- "$(dirname -- "$file")" || ! touch -- "$file"; then
    echo "Failed to create file: $file" >&2
    return 1
  fi
  echo "File created: $file"
}
# Print the artifact paths of the 128 run and make sure both benchmark logs
# exist before anything is written to them. Expansions are quoted so a tag
# containing spaces or glob characters is passed through as a single word.
printf '%s\n' "$mlir_path_128"
printf '%s\n' "$config_128"
printf '%s\n' "$vmfb_128"
printf '%s\n' "$benchmark_128_prefill"
create_file_if_not_exists "$benchmark_128_prefill"
printf '%s\n' "$benchmark_128_decode"
create_file_if_not_exists "$benchmark_128_decode"
# Disabled reference invocation (8b model, fixed output paths), kept for
# comparison with the live command below:
# python3 \
#   -m \
#   sharktank.examples.export_paged_llm_v1 \
#   --irpa-file=/shark-dev/8b/fp8/attnf8/native_fp8_e4m3fnuz_llama3_8b.irpa \
#   --output-mlir=/home/chi/src/shark-ai/2025-04-13/llama-8b/fp8_attnf8_128_tp1.mlir \
#   --output-config=/home/chi/src/shark-ai/2025-04-13/llama-8b/fp8_attnf8_128_tp1.json \
#   --bs-prefill=4 \
#   --bs-decode=4 \
#   --block-seq-stride=32 \
#   --attention-dtype=float8_e4m3fnuz \
#   --activation-dtype=bfloat16 \
#   --kv-cache-dtype=float8_e4m3fnuz \
#   --attention-kernel=sharktank \
#   --use-attention-mask \
#   --use-hf

# Export the model to MLIR plus a JSON config for the 128 run, with batch
# size 4 for both prefill and decode. Paths are quoted so each is passed as
# exactly one argument.
python3 -m sharktank.examples.export_paged_llm_v1 \
  --irpa-file="$irpa_path" \
  --output-mlir="$mlir_path_128" \
  --output-config="$config_128" \
  --bs-prefill=4 \
  --bs-decode=4 \
  --attention-kernel=sharktank \
  --activation-dtype=bfloat16 \
  --use-attention-mask \
  --use-hf \
  --kv-cache-dtype=float8_e4m3fnuz
# Compile the exported 128 MLIR into the VMFB used by the prefill benchmark
# (HIP device, gfx942 target, default tuning specs enabled).
echo "iree-compile prefill: "
iree-compile \
  "$mlir_path_128" \
  --iree-hip-target=gfx942 \
  -o="$vmfb_128" \
  --iree-hal-target-device=hip \
  --iree-opt-level=O3 \
  --iree-hal-indirect-command-buffers=true \
  --iree-stream-resource-memory-model=discrete \
  --iree-hal-memoization=true \
  --iree-codegen-enable-default-tuning-specs=true
# Disabled flag, kept for reference:
# --iree-dispatch-creation-propagate-collapse-across-expands=true
# Benchmark prefill_bs4 from the 128 prefill VMFB on hip://4, three
# repetitions, and mirror the report into the prefill log via tee.
# NOTE(review): the --input shapes must match the exported function's
# signature; presumably tokens, sequence lengths, page ids and cache state —
# confirm against the generated MLIR.
echo "iree-benchmark-module prefill: "
ROCR_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
  iree-benchmark-module \
  --hip_use_streams=true \
  --module="$vmfb_128" \
  --parameters=model="$irpa_path" \
  --device=hip://4 \
  --function=prefill_bs4 \
  --input=4x128xi64 \
  --input=4xi64 \
  --input=4x4xi64 \
  --input=128x8257536xf8E4M3FNUZ \
  --benchmark_repetitions=3 \
  | tee "$benchmark_128_prefill"
# Compile the exported 128 MLIR a second time into the VMFB used by the
# decode benchmark. Unlike the prefill build, this one raises the stream
# affinity solver iteration limit and does not enable default tuning specs.
echo "iree-compile decode: "
iree-compile \
  "$mlir_path_128" \
  --iree-hip-target=gfx942 \
  -o="$vmfb_128_decode" \
  --iree-hal-target-device=hip \
  --iree-opt-level=O3 \
  --iree-hal-indirect-command-buffers=true \
  --iree-stream-resource-memory-model=discrete \
  --iree-hal-memoization=true \
  --iree-stream-affinity-solver-max-iterations=1024
# Disabled flag, kept for reference:
# --iree-dispatch-creation-propagate-collapse-across-expands=true
# Benchmark decode_bs4 from the 128 decode VMFB on hip://4, three
# repetitions, and mirror the report into the decode log via tee.
# The last input's second dimension is 8257536, the value used by every other
# benchmark invocation in this script; it was previously mistyped as 8257532.
# NOTE(review): presumably that last input is the paged cache state — confirm
# against the generated MLIR.
echo "iree-benchmark-module decode: "
ROCR_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
  iree-benchmark-module \
  --hip_use_streams=true \
  --module="$vmfb_128_decode" \
  --parameters=model="$irpa_path" \
  --device=hip://4 \
  --function=decode_bs4 \
  --input=4x1xi64 \
  --input=4xi64 \
  --input=4xi64 \
  --input=4x5xi64 \
  --input=128x8257536xf8E4M3FNUZ \
  --benchmark_repetitions=3 \
  | tee "$benchmark_128_decode"
########################################## 2048 ##########################################
# Every artifact of the 2048 run shares one stem that encodes both day tags,
# mirroring the layout used for the 128 run under a separate directory.
stem_2048="/sharedfile/attn/2048/405b/fp8_attn_iree${iree_day}.shark${shark_day}"
mlir_path_2048="${stem_2048}.mlir"
config_2048="${stem_2048}.json"
vmfb_2048="${stem_2048}.prefill.vmfb"
vmfb_2048_decode="${stem_2048}.decode.vmfb"
benchmark_2048_prefill="${stem_2048}.prefill.txt"
benchmark_2048_decode="${stem_2048}.decode.txt"
# Print the artifact paths of the 2048 run and make sure both benchmark logs
# exist before anything is written to them. Expansions are quoted so each
# path is passed through as a single word.
printf '%s\n' "$mlir_path_2048"
printf '%s\n' "$config_2048"
printf '%s\n' "$vmfb_2048"
printf '%s\n' "$benchmark_2048_prefill"
create_file_if_not_exists "$benchmark_2048_prefill"
printf '%s\n' "$benchmark_2048_decode"
create_file_if_not_exists "$benchmark_2048_decode"
# Export the model to MLIR plus a JSON config for the 2048 run, using the
# same exporter options as the 128 run. One option per line, all in the
# --name=value form, so the two invocations can be compared at a glance.
# echo $mlir_path_2048
python3 -m sharktank.examples.export_paged_llm_v1 \
  --irpa-file="$irpa_path" \
  --output-mlir="$mlir_path_2048" \
  --output-config="$config_2048" \
  --bs-prefill=4 \
  --bs-decode=4 \
  --attention-kernel=sharktank \
  --activation-dtype=bfloat16 \
  --use-attention-mask \
  --use-hf \
  --kv-cache-dtype=float8_e4m3fnuz
# Compile the exported 2048 MLIR into the VMFB used by the prefill benchmark
# (HIP device, gfx942 target, default tuning specs enabled).
# The -o argument is followed by a space before the line continuation; without
# it the output path fuses with the next option when that line is unindented.
echo "iree-compile prefill: "
iree-compile \
  "$mlir_path_2048" \
  --iree-hip-target=gfx942 \
  -o="$vmfb_2048" \
  --iree-hal-target-device=hip \
  --iree-opt-level=O3 \
  --iree-hal-indirect-command-buffers=true \
  --iree-stream-resource-memory-model=discrete \
  --iree-hal-memoization=true \
  --iree-codegen-enable-default-tuning-specs=true
# Disabled flag, kept for reference:
# --iree-dispatch-creation-propagate-collapse-across-expands=true
# Benchmark prefill_bs4 from the 2048 prefill VMFB on hip://4, three
# repetitions, and mirror the report into the prefill log via tee.
# NOTE(review): the --input shapes must match the exported function's
# signature; presumably tokens, sequence lengths, page ids and cache state —
# confirm against the generated MLIR.
echo "iree-benchmark-module prefill: "
ROCR_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
  iree-benchmark-module \
  --hip_use_streams=true \
  --module="$vmfb_2048" \
  --parameters=model="$irpa_path" \
  --device=hip://4 \
  --function=prefill_bs4 \
  --input=4x2048xi64 \
  --input=4xi64 \
  --input=4x64xi64 \
  --input=513x8257536xf8E4M3FNUZ \
  --benchmark_repetitions=3 \
  | tee "$benchmark_2048_prefill"
# Compile the exported 2048 MLIR a second time into the VMFB used by the
# decode benchmark (no default tuning specs for this build).
# The -o argument is followed by a space before the line continuation; without
# it the output path fuses with the next option when that line is unindented.
echo "iree-compile decode: "
iree-compile \
  "$mlir_path_2048" \
  --iree-hip-target=gfx942 \
  -o="$vmfb_2048_decode" \
  --iree-hal-target-device=hip \
  --iree-opt-level=O3 \
  --iree-hal-indirect-command-buffers=true \
  --iree-stream-resource-memory-model=discrete \
  --iree-hal-memoization=true
# Disabled flag, kept for reference:
# --iree-dispatch-creation-propagate-collapse-across-expands=true
# Benchmark decode_bs4 from the 2048 decode VMFB on hip://4, three
# repetitions, and mirror the report into the decode log via tee.
# Each --input is followed by a space before the line continuation; the first
# two previously had none and could fuse with the following argument.
# NOTE(review): the --input shapes must match the exported function's
# signature — confirm against the generated MLIR.
echo "iree-benchmark-module decode: "
ROCR_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
  iree-benchmark-module \
  --hip_use_streams=true \
  --module="$vmfb_2048_decode" \
  --parameters=model="$irpa_path" \
  --device=hip://4 \
  --function=decode_bs4 \
  --input=4x1xi64 \
  --input=4xi64 \
  --input=4xi64 \
  --input=4x65xi64 \
  --input=513x8257536xf8E4M3FNUZ \
  --benchmark_repetitions=3 \
  | tee "$benchmark_2048_decode"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment