Skip to content

Instantly share code, notes, and snippets.

@AmosLewis
Created June 27, 2025 04:44
Show Gist options
  • Save AmosLewis/8f3e31db32a5bdb1ef64d7aa4f373dd5 to your computer and use it in GitHub Desktop.
Save AmosLewis/8f3e31db32a5bdb1ef64d7aa4f373dd5 to your computer and use it in GitHub Desktop.
# Positional arguments (both optional):
#   $1 - IREE build day tag used in output file names
#   $2 - shark build day tag used in output file names
# When an argument is missing or empty, a built-in default is used and
# the choice is announced on stdout.
if [ -n "$1" ]; then
  iree_day="$1"
else
  iree_day="0624"
  echo "No flag provided. Using default iree_day $iree_day."
fi

if [ -n "$2" ]; then
  shark_day="$2"
else
  shark_day="0626"
  echo "No flag provided. Using default shark_day $shark_day."
fi
# Parameter archive passed to the exporter and to the benchmark runs.
# Disabled alternative location:
#   irpa_path=/home/chi/tmp/native_fp8_e4m3fnuz_llama3_405b.irpa
irpa_path=/shark-dev/405b/fp8/attnf8/native_fp8_e4m3fnuz_llama3_405b.irpa

# All 128 artifacts share one path stem tagged with the IREE and shark days;
# only the suffix differs per artifact.
_stem_128="/sharedfile/attn/128/405b/fp8_attn_iree${iree_day}.shark${shark_day}"
mlir_path_128="${_stem_128}.mlir"
config_128="${_stem_128}.json"
vmfb_128="${_stem_128}.prefill.vmfb"
vmfb_128_decode="${_stem_128}.decode.vmfb"
benchmark_128_prefill="${_stem_128}.prefill.txt"
benchmark_128_decode="${_stem_128}.decode.txt"
#######################################
# Make sure a regular file exists at the given path.
# Creates the parent directory and an empty file when the file is missing.
# Arguments: $1 - path of the file to ensure
# Outputs:   one status line on stdout; an error line on stderr on failure
# Returns:   0 if the file exists afterwards, 1 if it could not be created
#######################################
create_file_if_not_exists() {
  local FILE="$1"

  if [ -f "$FILE" ]; then
    echo "File already exists: $FILE"
    return 0
  fi

  # Only report success when both the directory and the file were really
  # created; a bare touch fails when the parent directory is missing.
  if mkdir -p -- "$(dirname -- "$FILE")" && touch -- "$FILE"; then
    echo "File created: $FILE"
  else
    echo "Failed to create file: $FILE" >&2
    return 1
  fi
}
# Print the 128 artifact paths and make sure both benchmark log files exist.
# Expansions are quoted because the paths embed the user-supplied day tags.
echo "$mlir_path_128"
echo "$config_128"
echo "$vmfb_128"
echo "$benchmark_128_prefill"
create_file_if_not_exists "$benchmark_128_prefill"
echo "$benchmark_128_decode"
create_file_if_not_exists "$benchmark_128_decode"
# Disabled reference invocation for the 8b model, kept for comparison:
# python3 \
# -m \
# sharktank.examples.export_paged_llm_v1 \
# --irpa-file=/shark-dev/8b/fp8/attnf8/native_fp8_e4m3fnuz_llama3_8b.irpa \
# --output-mlir=/home/chi/src/shark-ai/2025-04-13/llama-8b/fp8_attnf8_128_tp1.mlir \
# --output-config=/home/chi/src/shark-ai/2025-04-13/llama-8b/fp8_attnf8_128_tp1.json \
# --bs-prefill=4 \
# --bs-decode=4 \
# --block-seq-stride=32 \
# --attention-dtype=float8_e4m3fnuz \
# --activation-dtype=bfloat16 \
# --kv-cache-dtype=float8_e4m3fnuz \
# --attention-kernel=sharktank \
# --use-attention-mask \
# --use-hf

# Export the model from the IRPA file to MLIR plus a JSON config (128 paths).
# Path expansions are quoted so a day tag containing whitespace or glob
# characters cannot split or expand an argument.
python3 -m sharktank.examples.export_paged_llm_v1 \
  --irpa-file="$irpa_path" \
  --output-mlir="$mlir_path_128" \
  --output-config="$config_128" \
  --bs-prefill=4 \
  --bs-decode=4 \
  --attention-kernel=sharktank \
  --activation-dtype=bfloat16 \
  --use-attention-mask \
  --use-hf \
  --kv-cache-dtype=float8_e4m3fnuz
# Compile the exported 128 MLIR into the prefill VMFB for gfx942 / hip.
# Path expansions are quoted so they always stay single arguments.
echo "iree-compile prefill: "
iree-compile \
  "$mlir_path_128" \
  --iree-hip-target=gfx942 \
  -o="$vmfb_128" \
  --iree-hal-target-device=hip \
  --iree-opt-level=O3 \
  --iree-hal-indirect-command-buffers=true \
  --iree-stream-resource-memory-model=discrete \
  --iree-hal-memoization=true \
  --iree-codegen-enable-default-tuning-specs=true
# Disabled flag:
#   --iree-dispatch-creation-propagate-collapse-across-expands=true
# Benchmark prefill_bs4 from the 128 prefill VMFB; the output is shown on
# the terminal and written to the prefill log via tee.
# Path expansions are quoted so they always stay single arguments.
echo "iree-benchmark-module prefill: "
ROCR_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
  iree-benchmark-module \
  --hip_use_streams=true \
  --module="$vmfb_128" \
  --parameters=model="$irpa_path" \
  --device=hip://4 \
  --function=prefill_bs4 \
  --input=4x128xi64 \
  --input=4xi64 \
  --input=4x4xi64 \
  --input=128x8257536xf8E4M3FNUZ \
  --benchmark_repetitions=3 \
  | tee "$benchmark_128_prefill"
# Compile the exported 128 MLIR into the decode VMFB for gfx942 / hip.
# Path expansions are quoted so they always stay single arguments.
echo "iree-compile decode: "
iree-compile \
  "$mlir_path_128" \
  --iree-hip-target=gfx942 \
  -o="$vmfb_128_decode" \
  --iree-hal-target-device=hip \
  --iree-opt-level=O3 \
  --iree-hal-indirect-command-buffers=true \
  --iree-stream-resource-memory-model=discrete \
  --iree-hal-memoization=true \
  --iree-stream-affinity-solver-max-iterations=1024
# Disabled flag:
#   --iree-dispatch-creation-propagate-collapse-across-expands=true
# Benchmark decode_bs4 from the 128 decode VMFB; the output is shown on the
# terminal and written to the decode log via tee.
# The last input's second dimension is 8257536, the same value the prefill
# run and both 2048 runs pass; 8257532 was a typo.
echo "iree-benchmark-module decode: "
ROCR_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
  iree-benchmark-module \
  --hip_use_streams=true \
  --module="$vmfb_128_decode" \
  --parameters=model="$irpa_path" \
  --device=hip://4 \
  --function=decode_bs4 \
  --input=4x1xi64 \
  --input=4xi64 \
  --input=4xi64 \
  --input=4x5xi64 \
  --input=128x8257536xf8E4M3FNUZ \
  --benchmark_repetitions=3 \
  | tee "$benchmark_128_decode"
################################### 2048 ###################################
# All 2048 artifacts share one path stem tagged with the IREE and shark days;
# only the suffix differs per artifact.
_stem_2048="/sharedfile/attn/2048/405b/fp8_attn_iree${iree_day}.shark${shark_day}"
mlir_path_2048="${_stem_2048}.mlir"
config_2048="${_stem_2048}.json"
vmfb_2048="${_stem_2048}.prefill.vmfb"
vmfb_2048_decode="${_stem_2048}.decode.vmfb"
benchmark_2048_prefill="${_stem_2048}.prefill.txt"
benchmark_2048_decode="${_stem_2048}.decode.txt"
# Print the 2048 artifact paths and make sure both benchmark log files exist.
# Expansions are quoted because the paths embed the user-supplied day tags.
echo "$mlir_path_2048"
echo "$config_2048"
echo "$vmfb_2048"
echo "$benchmark_2048_prefill"
create_file_if_not_exists "$benchmark_2048_prefill"
echo "$benchmark_2048_decode"
create_file_if_not_exists "$benchmark_2048_decode"
# Export the model from the IRPA file to MLIR plus a JSON config (2048 paths).
# Path expansions are quoted, and --attention-kernel uses the same
# --flag=value form as the 128 export.
python3 -m sharktank.examples.export_paged_llm_v1 \
  --irpa-file="$irpa_path" \
  --output-mlir="$mlir_path_2048" \
  --output-config="$config_2048" \
  --bs-prefill=4 \
  --bs-decode=4 \
  --attention-kernel=sharktank \
  --activation-dtype=bfloat16 \
  --use-attention-mask \
  --use-hf \
  --kv-cache-dtype=float8_e4m3fnuz
# Compile the exported 2048 MLIR into the prefill VMFB for gfx942 / hip.
# Each continuation backslash is preceded by a space: a backslash-newline is
# removed outright, so "-o=...vmfb\" would be joined with the next flag into
# a single word.
echo "iree-compile prefill: "
iree-compile \
  "$mlir_path_2048" \
  --iree-hip-target=gfx942 \
  -o="$vmfb_2048" \
  --iree-hal-target-device=hip \
  --iree-opt-level=O3 \
  --iree-hal-indirect-command-buffers=true \
  --iree-stream-resource-memory-model=discrete \
  --iree-hal-memoization=true \
  --iree-codegen-enable-default-tuning-specs=true
# Disabled flag:
#   --iree-dispatch-creation-propagate-collapse-across-expands=true
# Benchmark prefill_bs4 from the 2048 prefill VMFB; the output is shown on
# the terminal and written to the prefill log via tee.
# Path expansions are quoted so they always stay single arguments.
echo "iree-benchmark-module prefill: "
ROCR_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
  iree-benchmark-module \
  --hip_use_streams=true \
  --module="$vmfb_2048" \
  --parameters=model="$irpa_path" \
  --device=hip://4 \
  --function=prefill_bs4 \
  --input=4x2048xi64 \
  --input=4xi64 \
  --input=4x64xi64 \
  --input=513x8257536xf8E4M3FNUZ \
  --benchmark_repetitions=3 \
  | tee "$benchmark_2048_prefill"
# Compile the exported 2048 MLIR into the decode VMFB for gfx942 / hip.
# Each continuation backslash is preceded by a space: a backslash-newline is
# removed outright, so "-o=...vmfb\" would be joined with the next flag into
# a single word.
# NOTE(review): the 128 decode compile additionally passes
# --iree-stream-affinity-solver-max-iterations=1024; confirm whether it is
# intentionally absent here.
echo "iree-compile decode: "
iree-compile \
  "$mlir_path_2048" \
  --iree-hip-target=gfx942 \
  -o="$vmfb_2048_decode" \
  --iree-hal-target-device=hip \
  --iree-opt-level=O3 \
  --iree-hal-indirect-command-buffers=true \
  --iree-stream-resource-memory-model=discrete \
  --iree-hal-memoization=true
# Disabled flag:
#   --iree-dispatch-creation-propagate-collapse-across-expands=true
# Benchmark decode_bs4 from the 2048 decode VMFB; the output is shown on the
# terminal and written to the decode log via tee.
# Every --input is followed by " \" so each flag stays a separate word; a
# backslash directly after the value would fuse it with the next flag.
echo "iree-benchmark-module decode: "
ROCR_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
  iree-benchmark-module \
  --hip_use_streams=true \
  --module="$vmfb_2048_decode" \
  --parameters=model="$irpa_path" \
  --device=hip://4 \
  --function=decode_bs4 \
  --input=4x1xi64 \
  --input=4xi64 \
  --input=4xi64 \
  --input=4x65xi64 \
  --input=513x8257536xf8E4M3FNUZ \
  --benchmark_repetitions=3 \
  | tee "$benchmark_2048_decode"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment