Modify llama_harness_tracy.dockerfile with:

RUN git clone https://github.com/iree-org/iree.git \
    && cd iree \
    && git submodule update --init

ENV PATH=/iree/build-release/tools:/iree/third_party/tracy/capture/build:/iree/build-release/tracy/:$PATH

RUN git clone https://github.com/nod-ai/shark-ai.git \
    && cd shark-ai \
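
The PATH above assumes an IREE build tree at /iree/build-release with runtime
tracing enabled, plus a Tracy capture tool built under
third_party/tracy/capture/build. A minimal sketch of build steps that would
populate those directories (the exact configuration used for this image is not
shown, so treat the flags as assumptions):

# Hypothetical; IREE_ENABLE_RUNTIME_TRACING is a documented IREE CMake option.
cd /iree
cmake -G Ninja -B build-release -S . \
    -DCMAKE_BUILD_TYPE=Release \
    -DIREE_ENABLE_RUNTIME_TRACING=ON    # compile Tracy instrumentation into the runtime
cmake --build build-release

# Assumes a Tracy version whose capture tool builds with CMake.
cmake -B third_party/tracy/capture/build -S third_party/tracy/capture
cmake --build third_party/tracy/capture/build
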
((.venv12) ) ➜ 2024q2-sdxl-mlperf-sprint git:(mi355_llama_working_harness_v1) ✗ ./LLAMA_inference/build_docker_mi355.sh
[+] Building 183.1s (11/21) docker:default
=> [internal] load build definition from llama_harness_355_nightly.dockerfile 0.0s
=> => transferring dockerfile: 4.55kB 0.0s
=> [internal] load metadata for ghcr.io/rocm/no_rocm_image_ubuntu24_04:main 0.5s
=> [internal] load .dockerignore 0.0s
=> => t
((.venv12) ) ➜ 2024q2-sdxl-mlperf-sprint git:(mi355_llama_working_harness_v1) ✗ ./LLAMA_inference/build_tracy_docker.sh
[+] Building 11.3s (12/18) docker:default
=> [internal] load build definition from llama_harness_tracy.dockerfile 0.0s
=> => transferring dockerfile: 5.33kB 0.0s
=> [internal] load metadata for docker.io/rocm/dev-ubuntu-22.04:6.4.1 0.8s
=> [internal] load .dockerignore
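
With the tracy image built, a capture session pairs the capture tool (on PATH
via the Dockerfile change above) with an instrumented run. A sketch; the
binary name tracy-capture and the output path are assumptions:

# Start the capture tool; it waits for an instrumented process to connect.
tracy-capture -o /tmp/llama.tracy -a 127.0.0.1 &
# ...then launch the workload (e.g. iree-run-module or the harness) in the
# same container. TRACY_NO_EXIT=1 keeps short-lived processes alive until
# the capture has connected and drained the trace.
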
INFO:shortfin_apps.llm.components.service_debug_dumper:[debug_service.py] Please find debug dumps for service.py in /root/.shortfin/debug/llm_service_invocation_dumps/2025-09-10T08:11:28.623037
INFO:root:####################################################################################################################################################################################
Running python3 harness_alt_mi355.py --devices 0,1,2,3,4,5,6,7 --scenario Offline --test_mode PerformanceOnly --prefill_bs 1 --decode_bs 2 --user_conf_path user.conf --tensor_path /data/mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl --logfile_outdir OutputOfflinePerformanceOnly --debug False --verbose False --user_conf_path user.conf --shortfin_config shortfin_405b_config_fp4.json
##############################################################################################################################################################################################
INFO:Llama-405B-Dataset:Loading dataset...
INFO:Llama
root@smci355-ccs-aus-n10-09:/mlperf/harness# ./run_offline.sh --shortfin-config shortfin_405b_config_fp4.json
Warning: Missing argument '--test-mode'
Info: Defaulting to test mode 'PerformanceOnly'
Warning: Missing argument '--test-scenario'
Info: Defaulting to test scenario 'Offline'
INFO:shortfin_apps.llm.components.service_debug_dumper:[debug_service.py] Please find debug dumps for service.py in /root/.shortfin/debug/llm_service_invocation_dumps/2025-09-08T02:11:29.806867
INFO:root:####################################################################################################################################################################################
Running python3 harness_alt_mi355.py --devices 0,1,2,3,4,5,6,7 --scenario Offline --test_mode PerformanceOnly --prefill_bs 4 --decode_bs 4 --user_conf_path user.conf --count 5 --tensor_path /data/mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl --logfile_outdir OutputOfflinePerformanceOnly --debug False --verbose False --user_conf_path user.c
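
The two Warning/Info pairs above suggest run_offline.sh falls back to
PerformanceOnly/Offline when the flags are omitted. A hypothetical
reconstruction of that default-argument pattern (the script body is not
shown):

if [[ -z "$TEST_MODE" ]]; then
  echo "Warning: Missing argument '--test-mode'"
  echo "Info: Defaulting to test mode 'PerformanceOnly'"
  TEST_MODE="PerformanceOnly"
fi
if [[ -z "$TEST_SCENARIO" ]]; then
  echo "Warning: Missing argument '--test-scenario'"
  echo "Info: Defaulting to test scenario 'Offline'"
  TEST_SCENARIO="Offline"
fi
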
iree
commit 33548616294b02b60467d9c7b68e494a85c7b17f (HEAD -> main, origin/main, origin/HEAD)
Author: Vivian Zhang <[email protected]>
Date:   Fri Sep 5 13:09:33 2025 -0700

shark-ai
commit 9c173373eb1db0c2be523580a261c28e8115ad52 (HEAD -> main, origin/main, origin/HEAD)
Author: Alex Vasile <[email protected]>
Date:   Fri Sep 5 18:59:59 2025 -0400
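
To reproduce this environment, pin both checkouts to the commits recorded
above:

cd iree \
    && git checkout 33548616294b02b60467d9c7b68e494a85c7b17f \
    && git submodule update --init
cd ../shark-ai \
    && git checkout 9c173373eb1db0c2be523580a261c28e8115ad52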

On machine smci355-ccs-aus-n10-09

Use LLAMA_inference/llama_harness_355_nightly.dockerfile (https://github.com/nod-ai/2024q2-sdxl-mlperf-sprint/pull/195/files#diff-740fec7b8265c17d6e775063cd320bf504718e2f6defc770a999abc41f310a61).
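
build_docker_mi355.sh itself is not shown; judging from the build log below it
is roughly equivalent to the following (the image tag is an assumption):

docker build -f LLAMA_inference/llama_harness_355_nightly.dockerfile \
    -t llama-harness-mi355 .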

((.venv12) ) ➜  2024q2-sdxl-mlperf-sprint git:(mi355_llama_working_harness_v1) ✗ ./LLAMA_inference/build_docker_mi355.sh                                                      
[+] Building 39.9s (5/21)                                                                                                                                                                                   docker:default
 => [internal] load metadata for ghcr.io/rocm/no_rocm_image_ubuntu24_04:main                                                                                                                                          0.4s
 => [internal] load .dockerignore                                                                                                                                                
root@smci355-ccs-aus-n10-09:/mlperf/harness# ./run_offline.sh --shortfin-config shortfin_8b_config_fp16.json
Warning: Missing argument '--test-mode'
Info: Defaulting to test mode 'PerformanceOnly'
Warning: Missing argument '--test-scenario'
Info: Defaulting to test scenario 'Offline'
INFO:shortfin_apps.llm.components.service_debug_dumper:[debug_service.py] Please find debug dumps for service.py in /root/.shortfin/debug/llm_service_invocation_dumps/2025-09-04T06:44:02.508536
INFO:root:####################################################################################################################################################################################
Running python3.11 harness_alt_mi355.py --devices 0,1,2,3,4,5,6,7 --scenario Offline --test_mode PerformanceOnly --prefill_bs 4 --decode_bs 4 --user_conf_path user.conf --count 5 --tensor_path /data/mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl --logfile_outdir OutputOfflinePerformanceOnly --debug False --verbose False --user_conf_path user
((.venv12) ) ➜ 2024q2-sdxl-mlperf-sprint git:(mi355_llama_working_harness_v1) ✗ ./LLAMA_inference/run_docker_8b_mi355.sh
always
root@smci355-ccs-aus-n10-09:/mlperf/harness# ./run_offline.sh --shortfin-config shortfin_8b_config_fp8.json
Warning: Missing argument '--test-mode'
Info: Defaulting to test mode 'PerformanceOnly'
Warning: Missing argument '--test-scenario'
Info: Defaulting to test scenario 'Offline'
INFO:shortfin_apps.llm.components.service_debug_dumper:[debug_service.py] Please find debug dumps for service.py in /root/.shortfin/debug/llm_service_invocation_dumps/2025-09-04T06:34:39.128962
INFO:root:####################################################################################################################################################################################
Running python3.11 harness_alt_mi355.py --devices 0,1,2,3,4,5,6,7 --scenario Offline --test_mode PerformanceOnly --prefill_bs 4 --decode_bs 4 --user_conf_path user.conf --count 50 --tensor_path /data/mlperf_llama3.1_405b_data
((.venv12) ) ➜ shark-ai git:(2bb2d590b) ✗ /sharedfile/f16/export_run_f16_8b_tp1.sh
No flag provided. Using default iree_day 0828.
No flag provided. Using default shark_day 0828_2bb_kv8.
/sharedfile/f16/128/8b/out/8b_fp16_iree0828.shark0828_2bb_kv8.mlir
/sharedfile/f16/128/8b/out/8b_fp16_iree0828.shark0828_2bb_kv8.json
/sharedfile/f16/128/8b/out/8b_fp16_iree0828.shark0828_2bb_kv8.vmfb
/sharedfile/f16/128/8b/out/8b_fp16_iree0828.shark0828_2bb_kv8.prefill.txt
File created: /sharedfile/f16/128/8b/out/8b_fp16_iree0828.shark0828_2bb_kv8.prefill.txt
/sharedfile/f16/128/8b/out/8b_fp16_iree0828.shark0828_2bb_kv8.decode.txt
File created: /sharedfile/f16/128/8b/out/8b_fp16_iree0828.shark0828_2bb_kv8.decode.txt
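
The .mlir/.json/.vmfb triple above is the exported model, its config, and the
compiled module. A hedged sketch of recompiling the MLIR with iree-compile
(the flags the export script actually uses are not shown; gfx950 as the MI355
target is an assumption):

iree-compile /sharedfile/f16/128/8b/out/8b_fp16_iree0828.shark0828_2bb_kv8.mlir \
    --iree-hal-target-device=hip \
    --iree-hip-target=gfx950 \
    -o /sharedfile/f16/128/8b/out/8b_fp16_iree0828.shark0828_2bb_kv8.vmfb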