Skip to content

Instantly share code, notes, and snippets.

docker

# NOTE(review): ':main' is a mutable tag — pin by digest for reproducible builds,
# e.g. FROM ghcr.io/rocm/no_rocm_image_ubuntu24_04:main@sha256:<digest>
FROM ghcr.io/rocm/no_rocm_image_ubuntu24_04:main

# ######################################################
# # Install MLPerf+Shark reference implementation
# ######################################################
# Build-time only: suppress interactive apt/dpkg prompts. ARG (not ENV) keeps
# the setting out of the final image's runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# Root is needed for the package-install steps that follow.
# NOTE(review): switch to a non-root USER before the final ENTRYPOINT/CMD.
USER root
root@smci350-odcdh2-a05-1:/mlperf/harness# ./run_offline.sh --shortfin-config shortfin_405b_config_fp4.json --test-mode PerformanceOnly 2>&1 | tee server-time.log
Warning: Missing argument '--test-scenario'
Info: Defaulting to test scenario 'Offline'
Log started at: 2025-10-07 22:23:01
INFO:shortfin_apps.llm.components.service_debug_dumper:[debug_service.py] Please find debug dumps for service.py in /root/.shortfin/debug/llm_service_invocation_dumps/2025-10-07T22:23:01.515306
INFO:root:####################################################################################################################################################################################
Running python3 harness_alt_mi355.py --devices 0,1,2,3,4,5,6,7 --scenario Offline --test_mode PerformanceOnly --bs 8 --user_conf_path user.conf --count 16 --tensor_path /data/mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl --logfile_outdir OutputOfflinePerformanceOnly-a05-pp8-16samples-ps8_ds200_dc_4096_1007 --debug False --verbose True --u
root@smci350-zts-gtu-c8-25:/mlperf/harness# echo "OS:" && cat /etc/os-release | grep -E "^(NAME=|VERSION=)";
echo "CPU: " && cat /proc/cpuinfo | grep "model name" | sort --unique;
echo "GPU:" && /opt/rocm/bin/rocminfo | grep -E "^\s*(Name|Marketing Name)";
OS:
NAME="Ubuntu"
VERSION="22.04.5 LTS (Jammy Jellyfish)"
CPU:
model name : AMD EPYC 9575F 64-Core Processor
GPU:
Name: AMD EPYC 9575F 64-Core Processor
root@smci350-zts-gtu-c8-25:/mlperf/harness# /opt/rocm/bin/rocminfo
ROCk module version 6.14.14 is loaded
=====================
HSA System Attributes
=====================
Runtime Version: 1.18
Runtime Ext Version: 1.11
System Timestamp Freq.: 1000.000000MHz
Sig. Max Wait Duration: 18446744073709551615 (0xFFFFFFFFFFFFFFFF) (timestamp count)
Machine Model: LARGE
# The format of this config file is 'key = value'.
# The key has the format 'model.scenario.key'. Value is mostly int64_t.
# Model maybe '*' as wildcard. In that case the value applies to all models.
# All times are in milliseconds.
#
# NOTE(review): 6000 ms duration and 4-query min/max are debug-sized values;
# a compliant Offline run elsewhere in these logs uses
# *.Offline.min_duration = 600000 — confirm before a submission run.
*.Offline.min_duration = 6000
*.Offline.min_query_count = 4
*.Offline.max_query_count = 4
*.Server.target_qps = 0.5
{
"host": "0.0.0.0",
"port": "8080",
"model_config": "/artifacts/chi/f4/f4_mi350_bs1_ds2_dc2816.iree0915.shark0915_ce7.json",
"tokenizer_json": "/shark-dev/tokenizer.json",
"tokenizer_config_json": "/shark-dev/tokenizer_config.json",
"vmfb": "/artifacts/chi/f4/f4_mi350_bs1_ds2_dc2816.iree0915.shark0915_ce7.vmfb",
"parameters": [
"/shark-dev/weights/fp4/fp4_2025_07_10_fn.irpa"
],
# NOTE(review): dated dev tag — pin by digest (rocm/7.0@sha256:<digest>) for
# fully reproducible builds.
FROM rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi35x-20250915

# ######################################################
# # Install MLPerf+Shark reference implementation
# ######################################################
# Build-time only: non-interactive apt; ARG keeps it out of the runtime env.
ARG DEBIAN_FRONTEND=noninteractive
# -o pipefail: make piped RUN commands fail when any upstream command fails
# (hadolint DL4006); plain 'bash -c' would mask those errors.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# apt dependencies
RUN apt-get --fix-broken install -y && apt-get update && apt-get install -y \
docker is 7.0.0 FROM rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi35x-20250915
# Expose all 8 GPUs (indices 0-7) to both the HIP runtime and the ROCr
# runtime layer; both variables must agree or device enumeration can differ
# between layers — TODO confirm against the target ROCm version's docs.
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export ROCR_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
rocprofv3 --output-format pftrace -r -- python3 -u harness_alt_mi355.py \
  --devices "0,1,2,3,4,5,6,7" --scenario "$TEST_SCENARIO" \
  --test_mode "$TEST_MODE" \
  --bs 2 \
  --user_conf_path user.conf \
  --count 8 \
user.conf:
*.Offline.min_duration = 600000
INFO:shortfin_apps.llm.components.service_debug_dumper:[debug_service.py] Please find debug dumps for service.py in /root/.shortfin/debug/llm_service_invocation_dumps/2025-09-16T21:21:31.617026
INFO:root:####################################################################################################################################################################################
Running python3 harness_alt_mi355.py --devices 0,1,2,3,4,5,6,7 --scenario Offline --test_mode AccuracyOnly --bs 2 --user_conf_path user.conf --tensor_path /data/mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl --logfile_outdir OutputOfflineAccuracyOnly --debug False --verbose False --user_conf_path user.conf --shortfin_config shortfin_405b_config_fp4.json
##############################################################################################################################################################################################
INFO:Llama-405B-Dataset:Loading datas
/opt/rocm/bin/rocprofv3 --output-format pftrace -r -- python3 -u harness_alt_mi355.py \
--devices "0,1,2,3,4,5,6,7" \
--scenario "$TEST_SCENARIO" \
--test_mode "$TEST_MODE" \
--bs 2 \
--user_conf_path user.conf \
--tensor_path /data/mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl \
--logfile_outdir "Output${TEST_SCENARIO}${TEST_MODE}" \
--debug "$DEBUG" \
--verbose "$VERBOSE" \