Chi_Liu AmosLewis

MLIR | Deep Learning | Computer Vision

AmosLewis / os_cpu_gpu_output.txt

Created September 20, 2025 05:04

	root@smci350-zts-gtu-c8-25:/mlperf/harness# echo "OS:" && cat /etc/os-release \| grep -E "^(NAME=\|VERSION=)";
	echo "CPU: " && cat /proc/cpuinfo \| grep "model name" \| sort --unique;
	echo "GPU:" && /opt/rocm/bin/rocminfo \| grep -E "^\s*(Name\|Marketing Name)";
	OS:
	NAME="Ubuntu"
	VERSION="22.04.5 LTS (Jammy Jellyfish)"
	CPU:
	model name : AMD EPYC 9575F 64-Core Processor
	GPU:
	Name: AMD EPYC 9575F 64-Core Processor

AmosLewis / rocminfo_output.txt

Created September 20, 2025 05:01

	root@smci350-zts-gtu-c8-25:/mlperf/harness# /opt/rocm/bin/rocminfo
	ROCk module version 6.14.14 is loaded
	=====================
	HSA System Attributes
	=====================
	Runtime Version: 1.18
	Runtime Ext Version: 1.11
	System Timestamp Freq.: 1000.000000MHz
	Sig. Max Wait Duration: 18446744073709551615 (0xFFFFFFFFFFFFFFFF) (timestamp count)
	Machine Model: LARGE

AmosLewis / user.conf

Created September 20, 2025 04:59

	# The format of this config file is 'key = value'.
	# The key has the format 'model.scenario.key'. Value is mostly int64_t.
	# Model maybe '*' as wildcard. In that case the value applies to all models.
	# All times are in milli seconds
	#
	*.Offline.min_duration = 6000
	*.Offline.min_query_count = 4
	*.Offline.max_query_count = 4

	*.Server.target_qps = 0.5

AmosLewis / shortfin_405b_config_fp4.json

Created September 20, 2025 04:54

	{
	"host": "0.0.0.0",
	"port": "8080",
	"model_config": "/artifacts/chi/f4/f4_mi350_bs1_ds2_dc2816.iree0915.shark0915_ce7.json",
	"tokenizer_json": "/shark-dev/tokenizer.json",
	"tokenizer_config_json": "/shark-dev/tokenizer_config.json",
	"vmfb": "/artifacts/chi/f4/f4_mi350_bs1_ds2_dc2816.iree0915.shark0915_ce7.vmfb",
	"parameters": [
	"/shark-dev/weights/fp4/fp4_2025_07_10_fn.irpa"
	],

AmosLewis / llama_harness_355.dockerfile

Created September 20, 2025 04:43

	FROM rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi35x-20250915

	# ######################################################
	# # Install MLPerf+Shark reference implementation
	# ######################################################
	ENV DEBIAN_FRONTEND=noninteractive
	SHELL ["/bin/bash", "-c"]

	# apt dependencies
	RUN apt-get --fix-broken install -y && apt-get update && apt-get install -y \

AmosLewis / rocprof_bug_sgl_rocm7_0915_official.txt

Last active September 20, 2025 01:08

	docker is 7.0.0 FROM rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi35x-20250915

	export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
	export ROCR_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
	rocprofv3 --output-format pftrace -r -- python3 -u harness_alt_mi355.py \
	--devices "0,1,2,3,4,5,6,7" --scenario "$TEST_SCENARIO" \
	--test_mode "$TEST_MODE" \
	--bs 2 \
	--user_conf_path user.conf \
	--count 8 \

AmosLewis / accuracyonly_all.log

Created September 17, 2025 01:26

	user.conf:
	*.Offline.min_duration = 600000

	INFO:shortfin_apps.llm.components.service_debug_dumper:[debug_service.py] Please find debug dumps for service.py in /root/.shortfin/debug/llm_service_invocation_dumps/2025-09-16T21:21:31.617026
	INFO:root:####################################################################################################################################################################################
	Running python3 harness_alt_mi355.py --devices 0,1,2,3,4,5,6,7 --scenario Offline --test_mode AccuracyOnly --bs 2 --user_conf_path user.conf --tensor_path /data/mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl --logfile_outdir OutputOfflineAccuracyOnly --debug False --verbose False --user_conf_path user.conf --shortfin_config shortfin_405b_config_fp4.json
	##############################################################################################################################################################################################
	INFO:Llama-405B-Dataset:Loading datas

AmosLewis / rocprofv3-mlperf.log

Last active September 16, 2025 20:57

	/opt/rocm/bin/rocprofv3 --output-format pftrace -r -- python3 -u harness_alt_mi355.py \
	--devices "0,1,2,3,4,5,6,7" \
	--scenario "$TEST_SCENARIO" \
	--test_mode "$TEST_MODE" \
	--bs 2 \
	--user_conf_path user.conf \
	--tensor_path /data/mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl \
	--logfile_outdir "Output${TEST_SCENARIO}${TEST_MODE}" \
	--debug "$DEBUG" \
	--verbose "$VERBOSE" \

AmosLewis / dockerbug.mi3551009.log

Created September 16, 2025 05:09

	((.venv12) ) ➜ 2024q2-sdxl-mlperf-sprint git:(mi355_llama_working_harness_v1) ✗ git config --global credential.helper store
	git config --global user.name AmosLewis
	git config --global user.password ghp_nsRzvxclTLke......

	((.venv12) ) ➜ 2024q2-sdxl-mlperf-sprint git:(mi355_llama_working_harness_v1) ✗ git config --global --list

	[6] + 450113 suspended git config --global --list
	((.venv12) ) ➜ 2024q2-sdxl-mlperf-sprint git:(mi355_llama_working_harness_v1) ✗ ./LLAMA_inference/build_docker_mi355.sh
	[+] Building 209.4s (12/23) docker:default
	=> [internal] load build definition from llama_harness_355_nightly.dockerfile 0.0s

AmosLewis / dockerbug.smci350-zts-gtu-c8-25.zts-gtu.dcgpu.log

Last active September 15, 2025 18:23

	docker build --no-cache --platform linux/amd64 --tag mlperf_llama_mi350:405b_chi --file LLAMA_inference/llama_harness_355_nightly.dockerfile .

	➜ 2024q2-sdxl-mlperf-sprint git:(mi355_llama_working_harness_v1) ✗ ./LLAMA_inference/build_docker_mi355.sh
	DEPRECATED: The legacy builder is deprecated and will be removed in a future release.
	Install the buildx component to build images with BuildKit:
	https://docs.docker.com/go/buildx/

	Sending build context to Docker daemon 46.69MB
	Step 1/31 : FROM ghcr.io/rocm/no_rocm_image_ubuntu24_04:main
	---> e6ad78a4d6b1