AmosLewis’s gists

AmosLewis / fp8_attn_iree0828.shark0828_2bb_irpafp16_kv8.bug.log

Created September 4, 2025 00:13

	python3 -m sharktank.examples.export_paged_llm_v1 --irpa-file=/shark-dev/llama3.1/8b/fp16/weight/8b_fp16.irpa --output-mlir=/sharedfile/f8/128/out/fp8_attn_iree0828.shark0828_2bb_fp16irpa_kvfp8.mlir --output-config=/sharedfile/f8/128/out/fp8_attn_iree0828.shark0828_2bb_fp16irpa_kvfp8.json --bs-prefill=4 --bs-decode=4 --attention-kernel=sharktank --attention-dtype=float16 --activation-dtype=float16 --use-attention-mask --use-hf --kv-cache-dtype=float8_e4m3fn
	/home/chiliu12/src/shark-ai/.venv12/lib/python3.12/site-packages/iree/turbine/aot/params.py:163: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.)
	return torch.from_numpy(wr

AmosLewis / fp8_attn_iree0828.shark0828_2bb_irpafp16_kv16.log

Created September 4, 2025 00:10

	((.venv12) ) ➜ shark-ai git:(2bb2d590b) ✗ /sharedfile/f8/export_run_f8_8b_tp1.sh
	No flag provided. Using default iree_day 0828.
	No flag provided. Using default shark_day 0828_2bb_kv16.
	/sharedfile/f8/128/out/fp8_attn_iree0828.shark0828_2bb_kv16.mlir
	/sharedfile/f8/128/out/fp8_attn_iree0828.shark0828_2bb_kv16.json
	/sharedfile/f8/128/out/fp8_attn_iree0828.shark0828_2bb_kv16.vmfb
	/sharedfile/f8/128/out/fp8_attn_iree0828.shark0828_2bb_kv16.prefill.txt
	File already exists: /sharedfile/f8/128/out/fp8_attn_iree0828.shark0828_2bb_kv16.prefill.txt
	/sharedfile/f8/128/out/fp8_attn_iree0828.shark0828_2bb_kv16.decode.txt
	File already exists: /sharedfile/f8/128/out/fp8_attn_iree0828.shark0828_2bb_kv16.decode.txt

AmosLewis / iree0902.shark0903_4aa.compile_bug,log

Created September 3, 2025 22:09

	((.venv12) ) ➜ shark-ai git:(main) ✗ /sharedfile/f4/export_run_f4_405b_pp1_bs4.sh
	No flag provided. Using default iree_day 0902.
	No flag provided. Using default shark_day 0903_4aa.
	/sharedfile/f4/2500/405b/pp1/out/f4_bs4_ds4.iree0902.shark0903_4aa.mlir
	/sharedfile/f4/2500/405b/pp1/out/f4_bs4_ds4.iree0902.shark0903_4aa.json
	/sharedfile/f4/2500/405b/pp1/out/f4_bs4_ds4.iree0902.shark0903_4aa.vmfb
	/sharedfile/f4/2500/405b/pp1/out/f4_bs4_ds4.iree0902.shark0903_4aa.prefill.txt
	File created: /sharedfile/f4/2500/405b/pp1/out/f4_bs4_ds4.iree0902.shark0903_4aa.prefill.txt
	/sharedfile/f4/2500/405b/pp1/out/f4_bs4_ds4.iree0902.shark0903_4aa.decode.txt
	File created: /sharedfile/f4/2500/405b/pp1/out/f4_bs4_ds4.iree0902.shark0903_4aa.decode.txt

AmosLewis / iree0903.shark0903_4aa.compile_bug,log

Created September 3, 2025 21:22

	((.venv12) ) ➜ shark-ai git:(main) ✗ /sharedfile/f4/export_run_f4_405b_pp1_bs4.sh
	No flag provided. Using default iree_day 0903.
	No flag provided. Using default shark_day 0903_4aa.
	/sharedfile/f4/2500/405b/pp1/out/f4_bs4_ds4.iree0903.shark0903_4aa.mlir
	/sharedfile/f4/2500/405b/pp1/out/f4_bs4_ds4.iree0903.shark0903_4aa.json
	/sharedfile/f4/2500/405b/pp1/out/f4_bs4_ds4.iree0903.shark0903_4aa.vmfb
	/sharedfile/f4/2500/405b/pp1/out/f4_bs4_ds4.iree0903.shark0903_4aa.prefill.txt
	File already exists: /sharedfile/f4/2500/405b/pp1/out/f4_bs4_ds4.iree0903.shark0903_4aa.prefill.txt
	/sharedfile/f4/2500/405b/pp1/out/f4_bs4_ds4.iree0903.shark0903_4aa.decode.txt
	File already exists: /sharedfile/f4/2500/405b/pp1/out/f4_bs4_ds4.iree0903.shark0903_4aa.decode.txt

AmosLewis / 0828_405b_fp4_mlperf_run_offline_bug.log

Last active September 3, 2025 20:49

	root@smci355-ccs-aus-n10-09:/mlperf/harness# ./run_offline.sh --shortfin-config shortfin_405b_config_fp4.json
	Warning: Missing argument '--test-mode'
	Info: Defaulting to test mode 'PerformanceOnly'
	Warning: Missing argument '--test-scenario'
	Info: Defaulting to test scenario 'Offline'
	INFO:root:####################################################################################################################################################################################
	Running python3.11 harness_alt_mi355.py --devices 0,1,2,3,4,5,6,7 --scenario Offline --test_mode PerformanceOnly --prefill_bs 4 --decode_bs 4 --user_conf_path user.conf --count 50 --tensor_path /data/mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl --logfile_outdir OutputOfflinePerformanceOnly --debug False --verbose False --user_conf_path user.conf --shortfin_config shortfin_405b_config_fp4.json
	##########################################################################################################################################

AmosLewis / add_wave_gemm_optims_export_bug.log

Last active August 23, 2025 06:14

	(.venv) ➜ wave git:(main) /sharedfile/f4/export_run_f4_405b_pp1_bs4.sh
	No flag provided. Using default iree_day 0822.
	No flag provided. Using default shark_day 0822_add_wave_gemm_optims.

	export model: python3 -m sharktank.examples.export_paged_llm_v1 --irpa-file=/shark-dev/llama3.1/405b/instruct/weights/fp4/fp4_2025_07_10_fn.irpa --output-mlir=/sharedfile/f4/128/405b/pp1/out/f4_bs4_ds4.iree0822.shark0822_add_wave_gemm_optims.mlir --output-config=/sharedfile/f4/128/405b/pp1/out/f4_bs4_ds4.iree0822.shark0822_add_wave_gemm_optims.json --bs-prefill=4 --bs-decode=4 --block-seq-stride=32 --attention-dtype=float16 --activation-dtype=float16 --attention-kernel=torch --kv-cache-dtype=float8_e4m3fn --use-hf --top-k=1

	/home/chiliu12/src/shark-ai/.venv/lib/python3.11/site-packages/iree/turbine/aot/params.py:163: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined

AmosLewis / save_real_input_npy.md

Last active September 23, 2025 16:05

nod-ai/shark-ai#2088

python -m sharktank.tools.run_llm_vmfb \
--prompt /data/mlperf_llama/artifacts/chi/real_inputs/prompt_input_128.txt \
--irpa /shark-dev/llama3.1/405b/instruct/weights/fp4/fp4_2025_07_10_fn.irpa \
--vmfb /sharedfile/f4/2500/405b/pp1/out/f4_bs4_ds4.iree0827.shark0827_554.vmfb \
--config /sharedfile/f4/2500/405b/pp1/out/f4_bs4_ds4.iree0827.shark0827_554.json \
--tokenizer /shark-dev/llama3.1/405b/instruct/weights/fp4/tokenizer.json \
--tokenizer_config /shark-dev/llama3.1/405b/instruct/weights/fp4/tokenizer_config.json \

AmosLewis / run_llvm_vmfb_hipErrorNoDevice.log

Last active August 20, 2025 19:55

	(.venv) ➜ shark-ai git:(main) ✗ /sharedfile/f4/export_run_f4_405b_pp1_bs4.sh
	No flag provided. Using default iree_day 0818.
	No flag provided. Using default shark_day 0820.
	/sharedfile/f4/128/405b/pp1/out/f4_bs4_ds4.iree0818.shark0820.mlir
	/sharedfile/f4/128/405b/pp1/out/f4_bs4_ds4.iree0818.shark0820.json
	/sharedfile/f4/128/405b/pp1/out/f4_bs4_ds4.iree0818.shark0820.prefill.vmfb
	/sharedfile/f4/128/405b/pp1/out/f4_bs4_ds4.iree0818.shark0820.prefill.vmfb
	/sharedfile/f4/128/405b/pp1/out/f4_bs4_ds4.iree0818.shark0820.prefill.txt
	File created: /sharedfile/f4/128/405b/pp1/out/f4_bs4_ds4.iree0818.shark0820.prefill.txt
	/sharedfile/f4/128/405b/pp1/out/f4_bs4_ds4.iree0818.shark0820.decode.txt

AmosLewis / 405b_fp4_bs4_8.log

Last active August 16, 2025 00:28

	INFO:eval
	{
	"perplexities": [
	3.323654,
	6.722357,
	2.652504,
	9.565708
	],
	"mean_perplexity": 5.566056
	}

AmosLewis / mlperf_8b_fp16_nocount_dbc_8192.log

Created August 15, 2025 02:15

	root@smci355-ccs-aus-n10-09:/mlperf/harness# ./run_offline.sh --shortfin-config shortfin_8b_config_fp16.json
	Warning: Missing argument '--test-mode'
	Info: Defaulting to test mode 'PerformanceOnly'
	Warning: Missing argument '--test-scenario'
	Info: Defaulting to test scenario 'Offline'
	INFO:root:####################################################################################################################################################################################
	Running python3.11 harness_alt_mi355.py --devices 0,1,2,3,4,5,6,7 --scenario Offline --test_mode PerformanceOnly --prefill_bs 4 --decode_bs 4 --user_conf_path user.conf --tensor_path /data/mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl --logfile_outdir OutputOfflinePerformanceOnly --debug False --verbose False --user_conf_path user.conf --shortfin_config shortfin_8b_config_fp16.json
	#######################################################################################################################################################

Chi_Liu AmosLewis