
python -m sharktank.examples.paged_llm_v1 \
--irpa-file=/shark-dev/8b/instruct/weights/llama3.1_8b_instruct_fp16.irpa \
--tokenizer-config-json=/shark-dev/8b/instruct/tokenizer_config.json \
--prompt="Name the capital of the United States"
# /home/chi/src/shark-ai/.venv/lib/python3.12/site-packages/iree/turbine/aot/params.py:163: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at /pytorch/torch/csrc/utils/tensor_numpy.cpp:203.)
# return torch.from_numpy(wrapper)
# :: Prompt tokens:
# prompt_0:
python -m sharktank.examples.paged_llm_v1 \
--irpa-file=/shark-dev/70b/instruct/weights/llama3.1_70b_instruct_fp16.irpa \
--tokenizer-config-json=/shark-dev/70b/instruct/tokenizer_config.json \
--prompt="Name the capital of the United States."
# /home/chi/src/shark-ai/.venv/lib/python3.12/site-packages/iree/turbine/aot/params.py:163: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at /pytorch/torch/csrc/utils/tensor_numpy.cpp:203.)
# return torch.from_numpy(wrapper)
# :: Prompt tokens:
# prompt_0:
# b'Name the capital of the United States.'
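The two invocations above differ only in the model size in the paths. A hedged loop sketch that prints the equivalent commands for both sizes (it assumes the `/shark-dev/<size>/instruct` layout from the log holds consistently; it only prints, it does not run anything):

```shell
# Sketch: print the paged_llm_v1 command line for each model size.
# The /shark-dev/<size>/instruct path layout is assumed from the log above.
all_cmds=""
for size in 8b 70b; do
  cmd="python -m sharktank.examples.paged_llm_v1 --irpa-file=/shark-dev/${size}/instruct/weights/llama3.1_${size}_instruct_fp16.irpa --tokenizer-config-json=/shark-dev/${size}/instruct/tokenizer_config.json --prompt='Name the capital of the United States.'"
  all_cmds="$all_cmds$cmd
"
  echo "$cmd"
done
```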
(.venv) ➜ shark-ai git:(main) ✗
huggingface-cli login
# (Hugging Face ASCII-art login banner)
A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
(.venv) ➜ shark-ai git:(ad958230) ✗ git checkout b58783029e0cc3e1890b22339d470c394a66dcb4
M requirements-iree-pinned.txt
Previous HEAD position was ad958230 Add ops.mean for SplitPrimitiveTensor (#1308)
HEAD is now at b5878302 Add perplexity calculation for Tensor and Pipeline parallized Llama models (#1279)
(.venv) ➜ shark-ai git:(b5878302) ✗ pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --run-nightly-llama-tests --iree-hip-target=gfx942 --iree-device=hip://4 -k testBenchmark405B_f16_TP8_Non_Decomposed_Input_Len_128
================================================= test session starts ==================================================
platform linux -- Python 3.12.9, pytest-8.0.0, pluggy-1.5.0 -- /home/chi/src/shark-ai/.venv/bin/python
cachedir: .pytest_cache
metadata: {'Python': '3.12.9', 'Platform': 'Linux-6.8.0-52-generic-x86_64-with-glibc2.35', 'Packages': {'pytest': '8.0.0', 'pluggy': '1.5.0'}, 'Plugins': {'timeout': '2.3.1', 'anyio': '4.9.0', 'metadata': '3.1.1', 'html': '4
(.venv) ➜ shark-ai git:(b5878302) ✗ git checkout 3a73dc3f233bec037275dc75841a9f6112a32a45
M requirements-iree-pinned.txt
Previous HEAD position was b5878302 Add perplexity calculation for Tensor and Pipeline parallized Llama models (#1279)
HEAD is now at 3a73dc3f [tuner][NFC] remove walk function over input module (#1307)
(.venv) ➜ shark-ai git:(3a73dc3f) ✗ pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --run-nightly-llama-tests --iree-hip-target=gfx942 --iree-device=hip://4 -k testBenchmark405B_f16_TP8_Non_Decomposed_Input_Len_128
================================================= test session starts ==================================================
platform linux -- Python 3.12.9, pytest-8.0.0, pluggy-1.5.0 -- /home/chi/src/shark-ai/.venv/bin/python
cachedir: .pytest_cache
metadata: {'Python': '3.12.9', 'Platform': 'Linux-6.8.0-52-generic-x86_64-with-glibc2.35', 'Packages': {'pytest': '8.0.0', 'pluggy': '1.5.0'}, 'Plugins': {'timeout': '2.3.1', 'anyio': '4.9.0', 'metadata': '3.1.
(bisect.venv) ➜ bisect python /home/chi/src/iree/build_tools/pkgci/bisect/bisect_packages.py \
--good-ref=9e494bccb5472c5cc8f0910cbc339db8a2ea9aa7 \
--bad-ref=b4e3694a31a92a814a7b127f288e5928705fb3c4 \
--test-script=/sharedfile/attn/bisect/issue1.sh
Welcome to bisect_packages.py!
------------------------------------------------------------------
--------- Configuration ------------------------------------------
------------------------------------------------------------------
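`bisect_packages.py` drives the search by repeatedly running the `--test-script` against candidate package builds; as with `git bisect run`, the script's exit status decides the verdict (0 = good, non-zero = bad). A minimal, hypothetical skeleton of such a script (the `grep` probe is only a stand-in for the real export/compile/run step in `issue1.sh`):

```shell
# Hypothetical bisect test-script skeleton. The probe below stands in for
# the real export/compile/run step; exit status 0 marks the commit good,
# non-zero marks it bad (git-bisect-run convention).
check_commit() {
  # Stand-in probe: pretend the build log contains "ok" on good commits.
  echo "compile ok" | grep -q "ok"
}

if check_commit; then
  verdict="good"
else
  verdict="bad"
fi
echo "test script verdict: $verdict"
```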
# run on SharkMI300X
# cd /sharedfile/attn/bisect
# ./export_run_build.sh
# # Check if a command-line argument is provided
if [ -z "$1" ]; then
  iree_day="042411"
  echo "No flag provided. Using default iree_day $iree_day."
else
  iree_day="$1"
fi
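The argument-default logic in the script above can also be written with a single shell parameter expansion; an equivalent sketch (simulating a call with no arguments):

```shell
# Equivalent default-argument handling via ${1:-default} expansion.
set -- # simulate calling the script with no arguments
iree_day="${1:-042411}"
echo "Using iree_day $iree_day."
```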
(.venv) ➜ shark-ai git:(chi/xfail_f16) ✗ pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s -m "expensive" --iree-hip-target=gfx942 --iree-device=hip://4 -k testBenchmark8B_fp8_TP1_Non_Decomposed
======================================================================================= test session starts =======================================================================================
platform linux -- Python 3.12.9, pytest-8.0.0, pluggy-1.5.0 -- /home/chi/src/shark-ai/.venv/bin/python
cachedir: .pytest_cache
metadata: {'Python': '3.12.9', 'Platform': 'Linux-6.8.0-52-generic-x86_64-with-glibc2.35', 'Packages': {'pytest': '8.0.0', 'pluggy': '1.5.0'}, 'Plugins': {'timeout': '2.3.1', 'anyio': '4.9.0', 'metadata': '3.1.1', 'html': '4.1.1', 'asyncio': '0.23.8', 'xdist': '3.5.0'}}
rootdir: /home/chi/src/shark-ai/sharktank
configfile: pyproject.toml
plugins: timeout-2.3.1, anyio-4.9.0, metadata-3.1.1, html-4.1.1, asyncio-0.23.8, xdist-3.5.0
asyncio: mode=Mode.STRICT
collected 13 items / 12 deselected / 1 selected
# ssh chi@SharkMi300X
# iree-3.4.0rc20250327
# build iree with tracy
git checkout iree-3.4.0rc20250327
cmake -G Ninja -B ../iree-build-trace/ -S . \
-DCMAKE_BUILD_TYPE=Release \
-DIREE_ENABLE_ASSERTIONS=ON \
-DIREE_ENABLE_SPLIT_DWARF=ON \
-DIREE_ENABLE_THIN_ARCHIVES=ON \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
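The cmake invocation above is cut off in the log after the ccache launcher. For reference, IREE's profiling documentation enables Tracy instrumentation with `-DIREE_ENABLE_RUNTIME_TRACING=ON`; a sketch of a complete configure line, where everything after the flags shown in the log is an assumption based on those docs rather than the command actually run:

```shell
# Sketch only: flags after -DCMAKE_C_COMPILER_LAUNCHER are assumptions
# taken from IREE's Tracy profiling docs, not recovered from the log.
cmake -G Ninja -B ../iree-build-trace/ -S . \
  -DCMAKE_BUILD_TYPE=Release \
  -DIREE_ENABLE_ASSERTIONS=ON \
  -DIREE_ENABLE_SPLIT_DWARF=ON \
  -DIREE_ENABLE_THIN_ARCHIVES=ON \
  -DCMAKE_C_COMPILER_LAUNCHER=ccache \
  -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
  -DIREE_ENABLE_RUNTIME_TRACING=ON
```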
(.venv) ➜ shark-ai git:(cbd6b7a6) ✗ python3 -m sharktank.examples.export_paged_llm_v1 --irpa-file=/sharedfile/attn/fp8_attn.irpa \
--output-mlir=/sharedfile/attn/128/fp8_attn.mlir \
--output-config=/sharedfile/attn/128/config_attn.json \
--bs-prefill=4 --bs-decode=4 --attention-kernel sharktank \
--attention-dtype=float8_e4m3fnuz --activation-dtype=bfloat16 --use-attention-mask --use-hf --kv-cache-dtype=float8_e4m3fnuz
cat_default
conv2d_default
conv2d_default
einsum_2args
elementwise_unary