Skip to content

Instantly share code, notes, and snippets.

# Resolve iree_day from the first positional argument.
# An empty or missing $1 selects the built-in default and announces it on
# stdout; any other value is taken verbatim.
case "$1" in
  "")
    iree_day="0630"
    echo "No flag provided. Using default iree_day $iree_day."
    ;;
  *)
    iree_day="$1"
    ;;
esac
if [ -z "$2" ]; then
shark_day="0630"
wget https://gist.githubusercontent.com/AmosLewis/9e5a0f64e9cbc4254fe1e93621028183/raw/5036d41c2728eba4e4d7a30edb36f25629dcb790/export_run_f16_405b_pp8.sh
(.venv) ➜ shark-ai git:(main) ✗ /sharedfile/f16/export_run_f16_405b_pp8.sh
No flag provided. Using default iree_day 0630.
No flag provided. Using default shark_day 0630.
/sharedfile/f16/128/405b/pp8/out/f16_iree0630.shark0630.mlir
/sharedfile/f16/128/405b/pp8/out/f16_iree0630.shark0630.json
/sharedfile/f16/128/405b/pp8/out/f16_iree0630.shark0630.prefill.vmfb
/sharedfile/f16/128/405b/pp8/out/f16_iree0630.shark0630.decode.vmfb
/sharedfile/f16/128/405b/pp8/out/f16_iree0630.shark0630.prefill.txt
python -m sharktank.examples.sharding.shard_llm_dataset \
--irpa-file /sharedfile/attn/128/405b/native_fp8_e4m3fnuz_llama3_405b.irpa \
--output-irpa /sharedfile/attn/128/405b/native_fp8_e4m3fnuz_llama3_405b_tp8.irpa \
--tensor-parallelism-size 8
/home/chi/src/shark-ai/.venv/lib/python3.12/site-packages/iree/turbine/aot/params.py:163: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.)
return torch.from_numpy(wrapper)
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "/home/chi/src/shark-ai/sharktank/sharktank/examples/sharding/shard_llm_dataset.py",
python -m sharktank.tools.dump_gguf --hf-dataset=llama3_8B_fp16 --num-blocks=4 --output-irpa=llama8b_4block.irpa
meta-llama-3.1-8b-instruct.f16.gguf: 16%|██████████████▉ | 2.63G/16.1G [00:27<02:59, 74.7MB/s]Error while downloading from https://cdn-lfs-us-1.hf.co/repos/f0/ba/f0ba0ce82b70acfa083c185e639d32393561974e984720633c23f2cd17e7f921/b74a705165a2cd109ae98260ef4218f3b159080ef59de2b6b2e18bfa03bd29e2?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27meta-llama-3.1-8b-instruct.f16.gguf%3B+filename%3D%22meta-llama-3.1-8b-instruct.f16.gguf%22%3B&Expires=1751055323&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc1MTA1NTMyM319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmhmLmNvL3JlcG9zL2YwL2JhL2YwYmEwY2U4MmI3MGFjZmEwODNjMTg1ZTYzOWQzMjM5MzU2MTk3NGU5ODQ3MjA2MzNjMjNmMmNkMTdlN2Y5MjEvYjc0YTcwNTE2NWEyY2QxMDlhZTk4MjYwZWY0MjE4ZjNiMTU5MDgwZWY1OWRlMmI2YjJlMThiZmEwM2JkMjllMj9yZXNwb25zZS1jb250ZW5
# Resolve iree_day from the first positional argument.
# An empty or missing $1 selects the built-in default and announces it on
# stdout; any other value is taken verbatim.
case "$1" in
  "")
    iree_day="0624"
    echo "No flag provided. Using default iree_day $iree_day."
    ;;
  *)
    iree_day="$1"
    ;;
esac
if [ -z "$2" ]; then
shark_day="0626"
./export_run_f8_405b_tp1.sh
No flag provided. Using default iree_day 0624.
No flag provided. Using default shark_day 0626.
/sharedfile/attn/128/405b/fp8_attn_iree0624.shark0626.mlir
/sharedfile/attn/128/405b/fp8_attn_iree0624.shark0626.json
/sharedfile/attn/128/405b/fp8_attn_iree0624.shark0626.prefill.vmfb
/sharedfile/attn/128/405b/fp8_attn_iree0624.shark0626.prefill.txt
File already exists: /sharedfile/attn/128/405b/fp8_attn_iree0624.shark0626.prefill.txt
/sharedfile/attn/128/405b/fp8_attn_iree0624.shark0626.decode.txt
File already exists: /sharedfile/attn/128/405b/fp8_attn_iree0624.shark0626.decode.txt
python3 -m sharktank.examples.sharding.shard_llm_dataset \
--irpa-file native_fp8_e4m3fnuz_llama3_405b.irpa \
--output-irpa native_fp8_e4m3fnuz_llama3_405b_tp8.irpa \
--tensor-parallelism-size 8
/home/chi/src/shark-ai/.venv/lib/python3.12/site-packages/iree/turbine/aot/params.py:163: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.)
return torch.from_numpy(wrapper)
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "/home/chi/src/shark-ai/sharktank/sharktank/examples/sharding/shard_llm_dataset.py", line 51, in <module>
(.venv) ➜ shark-ai git:(ci-new) ✗ /sharedfile/attn/bisect/export_run_f8_8b_tp1.sh
No flag provided. Using default iree_day 0624.
No flag provided. Using default shark_day 0625.
/sharedfile/attn/128/out/fp8_attn_iree0624.shark0625.mlir
/sharedfile/attn/128/out/fp8_attn_iree0624.shark0625.json
/sharedfile/attn/128/out/fp8_attn_iree0624.shark0625.prefill.vmfb
/sharedfile/attn/128/out/fp8_attn_iree0624.shark0625.prefill.txt
File created: /sharedfile/attn/128/out/fp8_attn_iree0624.shark0625.prefill.txt
/sharedfile/attn/128/out/fp8_attn_iree0624.shark0625.decode.txt
File created: /sharedfile/attn/128/out/fp8_attn_iree0624.shark0625.decode.txt
Starting offline serving for Chat ...
Requests per second: 0.271810
Latencies: av: 14.714040261460468, min: 14.430016363039613, max: 15.34828608110547, median: 14.534963036538102, sd: 0.3680177929522141
TTFT: av: 0.1703249174024677, min: 0.16336269420571625, max: 0.18595529114827514, median: 0.16891529446002096, sd: 0.006463870662953834
TPOT: av: 0.014161157569228065, min: 0.013890840846114768, max: 0.014779619741985373, median: 0.0139818920325642, sd: 0.00035883067222065466
Requests per second: 0.453777
Latencies: av: 13.956019577235566, min: 0.0, max: 20.607322688912973, median: 17.627776105422527, sd: 8.390354599185105
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
# Resolve iree_day from the first positional argument.
# An empty or missing $1 selects the built-in default and announces it on
# stdout; any other value is taken verbatim.
case "$1" in
  "")
    iree_day="0616"
    echo "No flag provided. Using default iree_day $iree_day."
    ;;
  *)
    iree_day="$1"
    ;;
esac
if [ -z "$2" ]; then
shark_day="0616"