ROCR_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
/home/chi/src/iree-build/tools/iree-benchmark-module \
--hip_use_streams=true \
--module=f8_.vmfb \
--parameters=model=fp8.irpa \
--device=hip://4 \
--function=prefill_bs1 \
--input=1x32xi64=@/sharedfile/prefill/prefill_token_ids_1_32.bin \
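The @file.bin inputs are raw little-endian dumps of the flattened tensor data. A minimal sketch of how such files can be produced, assuming the shapes from the flags in this log (the token and block-id values are placeholders, not the real prompt):

import numpy as np

# 1x32xi64 prompt token ids; a real run would dump tokenizer output instead.
np.zeros((1, 32), dtype=np.int64).tofile("/sharedfile/prefill/prefill_token_ids_1_32.bin")

# Companion inputs used by the invocations later in this log.
np.array([32], dtype=np.int64).tofile("/sharedfile/prefill/prefill_seq_lens_1.bin")        # 1xi64
np.array([[0]], dtype=np.int64).tofile("/sharedfile/prefill/prefill_seq_block_ids_1_1.bin")  # 1x1xi64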
/home/chi/src/iree-build/tools/iree-run-module --help
# ============================================================================
# 👻 IREE: iree-run-module
# ============================================================================
Runs a function within a compiled IREE module and handles I/O parsing
and optional expected value verification/output processing. Modules
can be provided by file path (`--module=file.vmfb`) or read from stdin
(`--module=-`) and the function to execute matches the original name
provided to the compiler (`--function=foo` for `func.func @foo`).
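A hypothetical illustration of the --module=- stdin path described in that help text: stream the compiled module bytes instead of passing a file path. Tool path, parameter file, and function name are reused from this log.

import subprocess

with open("f8_.vmfb", "rb") as vmfb:
    subprocess.run(
        [
            "/home/chi/src/iree-build/tools/iree-run-module",
            "--module=-",  # read the .vmfb from stdin
            "--parameters=model=fp8.irpa",
            "--device=hip://4",
            "--function=prefill_bs1",
            "--input=1x32xi64=@/sharedfile/prefill/prefill_token_ids_1_32.bin",
            # ...remaining --input flags as in the invocations below.
        ],
        stdin=vmfb,
        check=True,
    )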
python3 -m sharktank.examples.export_paged_llm_v1 --irpa-file=/home/chi/src/test/llama/dan/fp8_attn.irpa \
--output-mlir=/home/chi/src/test/llama/dan/f8_attn_chi.mlir \
--output-config=/home/chi/src/test/llama/dan/config_attn_chi.json \
--bs=1 --attention-kernel sharktank \
--attention-dtype=float8_e4m3fnuz --activation-dtype=bfloat16 --use-hf
/home/chi/src/shark-ai/.venv/lib/python3.11/site-packages/iree/turbine/aot/params.py:163: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at /pytorch/torch/csrc/utils/tensor_numpy.cpp:203.)
return torch.from_numpy(wrapper)
Exporting prefill_bs1
Traceback (most recent call last):
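The traceback above is cut off, but the UserWarning earlier in the log is straightforward to avoid: torch.from_numpy is being handed a read-only NumPy view, and copying the buffer before conversion silences it. A minimal sketch, where wrapper stands in for the read-only parameter view in iree/turbine/aot/params.py:

import numpy as np
import torch

wrapper = np.zeros((4,), dtype=np.float32)
wrapper.setflags(write=False)  # simulate the read-only mmap'd parameter view

# torch.from_numpy(wrapper) warns: non-writable array, undefined behavior on write.
tensor = torch.from_numpy(np.array(wrapper))  # copy first, then convert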
(.venv) ➜ dan python -m sharktank.evaluate.perplexity_iree \
--irpa-file=/home/chi/src/test/llama/dan/fp8.irpa \
--tokenizer-config-json=/home/chi/src/test/llama/dan/tokenizer_config.json \
--iree-device='hip://4' \
--iree-hal-target-device=hip \
--iree-hip-target=gfx942 \
--attention-kernel decomposed \
--num-prompts=1
ModuleNotFoundError: No module named 'tiktoken'
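The missing module is just a tokenizer dependency of the perplexity entrypoint; `pip install tiktoken` into the venv clears the error.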
With nod-ai/shark-ai#896:
(.venv) ➜ shark-ai git:(users/dan-garvey/enable_custom_fp8_matmul) python3 -m sharktank.examples.export_paged_llm_v1 --irpa-file=/home/chi/src/test/llama/dan/fp8.irpa \
--output-mlir=/home/chi/src/test/llama/dan/fp8_dan.mlir \
--output-config=/home/chi/src/test/llama/dan/config.json \
--bs=1 --attention-kernel torch \
--attention-dtype=float8_e4m3fnuz --activation-dtype=bfloat16
...
GENERATED!
(generated MLIR, truncated)
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
module @module {
  util.global private @__auto.token_embd.weight = #stream.parameter.named<"model"::"token_embd.weight"> : tensor<128256x4096xbf16>
  util.global private @__auto.blk.0.attn_norm.weight = #stream.parameter.named<"model"::"blk.0.attn_norm.weight"> : tensor<4096xbf16>
  util.global private @"__auto.blk.0.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.0.attn_q.q_input:rscale"> : tensor<f32>
  util.global private @"__auto.blk.0.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.0.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>
  util.global private @"__auto.blk.0.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.0.attn_k.q_input:rscale"> : tensor<f32>
  util.global private @"__auto.blk.0.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.0.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>
  util.global private @"__auto.blk.0.attn_v.q_input:rscale"
func.func @function(%arg2: !torch.vtensor<[1,?],si64>, %669: !torch.vtensor<[1,?],si64>, %667: !torch.vtensor<[?,32,8,128],f16>, %674: !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>) -> !torch.vtensor<[?,32,8,128],f16> {
  %false = torch.constant.bool false
  %int1 = torch.constant.int 1
  %670 = torch.aten.size.int %arg2, %int1 : !torch.vtensor<[1,?],si64>, !torch.int -> !torch.int
  %675 = torch.prim.ListConstruct %670 : (!torch.int) -> !torch.list<int>
  %676 = torch.aten.view %669, %675 : !torch.vtensor<[1,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
  %677 = torch.prim.ListConstruct %676 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
  %678 = torch.aten.index_put %667, %677, %674, %false : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16>
  return %678 : !torch.vtensor<[?,32,8,128],f16>
}
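For reference, a hedged eager-PyTorch rendering of the same op, with shapes shortened for illustration: %667 is the f16 cache, %669 the page indices, %674 the f8 values. Recent torch builds may reject the dtype mix outright.

import torch

cache = torch.zeros(4, 32, 8, 128, dtype=torch.float16)        # %667
pages = torch.tensor([[0, 2]], dtype=torch.int64)               # %669: 1x?
values = torch.zeros(2, 32, 8, 128).to(torch.float8_e4m3fnuz)   # %674

idx = pages.view(-1)                                            # aten.view -> [?]
try:
    out = torch.ops.aten.index_put(cache, [idx], values, False)
    print(out.dtype, out.shape)
except RuntimeError as e:
    print("eager torch rejects the f8->f16 index_put:", e)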
module {
  func.func @faulty(%arg0: !torch.vtensor<[32,4096],bf16>, %arg1: !torch.vtensor<[4096,4096],bf16>) -> !torch.vtensor<[32,4096],bf16> {
    %int26 = torch.constant.int 26
    %int15 = torch.constant.int 15
    return %arg0 : !torch.vtensor<[32,4096],bf16>
  }
}
module {
  func.func @faulty(%arg0: !torch.vtensor<[32,4096],bf16>, %arg1: !torch.vtensor<[4096,4096],bf16>) -> !torch.vtensor<[4096,4096],bf16> {
    %int26 = torch.constant.int 26
    %int15 = torch.constant.int 15
    return %arg1 : !torch.vtensor<[4096,4096],bf16>
  }
}
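The two @faulty modules are identical except for which operand they return; presumably this isolates bf16 argument round-tripping for each operand independently of any compute.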
/home/chi/src/iree-build/tools/iree-run-module \
--parameter_mode="mmap" \
--hip_use_streams=true \
--module=fp8__initializer_0_dispatch_0.vmfb \
--parameters=model=fp8.irpa \
--device=hip://4 \
--function=prefill_bs1 \
--input=1x32xi64=@/sharedfile/prefill/prefill_token_ids_1_32.bin \
--input=1xi64=@/sharedfile/prefill/prefill_seq_lens_1.bin \
--input=1x1xi64=@/sharedfile/prefill/prefill_seq_block_ids_1_1.bin \