Last active
February 10, 2025 06:30
-
-
Save AmosLewis/2e74a4470d60dc21eaea8ef8a0bbe865 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(.venv) ➜ dan python -m sharktank.evaluate.perplexity_iree \
--irpa-file=/home/chi/src/test/llama/dan/fp8.irpa \
--tokenizer-config-json=/home/chi/src/test/llama/dan/tokenizer_config.json \
--iree-device='hip://4' \
--iree-hal-target-device=hip \
--iree-hip-target=gfx942 \
--attention-kernel decomposed \
--num-prompts=1
ModuleNotFoundError: No module named 'tiktoken'
With the new IREE build and a change in perplexity_iree.py, the issue above is fixed, but a new bug appears:
(.venv) ➜ dan python -m sharktank.evaluate.perplexity_iree \
--irpa-file=/home/chi/src/test/llama/dan/fp8.irpa \
--tokenizer-config-json=/home/chi/src/test/llama/dan/config.json \
--iree-device='hip://4' \
--iree-hal-target-device=hip \
--iree-hip-target=gfx942 \
--attention-kernel decomposed \
--num-prompts=1
/home/chi/src/shark-ai/.venv/lib/python3.11/site-packages/iree/turbine/aot/params.py:163: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.)
return torch.from_numpy(wrapper)
INFO:eval Batch size: 1
INFO:eval get_prompts: 3.42 secs
INFO:eval Compiling: /home/chi/src/test/llama/dan/fp8.irpa
INFO:eval Exporting mlir:
cd /home/chi/src/shark-ai && python3 -m sharktank.examples.export_paged_llm_v1 --irpa-file=/home/chi/src/test/llama/dan/fp8.irpa --output-mlir=/home/chi/src/shark-ai/perplexity_ci_artifacts/fp8_decomposed.mlir --output-config=/home/chi/src/shark-ai/perplexity_ci_artifacts/fp8_decomposed.json --bs=1 --block-seq-stride=32 --attention-dtype=float16 --activation-dtype=float16 --attention-kernel decomposed --use-attention-mask
INFO:eval Exported to mlir successfully:
Exporting prefill_bs1
Exporting decode_bs1
GENERATED!
Exporting
Saving to '/home/chi/src/shark-ai/perplexity_ci_artifacts/fp8_decomposed.mlir'
INFO:eval export_to_mlir: 00 hrs : 02 mins : 13.39 secs
INFO:eval Launching compile command:
cd /home/chi/src/shark-ai && iree-compile /home/chi/src/shark-ai/perplexity_ci_artifacts/fp8_decomposed.mlir --iree-hip-target=gfx942 -o=/home/chi/src/shark-ai/perplexity_ci_artifacts/fp8_decomposed.vmfb --iree-hal-target-device=hip
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "/home/chi/src/shark-ai/sharktank/sharktank/evaluate/perplexity_iree.py", line 507, in <module>
main(sys.argv[1:])
File "/home/chi/src/shark-ai/sharktank/sharktank/evaluate/perplexity_iree.py", line 487, in main
ppl = run_perplexity(
^^^^^^^^^^^^^^^
File "/home/chi/src/shark-ai/sharktank/sharktank/evaluate/perplexity_iree.py", line 432, in run_perplexity
vmfb_path = perplexity.compile_model(weight_path_str)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/chi/src/shark-ai/sharktank/sharktank/evaluate/perplexity_iree.py", line 89, in wrapper
result = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/chi/src/shark-ai/sharktank/sharktank/evaluate/perplexity_iree.py", line 146, in compile_model
vmfb_path = export_artifacts.get_artifacts()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/chi/src/shark-ai/sharktank/sharktank/utils/export_artifacts.py", line 343, in get_artifacts
self.compile_to_vmfb(
File "/home/chi/src/shark-ai/sharktank/sharktank/utils/export_artifacts.py", line 116, in wrapper
result = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/chi/src/shark-ai/sharktank/sharktank/utils/export_artifacts.py", line 253, in compile_to_vmfb
raise IreeCompileException(proc, cwd)
sharktank.utils.export_artifacts.IreeCompileException: Error invoking iree-compile
Error code: 1
Stderr diagnostics:
/home/chi/src/shark-ai/perplexity_ci_artifacts/fp8_decomposed.mlir:2122:12: error: failed to legalize operation 'torch.aten.index_put.hacked_twin' that was explicitly marked illegal
%706 = torch.aten.index_put %696, %705, %702, %false_204 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16>
^
/home/chi/src/shark-ai/perplexity_ci_artifacts/fp8_decomposed.mlir:2122:12: note: see current operation: %1341 = "torch.aten.index_put.hacked_twin"(%1327, %1340, %1336, %54) : (!torch.vtensor<[?,32,8,128],f16>, !torch.list<vtensor>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool) -> !torch.vtensor<[?,32,8,128],f16>
/home/chi/src/shark-ai/perplexity_ci_artifacts/fp8_decomposed.mlir:26786:12: error: failed to legalize operation 'torch.aten.index_put.hacked_twin' that was explicitly marked illegal
%737 = torch.aten.index_put %719, %736, %714, %false_123 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[1,1,8,128],f8E4M3FNUZ>, !torch.bool -> !torch.vtensor<[?,32,2,32,8,128],f16>
^
/home/chi/src/shark-ai/perplexity_ci_artifacts/fp8_decomposed.mlir:26786:12: note: see current operation: %1300 = "torch.aten.index_put.hacked_twin"(%1289, %1299, %1283, %68) : (!torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<vtensor>, !torch.vtensor<[1,1,8,128],f8E4M3FNUZ>, !torch.bool) -> !torch.vtensor<[?,32,2,32,8,128],f16>
Invoked with:
cd /home/chi/src/shark-ai && iree-compile /home/chi/src/shark-ai/perplexity_ci_artifacts/fp8_decomposed.mlir --iree-hip-target=gfx942 -o=/home/chi/src/shark-ai/perplexity_ci_artifacts/fp8_decomposed.vmfb --iree-hal-target-device=hip
The same issue occurs even after adding --attention-dtype=float8_e4m3fnuz --activation-dtype=bfloat16:
python -m sharktank.evaluate.perplexity_iree --irpa-file=/home/chi/src/test/llama/dan/fp8.irpa --tokenizer-config-json=/home/chi/src/test/llama/dan/config.json --iree-device='hip://4' --iree-hal-target-device=hip --iree-hip-target=gfx942 --attention-kernel decomposed --attention-dtype=float8_e4m3fnuz --activation-dtype=bfloat16 --num-prompts=1
With Archana's new change to perplexity_iree.py (https://gist.github.com/archana-ramalingam/086cc63afc8c7fb9b9b3497b09071735):
python -m sharktank.evaluate.perplexity_iree \
--irpa-file=/home/chi/src/test/llama/dan/fp8.irpa \
--tokenizer-config-json=/home/chi/src/test/llama/dan/tokenizer.json \
--iree-device='hip://4' \
--iree-hal-target-device=hip \
--iree-hip-target=gfx942 \
--attention-kernel decomposed \
--num-prompts=1
/home/chi/src/shark-ai/.venv/lib/python3.11/site-packages/iree/turbine/aot/params.py:163: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.)
return torch.from_numpy(wrapper)
INFO:eval Batch size: 1
INFO:eval get_prompts: 3.54 secs
INFO:eval Compiling: /home/chi/src/test/llama/dan/fp8.irpa
INFO:eval Export & compile: 0.049 ms
INFO:eval load_model: 4.95 secs
DEBUG:eval Prompts for Evaluation:
DEBUG:eval Prompt 0:
Tokens: b" Manila has six legislative districts that serve as the constituencies for the election of the city 's representatives to the lower house of the Congress of the Philippines and of the regular members to the Sangguniang Panlungsod ( SP ; City Council ) . Each district elects one representative to the House of Representatives and six SP members to the council . The city , along with the rest of the nation , elects 12 senators as one at @-@ large district ."
Token ids: [57664, 706, 4848, 27743, 27536, 430, 8854, 439, 279, 31605, 6072, 369, 279, 6355, 315, 279, 3363, 364, 82, 24005, 311, 279, 4827, 3838, 315, 279, 8151, 315, 279, 26363, 323, 315, 279, 5912, 3697, 311, 279, 52022, 13562, 28323, 11233, 75, 29222, 347, 320, 9440, 2652, 4409, 9251, 883, 662, 9062, 9474, 4135, 82, 832, 18740, 311, 279, 4783, 315, 40845, 323, 4848, 9440, 3697, 311, 279, 15177, 662, 578, 3363, 1174, 3235, 449, 279, 2800, 315, 279, 7140, 1174, 4135, 82, 220, 717, 40020, 439, 832, 520, 571, 12, 31, 3544, 9474, 662, 0]
eval: Calculating logits: 0%| | 0/94 [00:00<?, ?it/s]
DEBUG:eval Iteration: 0
DEBUG:eval Prefill:
DEBUG:eval Input:
DEBUG:eval [' Manila']
DEBUG:eval [[57664, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
eval: Calculating logits: 0%| | 0/94 [00:00<?, ?it/s]
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "/home/chi/src/shark-ai/sharktank/sharktank/evaluate/perplexity_iree.py", line 518, in <module>
main(sys.argv[1:])
File "/home/chi/src/shark-ai/sharktank/sharktank/evaluate/perplexity_iree.py", line 498, in main
ppl = run_perplexity(
^^^^^^^^^^^^^^^
File "/home/chi/src/shark-ai/sharktank/sharktank/evaluate/perplexity_iree.py", line 445, in run_perplexity
ppl = perplexity.get_perplexity()
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/chi/src/shark-ai/sharktank/sharktank/evaluate/perplexity_iree.py", line 92, in wrapper
result = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/chi/src/shark-ai/sharktank/sharktank/evaluate/perplexity_iree.py", line 398, in get_perplexity
self.get_logits(page_cache_size=self.page_cache_size)
File "/home/chi/src/shark-ai/sharktank/sharktank/evaluate/perplexity_iree.py", line 92, in wrapper
result = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/chi/src/shark-ai/sharktank/sharktank/evaluate/perplexity_iree.py", line 327, in get_logits
element_type=ireert.HalElementType.FLOAT_8_E4M3_FNUZ,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/enum.py", line 787, in __getattr__
raise AttributeError(name) from None
AttributeError: FLOAT_8_E4M3_FNUZ
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Fix: pass tokenizer.json instead of tokenizer_config.json.