Skip to content

Instantly share code, notes, and snippets.

@AmosLewis
Last active February 5, 2025 21:45
Show Gist options
  • Save AmosLewis/9a10e4fc947717eb01b204b68468ac05 to your computer and use it in GitHub Desktop.
Save AmosLewis/9a10e4fc947717eb01b204b68468ac05 to your computer and use it in GitHub Desktop.

with nod-ai/shark-ai#896

(.venv) ➜  shark-ai git:(users/dan-garvey/enable_custom_fp8_matmul) python3 -m sharktank.examples.export_paged_llm_v1 --irpa-file=/home/chi/src/test/llama/dan/fp8.irpa \
--output-mlir=/home/chi/src/test/llama/dan/fp8_dan.mlir \
--output-config=/home/chi/src/test/llama/dan/config.json \
--bs=1 --attention-kernel torch \
--attention-dtype=float8_e4m3fnuz --activation-dtype=bfloat16
...
GENERATED!
Exporting
Saving to '/home/chi/src/test/llama/dan/fp8_dan.mlir'
(.venv) ➜  shark-ai git:(users/dan-garvey/enable_custom_fp8_matmul) cd ../test/llama/dan
(.venv) ➜  dan ls
config.json  f8_.mlir  f8_.vmfb  fp8_dan.mlir  fp8_dan_old.json  fp8_dan_old.mlir  fp8.irpa  indexput.linalg.mlir  indexput.torch.mlir
(.venv) ➜  dan /home/chi/src/iree-build/tools/iree-compile fp8_dan.mlir \
  --iree-hip-target=gfx942 \
  -o=fp8_dan.vmfb \
  --iree-hal-target-device=hip \
  --iree-dispatch-creation-enable-aggressive-fusion=true \
  --iree-global-opt-propagate-transposes=true \
  --iree-opt-aggressively-propagate-transposes=true \
  --iree-opt-data-tiling=false \
  --iree-preprocessing-pass-pipeline='builtin.module(util.func(iree-preprocessing-generalize-linalg-matmul-experimental))' \
  --iree-hal-indirect-command-buffers=true \
  --iree-stream-resource-memory-model=discrete \
  --iree-hal-memoization=true \
  --iree-opt-strip-assertions

fp8_dan.mlir:1813:12: error: failed to legalize operation 'torch.aten.outer'
    %614 = torch.aten.outer %605, %613 : !torch.vtensor<[131072],si64>, !torch.vtensor<[128],f32> -> !torch.vtensor<[131072,128],f32>
           ^
fp8_dan.mlir:1813:12: note: see current operation: %1809 = "torch.aten.outer"(%1798, %1808) : (!torch.vtensor<[131072],si64>, !torch.vtensor<[128],f32>) -> !torch.vtensor<[131072,128],f32>
fp8_dan.mlir:22791:12: error: failed to legalize operation 'torch.aten.outer'
    %637 = torch.aten.outer %628, %636 : !torch.vtensor<[131072],si64>, !torch.vtensor<[128],f32> -> !torch.vtensor<[131072,128],f32>
           ^
fp8_dan.mlir:22791:12: note: see current operation: %1786 = "torch.aten.outer"(%1775, %1785) : (!torch.vtensor<[131072],si64>, !torch.vtensor<[128],f32>) -> !torch.vtensor<[131072,128],f32>
(.venv) ➜  dan pip list
Package                  Version
------------------------ ---------------
accelerate               1.3.0
aiohappyeyeballs         2.4.4
aiohttp                  3.11.11
aiosignal                1.3.2
annotated-types          0.7.0
anyio                    4.8.0
attrs                    25.1.0
certifi                  2025.1.31
cfgv                     3.4.0
charset-normalizer       3.4.1
click                    8.1.8
dataclasses-json         0.6.7
datasets                 3.0.0
diffusers                0.32.2
dill                     0.3.8
distlib                  0.3.9
einops                   0.8.0
execnet                  2.1.1
fastapi                  0.115.8
filelock                 3.17.0
frozenlist               1.5.0
fsspec                   2024.6.1
gguf                     0.14.0
h11                      0.14.0
huggingface-hub          0.28.1
identify                 2.6.6
idna                     3.10
importlib_metadata       8.6.1
iniconfig                2.0.0
inquirerpy               0.3.4
iree-base-compiler       3.2.0rc20250120
iree-base-runtime        3.2.0rc20250120
iree-turbine             3.2.0rc20250121
Jinja2                   3.1.5
MarkupSafe               3.0.2
marshmallow              3.26.0
ml_dtypes                0.5.1
mpmath                   1.3.0
multidict                6.1.0
multiprocess             0.70.16
mypy                     1.8.0
mypy-extensions          1.0.0
networkx                 3.4.2
nodeenv                  1.9.1
numpy                    2.2.2
nvidia-cublas-cu12       12.4.5.8
nvidia-cuda-cupti-cu12   12.4.127
nvidia-cuda-nvrtc-cu12   12.4.127
nvidia-cuda-runtime-cu12 12.4.127
nvidia-cudnn-cu12        9.1.0.70
nvidia-cufft-cu12        11.2.1.3
nvidia-curand-cu12       10.3.5.147
nvidia-cusolver-cu12     11.6.1.9
nvidia-cusparse-cu12     12.3.1.170
nvidia-cusparselt-cu12   0.6.2
nvidia-nccl-cu12         2.21.5
nvidia-nvjitlink-cu12    12.4.127
nvidia-nvtx-cu12         12.4.127
onnx                     1.17.0
packaging                24.2
pandas                   2.2.3
parameterized            0.9.0
pfzy                     0.3.4
pillow                   11.1.0
pip                      22.3.1
platformdirs             4.3.6
pluggy                   1.5.0
pre_commit               4.1.0
prompt_toolkit           3.0.50
propcache                0.2.1
protobuf                 5.29.3
psutil                   6.1.1
pyarrow                  19.0.0
pydantic                 2.10.6
pydantic_core            2.27.2
pytest                   8.0.0
pytest-html              4.1.1
pytest-metadata          3.1.1
pytest-timeout           2.3.1
pytest-xdist             3.5.0
python-dateutil          2.9.0.post0
pytz                     2025.1
PyYAML                   6.0.2
regex                    2024.11.6
requests                 2.32.3
safetensors              0.5.2
sentencepiece            0.2.0
setuptools               65.5.0
six                      1.17.0
sniffio                  1.3.1
starlette                0.45.3
sympy                    1.13.1
tokenizers               0.21.0
torch                    2.6.0
tqdm                     4.67.1
transformers             4.48.0
triton                   3.2.0
types-requests           2.31.0.20240125
typing_extensions        4.12.2
typing-inspect           0.9.0
tzdata                   2025.1
urllib3                  2.3.0
uvicorn                  0.34.0
virtualenv               20.29.1
wcwidth                  0.2.13
wheel                    0.45.1
xxhash                   3.5.0
yarl                     1.18.3
zipp                     3.21.0
@AmosLewis
Copy link
Author

same torch.aten.outer issue after exporting attention fp8 with nod-ai/shark-ai#907

 python3 -m sharktank.examples.export_paged_llm_v1 --irpa-file=/home/chi/src/test/llama/dan/fp8_attn.irpa \
--output-mlir=/home/chi/src/test/llama/dan/f8_attn_chi_castf32.mlir \
--output-config=/home/chi/src/test/llama/dan/config_attn_chi.json \
--bs=1 --attention-kernel sharktank \
--attention-dtype=float8_e4m3fnuz --activation-dtype=bfloat16 --use-attention-mask --use-hf
/home/chi/src/iree-build/tools/iree-compile f8_attn_chi_castf32.mlir \
  --iree-hip-target=gfx942 \
  -o=f8_attn_chi_castf32.vmfb \
  --iree-hal-target-device=hip \
  --iree-dispatch-creation-enable-aggressive-fusion=true \
  --iree-global-opt-propagate-transposes=true \
  --iree-opt-aggressively-propagate-transposes=true \
  --iree-opt-data-tiling=false \
  --iree-preprocessing-pass-pipeline='builtin.module(util.func(iree-preprocessing-generalize-linalg-matmul-experimental))' \
  --iree-hal-indirect-command-buffers=true \
  --iree-stream-resource-memory-model=discrete \
  --iree-hal-memoization=true \
  --iree-opt-strip-assertions
f8_attn_chi_castf32.mlir:2248:12: error: failed to legalize operation 'torch.aten.outer'
    %761 = torch.aten.outer %759, %760 : !torch.vtensor<[131072],f32>, !torch.vtensor<[128],f32> -> !torch.vtensor<[131072,128],f32>
           ^
f8_attn_chi_castf32.mlir:2248:12: note: see current operation: %2150 = "torch.aten.outer"(%2149, %2146) : (!torch.vtensor<[131072],f32>, !torch.vtensor<[128],f32>) -> !torch.vtensor<[131072,128],f32>
f8_attn_chi_castf32.mlir:28904:12: error: failed to legalize operation 'torch.aten.outer'
    %755 = torch.aten.outer %753, %754 : !torch.vtensor<[131072],f32>, !torch.vtensor<[128],f32> -> !torch.vtensor<[131072,128],f32>
           ^
f8_attn_chi_castf32.mlir:28904:12: note: see current operation: %2069 = "torch.aten.outer"(%2068, %2065) : (!torch.vtensor<[131072],f32>, !torch.vtensor<[128],f32>) -> !torch.vtensor<[131072,128],f32>

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment