Last active
February 24, 2025 22:40
-
-
Save AmosLewis/aae4cd7b564abcc44a0b1ca428d48828 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
iree-base-compiler 3.3.0rc20250215
iree-base-runtime 3.3.0rc20250215
iree-turbine 3.3.0rc20250215
iree-compile /sharedfile/128/fp8_128.mlir \
--iree-hip-target=gfx942 \
-o=/sharedfile/128/fp8_128_0224_ir0215.vmfb \
--iree-hal-target-device=hip \
--iree-dispatch-creation-enable-aggressive-fusion=true \
--iree-global-opt-propagate-transposes=true \
--iree-opt-aggressively-propagate-transposes=true \
--iree-opt-data-tiling=false \
--iree-preprocessing-pass-pipeline='builtin.module(util.func(iree-preprocessing-generalize-linalg-matmul-experimental))' \
--iree-hal-indirect-command-buffers=true \
--iree-stream-resource-memory-model=discrete \
--iree-hal-memoization=true \
--iree-opt-strip-assertions
# /sharedfile/128/fp8_128.mlir:2888:12: error: 'flow.tensor.bitcast' op value set has 1 dynamic dimensions but only 0 dimension values are attached
# %990 = torch.aten.view.dtype %979, %int1_229 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[?,32,8,128],si8>
# ^
# /sharedfile/128/fp8_128.mlir:2888:12: note: see current operation: %1819 = "flow.tensor.bitcast"(%1818) <{operandSegmentSizes = array<i32: 1, 0, 0>}> : (tensor<?x32x8x128xf8E4M3FNUZ>) -> tensor<?x32x8x128xi8>
# /sharedfile/128/fp8_128.mlir:33785:13: error: 'flow.tensor.bitcast' op value set has 1 dynamic dimensions but only 0 dimension values are attached
# %1022 = torch.aten.view.dtype %1004, %int1_182 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[?,32,2,32,8,128],si8>
# ^
# /sharedfile/128/fp8_128.mlir:33785:13: note: see current operation: %1803 = "flow.tensor.bitcast"(%1802) <{operandSegmentSizes = array<i32: 1, 0, 0>}> : (tensor<?x32x2x32x8x128xf8E4M3FNUZ>) -> tensor<?x32x2x32x8x128xi8>
iree-base-compiler 3.3.0rc20250214
iree-base-runtime 3.3.0rc20250214
iree-turbine 3.3.0rc20250214
iree-compile /sharedfile/128/fp8_128.mlir \
--iree-hip-target=gfx942 \
-o=/sharedfile/128/fp8_128_0224_ir0214.vmfb \
--iree-hal-target-device=hip \
--iree-dispatch-creation-enable-aggressive-fusion=true \
--iree-global-opt-propagate-transposes=true \
--iree-opt-aggressively-propagate-transposes=true \
--iree-opt-data-tiling=false \
--iree-preprocessing-pass-pipeline='builtin.module(util.func(iree-preprocessing-generalize-linalg-matmul-experimental))' \
--iree-hal-indirect-command-buffers=true \
--iree-stream-resource-memory-model=discrete \
--iree-hal-memoization=true \
--iree-opt-strip-assertions
# /sharedfile/128/fp8_128.mlir:2888:12: error: 'flow.tensor.bitcast' op value set has 1 dynamic dimensions but only 0 dimension values are attached
# %990 = torch.aten.view.dtype %979, %int1_229 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[?,32,8,128],si8>
# ^
# /sharedfile/128/fp8_128.mlir:2888:12: note: see current operation: %1819 = "flow.tensor.bitcast"(%1818) <{operandSegmentSizes = array<i32: 1, 0, 0>}> : (tensor<?x32x8x128xf8E4M3FNUZ>) -> tensor<?x32x8x128xi8>
# /sharedfile/128/fp8_128.mlir:33785:13: error: 'flow.tensor.bitcast' op value set has 1 dynamic dimensions but only 0 dimension values are attached
# %1022 = torch.aten.view.dtype %1004, %int1_182 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[?,32,2,32,8,128],si8>
# ^
# /sharedfile/128/fp8_128.mlir:33785:13: note: see current operation: %1803 = "flow.tensor.bitcast"(%1802) <{operandSegmentSizes = array<i32: 1, 0, 0>}> : (tensor<?x32x2x32x8x128xf8E4M3FNUZ>) -> tensor<?x32x2x32x8x128xi8>
iree-base-compiler 3.3.0rc20250224
iree-base-runtime 3.3.0rc20250224
iree-turbine 3.3.0rc20250224
iree-compile /sharedfile/128/fp8_128.mlir \
--iree-hip-target=gfx942 \
-o=/sharedfile/128/fp8_128_0224_ir0224.vmfb \
--iree-hal-target-device=hip \
--iree-dispatch-creation-enable-aggressive-fusion=true \
--iree-global-opt-propagate-transposes=true \
--iree-opt-aggressively-propagate-transposes=true \
--iree-opt-data-tiling=false \
--iree-preprocessing-pass-pipeline='builtin.module(util.func(iree-preprocessing-generalize-linalg-matmul-experimental))' \
--iree-hal-indirect-command-buffers=true \
--iree-stream-resource-memory-model=discrete \
--iree-hal-memoization=true \
--iree-opt-strip-assertions
ROCR_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
iree-benchmark-module \
--hip_use_streams=true \
--module=/sharedfile/128/fp8_128_0224_ir0224.vmfb \
--parameters=model=/sharedfile/llama3_8b_fp8.irpa \
--device=hip://4 \
--function=prefill_bs4 \
--input=4x128xi64=@/sharedfile/128/prefill/prefill_token_ids_4x128xi64.bin \
--input=4xi64=@/sharedfile/128/prefill/prefill_seq_lens_4xi64.bin \
--input=4x4xi64=@/sharedfile/128/prefill/prefill_seq_block_ids_4x4xi64.bin \
--input=261x2097152xf8E4M3FNUZ=@/sharedfile/128/prefill/prefill_cache_state_261x2097152xf8E4M3FNUZ.bin \
--benchmark_repetitions=3
# 2025-02-24T14:42:08-08:00
# Running /home/chi/src/shark-ai/.venv/lib/python3.11/site-packages/iree/_runtime_libs/iree-benchmark-module
# Run on (96 X 3810.79 MHz CPU s)
# CPU Caches:
# L1 Data 32 KiB (x96)
# L1 Instruction 32 KiB (x96)
# L2 Unified 1024 KiB (x96)
# L3 Unified 32768 KiB (x16)
# Load Average: 9.07, 9.11, 19.93
# ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
# -------------------------------------------------------------------------------------------------------
# Benchmark Time CPU Iterations UserCounters...
# -------------------------------------------------------------------------------------------------------
# BM_prefill_bs4/process_time/real_time 123 ms 124 ms 6 items_per_second=8.12122/s
# BM_prefill_bs4/process_time/real_time 123 ms 124 ms 6 items_per_second=8.11833/s
# BM_prefill_bs4/process_time/real_time 123 ms 124 ms 6 items_per_second=8.11331/s
# BM_prefill_bs4/process_time/real_time_mean 123 ms 124 ms 3 items_per_second=8.11762/s
# BM_prefill_bs4/process_time/real_time_median 123 ms 124 ms 3 items_per_second=8.11833/s
# BM_prefill_bs4/process_time/real_time_stddev 0.061 ms 0.086 ms 3 items_per_second=4.00454m/s
# BM_prefill_bs4/process_time/real_time_cv 0.05 % 0.07 % 3 items_per_second=0.05%
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment