Created
April 3, 2023 16:11
-
-
Save pashu123/26b428760d94a553c69eca671a422fdb to your computer and use it in GitHub Desktop.
This file has been truncated, but you can view the full file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #composite_of_1731821440b = #util.composite<1731821440xi8, [ | |
| dense_resource<__elided__> : tensor<320x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<1024x320xf16>, | |
| dense_resource<__elided__> : tensor<1024x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x2560xf16>, | |
| dense_resource<__elided__> : tensor<1280x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<1280x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<1024x320xf16>, | |
| dense_resource<__elided__> : tensor<1024x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x2560xf16>, | |
| dense_resource<__elided__> : tensor<1280x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<1280x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<1024x640xf16>, | |
| dense_resource<__elided__> : tensor<1024x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x5120xf16>, | |
| dense_resource<__elided__> : tensor<2560x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<1280x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<1024x640xf16>, | |
| dense_resource<__elided__> : tensor<1024x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x5120xf16>, | |
| dense_resource<__elided__> : tensor<2560x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1024x1280xf16>, | |
| dense_resource<__elided__> : tensor<1024x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x10240xf16>, | |
| dense_resource<__elided__> : tensor<5120x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1024x1280xf16>, | |
| dense_resource<__elided__> : tensor<1024x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x10240xf16>, | |
| dense_resource<__elided__> : tensor<5120x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1024x1280xf16>, | |
| dense_resource<__elided__> : tensor<1024x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x10240xf16>, | |
| dense_resource<__elided__> : tensor<5120x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1024x1280xf16>, | |
| dense_resource<__elided__> : tensor<1024x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x10240xf16>, | |
| dense_resource<__elided__> : tensor<5120x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1024x1280xf16>, | |
| dense_resource<__elided__> : tensor<1024x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x10240xf16>, | |
| dense_resource<__elided__> : tensor<5120x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1024x1280xf16>, | |
| dense_resource<__elided__> : tensor<1024x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x10240xf16>, | |
| dense_resource<__elided__> : tensor<5120x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x1280xf16>, | |
| dense_resource<__elided__> : tensor<1280x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<1024x640xf16>, | |
| dense_resource<__elided__> : tensor<1024x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x5120xf16>, | |
| dense_resource<__elided__> : tensor<2560x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<1280x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<1024x640xf16>, | |
| dense_resource<__elided__> : tensor<1024x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x5120xf16>, | |
| dense_resource<__elided__> : tensor<2560x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<1280x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<1024x640xf16>, | |
| dense_resource<__elided__> : tensor<1024x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<640x5120xf16>, | |
| dense_resource<__elided__> : tensor<2560x640xf16>, | |
| dense_resource<__elided__> : tensor<640x640xf16>, | |
| dense_resource<__elided__> : tensor<1280x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<1024x320xf16>, | |
| dense_resource<__elided__> : tensor<1024x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x2560xf16>, | |
| dense_resource<__elided__> : tensor<1280x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<1280x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<1024x320xf16>, | |
| dense_resource<__elided__> : tensor<1024x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x2560xf16>, | |
| dense_resource<__elided__> : tensor<1280x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<1280x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<1024x320xf16>, | |
| dense_resource<__elided__> : tensor<1024x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320x2560xf16>, | |
| dense_resource<__elided__> : tensor<1280x320xf16>, | |
| dense_resource<__elided__> : tensor<320x320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<2560xf16>, | |
| dense_resource<__elided__> : tensor<2560xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<2560xf16>, | |
| dense_resource<__elided__> : tensor<2560xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<2560xf16>, | |
| dense_resource<__elided__> : tensor<2560xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<2560xf16>, | |
| dense_resource<__elided__> : tensor<2560xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<2560xf16>, | |
| dense_resource<__elided__> : tensor<2560xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1920xf16>, | |
| dense_resource<__elided__> : tensor<1920xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1920xf16>, | |
| dense_resource<__elided__> : tensor<1920xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<960xf16>, | |
| dense_resource<__elided__> : tensor<960xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<960xf16>, | |
| dense_resource<__elided__> : tensor<960xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320x36xf16>, | |
| dense_resource<__elided__> : tensor<320x2880xf16>, | |
| dense_resource<__elided__> : tensor<320x2880xf16>, | |
| dense_resource<__elided__> : tensor<320x2880xf16>, | |
| dense_resource<__elided__> : tensor<320x2880xf16>, | |
| dense_resource<__elided__> : tensor<320x2880xf16>, | |
| dense_resource<__elided__> : tensor<640x2880xf16>, | |
| dense_resource<__elided__> : tensor<640x5760xf16>, | |
| dense_resource<__elided__> : tensor<640x320xf16>, | |
| dense_resource<__elided__> : tensor<640x5760xf16>, | |
| dense_resource<__elided__> : tensor<640x5760xf16>, | |
| dense_resource<__elided__> : tensor<640x5760xf16>, | |
| dense_resource<__elided__> : tensor<1280x5760xf16>, | |
| dense_resource<__elided__> : tensor<1280x11520xf16>, | |
| dense_resource<__elided__> : tensor<1280x640xf16>, | |
| dense_resource<__elided__> : tensor<1280x11520xf16>, | |
| dense_resource<__elided__> : tensor<1280x11520xf16>, | |
| dense_resource<__elided__> : tensor<1280x11520xf16>, | |
| dense_resource<__elided__> : tensor<1280x11520xf16>, | |
| dense_resource<__elided__> : tensor<1280x11520xf16>, | |
| dense_resource<__elided__> : tensor<1280x11520xf16>, | |
| dense_resource<__elided__> : tensor<1280x11520xf16>, | |
| dense_resource<__elided__> : tensor<1280x11520xf16>, | |
| dense_resource<__elided__> : tensor<1280x11520xf16>, | |
| dense_resource<__elided__> : tensor<1280x11520xf16>, | |
| dense_resource<__elided__> : tensor<1280x11520xf16>, | |
| dense_resource<__elided__> : tensor<1280x23040xf16>, | |
| dense_resource<__elided__> : tensor<1280x11520xf16>, | |
| dense_resource<__elided__> : tensor<1280x2560xf16>, | |
| dense_resource<__elided__> : tensor<1280x23040xf16>, | |
| dense_resource<__elided__> : tensor<1280x11520xf16>, | |
| dense_resource<__elided__> : tensor<1280x2560xf16>, | |
| dense_resource<__elided__> : tensor<1280x23040xf16>, | |
| dense_resource<__elided__> : tensor<1280x11520xf16>, | |
| dense_resource<__elided__> : tensor<1280x2560xf16>, | |
| dense_resource<__elided__> : tensor<1280x11520xf16>, | |
| dense_resource<__elided__> : tensor<1280x23040xf16>, | |
| dense_resource<__elided__> : tensor<1280x11520xf16>, | |
| dense_resource<__elided__> : tensor<1280x2560xf16>, | |
| dense_resource<__elided__> : tensor<1280x23040xf16>, | |
| dense_resource<__elided__> : tensor<1280x11520xf16>, | |
| dense_resource<__elided__> : tensor<1280x2560xf16>, | |
| dense_resource<__elided__> : tensor<1280x17280xf16>, | |
| dense_resource<__elided__> : tensor<1280x11520xf16>, | |
| dense_resource<__elided__> : tensor<1280x1920xf16>, | |
| dense_resource<__elided__> : tensor<1280x11520xf16>, | |
| dense_resource<__elided__> : tensor<640x17280xf16>, | |
| dense_resource<__elided__> : tensor<640x5760xf16>, | |
| dense_resource<__elided__> : tensor<640x1920xf16>, | |
| dense_resource<__elided__> : tensor<640x11520xf16>, | |
| dense_resource<__elided__> : tensor<640x5760xf16>, | |
| dense_resource<__elided__> : tensor<640x1280xf16>, | |
| dense_resource<__elided__> : tensor<640x8640xf16>, | |
| dense_resource<__elided__> : tensor<640x5760xf16>, | |
| dense_resource<__elided__> : tensor<640x960xf16>, | |
| dense_resource<__elided__> : tensor<640x5760xf16>, | |
| dense_resource<__elided__> : tensor<320x8640xf16>, | |
| dense_resource<__elided__> : tensor<320x2880xf16>, | |
| dense_resource<__elided__> : tensor<320x960xf16>, | |
| dense_resource<__elided__> : tensor<320x5760xf16>, | |
| dense_resource<__elided__> : tensor<320x2880xf16>, | |
| dense_resource<__elided__> : tensor<320x640xf16>, | |
| dense_resource<__elided__> : tensor<320x5760xf16>, | |
| dense_resource<__elided__> : tensor<320x2880xf16>, | |
| dense_resource<__elided__> : tensor<320x640xf16>, | |
| dense_resource<__elided__> : tensor<4x2880xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<2560xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<2560xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<5120xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<5120xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<10240xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<10240xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<10240xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<10240xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<10240xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<10240xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<1280xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<5120xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<5120xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<5120xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<640xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<2560xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<2560xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<2560xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| dense_resource<__elided__> : tensor<320xf16>, | |
| ]> | |
// Vulkan SPIR-V executable target. Declares a SPIR-V 1.6 capability/extension set for an
// NVIDIA discrete GPU, resource limits (49152 bytes of shared memory, at most 1024
// invocations per workgroup, min/max subgroup size both 32) and three NV cooperative-matrix
// configurations: 8x8x32 i8 -> i32, 16x16x16 f16 -> f16 and 16x16x16 f16 -> f32.
| #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>}> | |
// Pipeline layouts. Every layout uses a single descriptor set 0 made of storage buffers:
// one or two bindings marked ReadOnly followed by a last binding without the ReadOnly flag.
// The variants differ only in the push-constant count (0..6) and in the number of bindings.
| #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]> | |
| #pipeline_layout1 = #hal.pipeline.layout<push_constants = 2, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]> | |
| #pipeline_layout2 = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]> | |
| #pipeline_layout3 = #hal.pipeline.layout<push_constants = 3, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]> | |
| #pipeline_layout4 = #hal.pipeline.layout<push_constants = 3, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]> | |
| #pipeline_layout5 = #hal.pipeline.layout<push_constants = 4, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]> | |
| #pipeline_layout6 = #hal.pipeline.layout<push_constants = 4, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]> | |
| #pipeline_layout7 = #hal.pipeline.layout<push_constants = 1, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]> | |
| #pipeline_layout8 = #hal.pipeline.layout<push_constants = 5, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]> | |
| #pipeline_layout9 = #hal.pipeline.layout<push_constants = 6, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]> | |
| #pipeline_layout10 = #hal.pipeline.layout<push_constants = 2, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]> | |
| #pipeline_layout11 = #hal.pipeline.layout<push_constants = 1, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]> | |
// Codegen translation strategies; executable exports select one of these through their
// translation_info attribute.
| #translation = #iree_codegen.translation_info<SPIRVBaseDistribute> | |
| #translation1 = #iree_codegen.translation_info<SPIRVBaseVectorize> | |
| #translation2 = #iree_codegen.translation_info<SPIRVCooperativeMatrixVectorize store_stage = 0> | |
| #translation3 = #iree_codegen.translation_info<SPIRVSubgroupReduce> | |
| #translation4 = #iree_codegen.translation_info<SPIRVCooperativeMatrixVectorize pipeline_depth = 1 store_stage = 0> | |
| #translation5 = #iree_codegen.translation_info<SPIRVMatmulPromoteVectorize pipeline_depth = 1> | |
// Device target: Vulkan, with #executable_target_vulkan_spirv_fb as its only executable
// target and the legacy_sync flag set.
| #device_target_vulkan = #hal.device.target<"vulkan", {executable_targets = [#executable_target_vulkan_spirv_fb], legacy_sync}> | |
| module attributes {hal.device.targets = [#device_target_vulkan], torch.debug_module_name = "_lambda"} { | |
// @_constant receives the device buffer holding the packed constant data and
// @_constant__timepoint receives the fence stored alongside it; both are written by the
// initializer that follows.
| util.global private mutable @_constant__timepoint : !hal.fence | |
| util.global private @_constant : !hal.buffer | |
// Initializer for the 1731821440-byte composite constant (#composite_of_1731821440b).
// Path 1 (^bb0): try to map the host constant directly as a DeviceVisible|DeviceLocal buffer;
//                on success the buffer is stored together with a null fence.
// Path 2 (^bb1): otherwise create a host-visible staging buffer initialized from the
//                constant, allocate a device-local buffer, record a one-shot transfer command
//                buffer that copies staging -> device, and submit it; the buffer is stored
//                together with the fence signaled by that submission.
| util.initializer { | |
| %0 = util.null : !hal.fence | |
| %c1731821440 = arith.constant 1731821440 : index | |
| %c0 = arith.constant 0 : index | |
| %buffer_cst = util.buffer.constant {alignment = 64 : index} : !util.buffer = #composite_of_1731821440b | |
| %device = hal.ex.shared_device : !hal.device | |
| %allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
// %did_map reports whether the direct mapping succeeded and selects the branch below.
| %did_map, %mapped = hal.allocator.try_map<%allocator : !hal.allocator> source(%buffer_cst : !util.buffer)[%c0, %c1731821440] type("DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|SharingImmutable") : i1, !hal.buffer | |
| cf.cond_br %did_map, ^bb2(%mapped, %0 : !hal.buffer, !hal.fence), ^bb1 | |
// Fallback: staged upload.
// NOTE(review): hal.ex.shared_device / hal.device.allocator are re-queried before each use
// here; presumably they all resolve to the same device as %device above - confirm.
| ^bb1: // pred: ^bb0 | |
| %device_0 = hal.ex.shared_device : !hal.device | |
| %allocator_1 = hal.device.allocator<%device_0 : !hal.device> : !hal.allocator | |
| %mapped_2 = hal.allocator.allocate.initialized<%allocator_1 : !hal.allocator> source(%buffer_cst : !util.buffer)[%c0, %c1731821440] type("HostVisible|HostCoherent|HostLocal|DeviceVisible") usage("TransferSource|TransferTarget|Transfer|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer | |
| %device_3 = hal.ex.shared_device : !hal.device | |
| %allocator_4 = hal.device.allocator<%device_3 : !hal.device> : !hal.allocator | |
| %buffer = hal.allocator.allocate<%allocator_4 : !hal.allocator> type("DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|SharingImmutable") : !hal.buffer{%c1731821440} | |
| %device_5 = hal.ex.shared_device : !hal.device | |
// -1 is passed as the queue affinity of the submission below.
| %c-1_i64 = arith.constant -1 : i64 | |
| %cmd = hal.command_buffer.create device(%device_5 : !hal.device) mode("OneShot|AllowInlineExecution") categories(Transfer) : !hal.command_buffer | |
// Copy the whole constant (offset 0, full length) from the staging buffer to the device buffer.
| hal.command_buffer.copy_buffer<%cmd : !hal.command_buffer> source(%mapped_2 : !hal.buffer)[%c0] target(%buffer : !hal.buffer)[%c0] length(%c1731821440) | |
| hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
| hal.command_buffer.finalize<%cmd : !hal.command_buffer> | |
// Submit with a null wait fence; %fence is signaled by this submission and is what gets
// stored in @_constant__timepoint on this path.
| %1 = util.null : !hal.fence | |
| %fence = hal.fence.create device(%device_5 : !hal.device) flags("None") : !hal.fence | |
| hal.device.queue.execute<%device_5 : !hal.device> affinity(%c-1_i64) wait(%1) signal(%fence) commands([%cmd]) | |
| cf.br ^bb2(%buffer, %fence : !hal.buffer, !hal.fence) | |
// Join point: publish the buffer and fence chosen by whichever path ran.
| ^bb2(%2: !hal.buffer, %3: !hal.fence): // 2 preds: ^bb0, ^bb1 | |
| util.global.store %2, @_constant : !hal.buffer | |
| util.global.store %3, @_constant__timepoint : !hal.fence | |
| util.initializer.return | |
| } | |
// forward_dispatch_0: elementwise kernel writing out[i] = s * exp(f16(i) * c / 160), where
// s is the single f16 read from binding 0, c = -9.21033954 (numerically about -ln(10000))
// and i = WorkgroupId.x * 32 + LocalInvocationId.x. exp() is evaluated in f32 with a
// range-reduced polynomial approximation.
// NOTE(review): the constants look like a sinusoidal timestep-embedding frequency table -
// confirm against the originating model.
| hal.executable private @forward_dispatch_0 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
// Workgroup count is the constant (5, 2, 1); with the 32x1x1 workgroup size that gives
// 5 * 32 = 160 invocations along x and 2 along y, presumably the "2x160" in the name.
| hal.executable.export public @forward_dispatch_0_generic_2x160 ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation, workgroup_size = [32 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index): | |
| %c5 = arith.constant 5 : index | |
| %c2 = arith.constant 2 : index | |
| %c1 = arith.constant 1 : index | |
| hal.return %c5, %c2, %c1 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [StorageBuffer16BitAccess, Shader, Float16, Int64], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
// Binding (0, 0): a one-element f16 array (the scalar factor). Binding (0, 1): runtime-sized
// f16 array that receives the results.
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.array<1 x f16, stride=2> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_0_generic_2x160() "None" { | |
// Constants: 160 (row stride and divisor), 32 (workgroup width), 0.693147182 ~ ln 2,
// 1.44269502 ~ log2(e), polynomial coefficients, exponent bias 127 and mantissa width 23
// for building 2^k, +inf / -inf bit patterns, and 1.17549435E-38 (smallest normal f32).
| %cst160_i32 = spirv.Constant 160 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst_f32 = spirv.Constant 0.693147182 : f32 | |
| %cst_f32_0 = spirv.Constant 1.44269502 : f32 | |
| %cst_f32_1 = spirv.Constant 1.000000e+00 : f32 | |
| %cst_f32_2 = spirv.Constant 0.499705136 : f32 | |
| %cst_f32_3 = spirv.Constant 0.168738902 : f32 | |
| %cst_f32_4 = spirv.Constant 0.0366896503 : f32 | |
| %cst_f32_5 = spirv.Constant 1.314350e-02 : f32 | |
| %cst23_i32 = spirv.Constant 23 : i32 | |
| %cst127_i32 = spirv.Constant 127 : i32 | |
| %cst_f32_6 = spirv.Constant 0.000000e+00 : f32 | |
| %cst_f32_7 = spirv.Constant 0x7F800000 : f32 | |
| %cst_f32_8 = spirv.Constant 0xFF800000 : f32 | |
| %cst_f32_9 = spirv.Constant 1.17549435E-38 : f32 | |
| %cst-127_i32 = spirv.Constant -127 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst_f16 = spirv.Constant 0.000000e+00 : f16 | |
| %cst_f32_10 = spirv.Constant -9.21033954 : f32 | |
| %cst_f16_11 = spirv.Constant 1.600000e+02 : f16 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.array<1 x f16, stride=2> [0])>, StorageBuffer> | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
// Invocation coordinates: %1 = WorkgroupId.y, %3 = WorkgroupId.x,
// %5 = LocalInvocationId.x, %7 = LocalInvocationId.y.
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %0 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %1 = spirv.CompositeExtract %0[1 : i32] : vector<3xi32> | |
| %2 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %3 = spirv.CompositeExtract %2[0 : i32] : vector<3xi32> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %4 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %5 = spirv.CompositeExtract %4[0 : i32] : vector<3xi32> | |
| %6 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %7 = spirv.CompositeExtract %6[1 : i32] : vector<3xi32> | |
// %9 = scalar factor s loaded from element 0 of binding 0.
| %8 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<1 x f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %9 = spirv.Load "StorageBuffer" %8 : f16 | |
// Column index i = WorkgroupId.x * 32 + LocalInvocationId.x, converted to f16; then
// x = (i + 0.0) * f16(-9.21033954) / 160, widened to f32 as %18.
| %10 = spirv.IMul %3, %cst32_i32 : i32 | |
| %11 = spirv.IAdd %10, %5 : i32 | |
| %12 = spirv.SConvert %11 : i32 to i64 | |
| %13 = spirv.ConvertSToF %12 : i64 to f16 | |
| %14 = spirv.FAdd %13, %cst_f16 : f16 | |
| %15 = spirv.FConvert %cst_f32_10 : f32 to f16 | |
| %16 = spirv.FMul %14, %15 : f16 | |
| %17 = spirv.FDiv %16, %cst_f16_11 : f16 | |
| %18 = spirv.FConvert %17 : f16 to f32 | |
// NaN flag for x (both operands of the LogicalOr are the same value).
| %19 = spirv.IsNan %18 : f32 | |
| %20 = spirv.LogicalOr %19, %19 : i1 | |
// Range reduction: k = floor(x * log2(e)) (%22), r = x - k * ln2 (%24).
| %21 = spirv.FMul %18, %cst_f32_0 : f32 | |
| %22 = spirv.GL.Floor %21 : f32 | |
| %23 = spirv.FMul %22, %cst_f32 : f32 | |
| %24 = spirv.FSub %18, %23 : f32 | |
// Polynomial p(r) = (1 + r) + r^2 * (0.4997 + 0.1687 r) + r^4 * (0.03669 + 0.01314 r).
| %25 = spirv.FMul %24, %24 : f32 | |
| %26 = spirv.FMul %25, %25 : f32 | |
| %27 = spirv.GL.Fma %cst_f32_1, %24, %cst_f32_1 : f32 | |
| %28 = spirv.GL.Fma %cst_f32_3, %24, %cst_f32_2 : f32 | |
| %29 = spirv.GL.Fma %cst_f32_5, %24, %cst_f32_4 : f32 | |
| %30 = spirv.GL.Fma %28, %25, %27 : f32 | |
| %31 = spirv.GL.Fma %29, %26, %30 : f32 | |
// Build 2^k by placing (k + 127) in the f32 exponent field, then scale: %36 = p(r) * 2^k.
| %32 = spirv.ConvertFToS %22 : f32 to i32 | |
| %33 = spirv.IAdd %32, %cst127_i32 : i32 | |
| %34 = spirv.ShiftLeftLogical %33, %cst23_i32 : i32, i32 | |
| %35 = spirv.Bitcast %34 : i32 to f32 | |
| %36 = spirv.FMul %31, %35 : f32 | |
// Special cases, applied innermost to outermost: k outside [-127, 127] -> +inf for x > 0,
// otherwise the smallest normal f32; x == +inf -> +inf; x == -inf -> 0; NaN input -> x.
| %37 = spirv.SLessThanEqual %32, %cst127_i32 : i32 | |
| %38 = spirv.SGreaterThanEqual %32, %cst-127_i32 : i32 | |
| %39 = spirv.FOrdEqual %18, %cst_f32_8 : f32 | |
| %40 = spirv.FOrdEqual %18, %cst_f32_7 : f32 | |
| %41 = spirv.FOrdGreaterThan %18, %cst_f32_6 : f32 | |
| %42 = spirv.LogicalAnd %37, %38 : i1 | |
| %43 = spirv.Select %41, %cst_f32_7, %cst_f32_9 : i1, f32 | |
| %44 = spirv.Select %42, %36, %43 : i1, f32 | |
| %45 = spirv.Select %40, %cst_f32_7, %44 : i1, f32 | |
| %46 = spirv.Select %39, %cst_f32_6, %45 : i1, f32 | |
| %47 = spirv.Select %20, %18, %46 : i1, f32 | |
// Narrow to f16 and multiply by the scalar factor.
| %48 = spirv.FConvert %47 : f32 to f16 | |
| %49 = spirv.FMul %9, %48 : f16 | |
// Output element index = WorkgroupId.y * 160 + LocalInvocationId.y * 160
//                        + LocalInvocationId.x + WorkgroupId.x * 32.
| %50 = spirv.IMul %1, %cst160_i32 : i32 | |
| %51 = spirv.IMul %7, %cst160_i32 : i32 | |
| %52 = spirv.IAdd %50, %51 : i32 | |
| %53 = spirv.IAdd %52, %5 : i32 | |
| %54 = spirv.IAdd %53, %10 : i32 | |
| %55 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %54] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %55, %49 : f16 | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_0_generic_2x160, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_0_generic_2x160 "LocalSize", 32, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
// forward_dispatch_1: elementwise kernel over f16 data. Each invocation reads in[i] from
// binding 0 (i = WorkgroupId.x * 32 + LocalInvocationId.x), evaluates a quadrant-reduced
// polynomial approximation in f32 and stores the f16 result at element i + 77152 of binding 1.
// NOTE(review): the quadrant selection below is consistent with sin(x) - confirm.
| hal.executable private @forward_dispatch_1 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
// Workgroup count is the constant (10, 1, 1); with 32 invocations per workgroup that is
// 10 * 32 = 320 invocations, presumably the "320" in the name.
| hal.executable.export public @forward_dispatch_1_generic_320 ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation1, workgroup_size = [32 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index): | |
| %c10 = arith.constant 10 : index | |
| %c1 = arith.constant 1 : index | |
| hal.return %c10, %c1, %c1 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
// Both bindings are runtime-sized f16 arrays: (0, 0) is read, (0, 1) is written.
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_1_generic_320() "None" { | |
// Constants: 77152 (output element offset), 0.636619746 ~ 2/pi, 1.57079637 ~ pi/2, and two
// coefficient sets. %cst_f32_3..7 (-1/6, 1/120, ...) are close to the odd (sine) series,
// %cst_f32_8..12 (-1/2, 1/24, ...) are close to the even (cosine) series.
| %cst77152_i32 = spirv.Constant 77152 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst_f32 = spirv.Constant 0.636619746 : f32 | |
| %cst_f32_0 = spirv.Constant 1.57079637 : f32 | |
| %cst_f32_1 = spirv.Constant 1.000000e+00 : f32 | |
| %cst_f32_2 = spirv.Constant -1.000000e+00 : f32 | |
| %cst_f32_3 = spirv.Constant -0.166666672 : f32 | |
| %cst_f32_4 = spirv.Constant 0.00833334774 : f32 | |
| %cst_f32_5 = spirv.Constant -1.98426045E-4 : f32 | |
| %cst_f32_6 = spirv.Constant 2.76001265E-6 : f32 | |
| %cst_f32_7 = spirv.Constant -2.50293279E-8 : f32 | |
| %cst_f32_8 = spirv.Constant -5.000000e-01 : f32 | |
| %cst_f32_9 = spirv.Constant 0.0416666418 : f32 | |
| %cst_f32_10 = spirv.Constant -0.00138883304 : f32 | |
| %cst_f32_11 = spirv.Constant 2.47562348E-5 : f32 | |
| %cst_f32_12 = spirv.Constant -2.59630184E-7 : f32 | |
| %cst3_i32 = spirv.Constant 3 : i32 | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
// Element index i (%5) = LocalInvocationId.x + WorkgroupId.x * 32.
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %0 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %1 = spirv.CompositeExtract %0[0 : i32] : vector<3xi32> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %2 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %3 = spirv.CompositeExtract %2[0 : i32] : vector<3xi32> | |
| %4 = spirv.IMul %1, %cst32_i32 : i32 | |
| %5 = spirv.IAdd %3, %4 : i32 | |
// Load x = in[i] and widen to f32.
| %6 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %5] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %7 = spirv.Load "StorageBuffer" %6 : f16 | |
| %8 = spirv.FConvert %7 : f16 to f32 | |
// Range reduction: k = floor(x * 2/pi) (%10), r = x - k * pi/2 (%12), quadrant q = k & 3 (%14).
| %9 = spirv.FMul %8, %cst_f32 : f32 | |
| %10 = spirv.GL.Floor %9 : f32 | |
| %11 = spirv.FMul %10, %cst_f32_0 : f32 | |
| %12 = spirv.FSub %8, %11 : f32 | |
| %13 = spirv.ConvertFToS %10 : f32 to i32 | |
| %14 = spirv.BitwiseAnd %13, %cst3_i32 : i32 | |
// %17: q is 1 or 3 -> use the even (cosine-like) coefficient set with leading factor 1;
// otherwise use the odd (sine-like) set with leading factor r. %18: q > 1 -> negate.
| %15 = spirv.IEqual %14, %cst1_i32 : i32 | |
| %16 = spirv.IEqual %14, %cst3_i32 : i32 | |
| %17 = spirv.LogicalOr %15, %16 : i1 | |
| %18 = spirv.SGreaterThan %14, %cst1_i32 : i32 | |
| %19 = spirv.FMul %12, %12 : f32 | |
| %20 = spirv.Select %17, %cst_f32_1, %12 : i1, f32 | |
| %21 = spirv.Select %17, %cst_f32_8, %cst_f32_3 : i1, f32 | |
| %22 = spirv.Select %17, %cst_f32_9, %cst_f32_4 : i1, f32 | |
| %23 = spirv.Select %17, %cst_f32_10, %cst_f32_5 : i1, f32 | |
| %24 = spirv.Select %17, %cst_f32_11, %cst_f32_6 : i1, f32 | |
| %25 = spirv.Select %17, %cst_f32_12, %cst_f32_7 : i1, f32 | |
// Horner evaluation in r^2 (%19) of the selected coefficients, ending with constant term 1,
// then multiplied by the leading factor (%20).
| %26 = spirv.GL.Fma %19, %25, %24 : f32 | |
| %27 = spirv.GL.Fma %19, %26, %23 : f32 | |
| %28 = spirv.GL.Fma %19, %27, %22 : f32 | |
| %29 = spirv.GL.Fma %19, %28, %21 : f32 | |
| %30 = spirv.GL.Fma %19, %29, %cst_f32_1 : f32 | |
| %31 = spirv.FMul %20, %30 : f32 | |
// Apply the sign chosen by %18 and narrow back to f16.
| %32 = spirv.FMul %31, %cst_f32_2 : f32 | |
| %33 = spirv.Select %18, %32, %31 : i1, f32 | |
| %34 = spirv.FConvert %33 : f32 to f16 | |
// Store at element i + 77152 of binding 1 (the index counts f16 elements).
| %35 = spirv.IAdd %5, %cst77152_i32 : i32 | |
| %36 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %35] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %36, %34 : f16 | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_1_generic_320, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_1_generic_320 "LocalSize", 32, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
// forward_dispatch_2: elementwise kernel over f16 data. Each invocation reads in[i] from
// binding 0 (i = WorkgroupId.x * 32 + LocalInvocationId.x), evaluates a quadrant-reduced
// polynomial approximation in f32 and stores the f16 result at element i + 78112 of binding 1.
// NOTE(review): the quadrant selection below is consistent with cos(x) - confirm.
| hal.executable private @forward_dispatch_2 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
// Workgroup count is the constant (10, 1, 1); with 32 invocations per workgroup that is
// 10 * 32 = 320 invocations, presumably the "320" in the name.
| hal.executable.export public @forward_dispatch_2_generic_320 ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation1, workgroup_size = [32 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index): | |
| %c10 = arith.constant 10 : index | |
| %c1 = arith.constant 1 : index | |
| hal.return %c10, %c1, %c1 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
// Both bindings are runtime-sized f16 arrays: (0, 0) is read, (0, 1) is written.
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_2_generic_320() "None" { | |
// Constants: 78112 (output element offset), 0.636619746 ~ 2/pi, 1.57079637 ~ pi/2, and two
// coefficient sets. %cst_f32_3..7 (-1/6, 1/120, ...) are close to the odd (sine) series,
// %cst_f32_8..12 (-1/2, 1/24, ...) are close to the even (cosine) series.
| %cst78112_i32 = spirv.Constant 78112 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst_f32 = spirv.Constant 0.636619746 : f32 | |
| %cst_f32_0 = spirv.Constant 1.57079637 : f32 | |
| %cst_f32_1 = spirv.Constant 1.000000e+00 : f32 | |
| %cst_f32_2 = spirv.Constant -1.000000e+00 : f32 | |
| %cst_f32_3 = spirv.Constant -0.166666672 : f32 | |
| %cst_f32_4 = spirv.Constant 0.00833334774 : f32 | |
| %cst_f32_5 = spirv.Constant -1.98426045E-4 : f32 | |
| %cst_f32_6 = spirv.Constant 2.76001265E-6 : f32 | |
| %cst_f32_7 = spirv.Constant -2.50293279E-8 : f32 | |
| %cst_f32_8 = spirv.Constant -5.000000e-01 : f32 | |
| %cst_f32_9 = spirv.Constant 0.0416666418 : f32 | |
| %cst_f32_10 = spirv.Constant -0.00138883304 : f32 | |
| %cst_f32_11 = spirv.Constant 2.47562348E-5 : f32 | |
| %cst_f32_12 = spirv.Constant -2.59630184E-7 : f32 | |
| %cst3_i32 = spirv.Constant 3 : i32 | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
// Element index i (%5) = LocalInvocationId.x + WorkgroupId.x * 32.
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %0 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %1 = spirv.CompositeExtract %0[0 : i32] : vector<3xi32> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %2 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %3 = spirv.CompositeExtract %2[0 : i32] : vector<3xi32> | |
| %4 = spirv.IMul %1, %cst32_i32 : i32 | |
| %5 = spirv.IAdd %3, %4 : i32 | |
// Load x = in[i] and widen to f32.
| %6 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %5] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %7 = spirv.Load "StorageBuffer" %6 : f16 | |
| %8 = spirv.FConvert %7 : f16 to f32 | |
// Range reduction: k = floor(x * 2/pi) (%10), r = x - k * pi/2 (%12), quadrant q = k & 3 (%14).
| %9 = spirv.FMul %8, %cst_f32 : f32 | |
| %10 = spirv.GL.Floor %9 : f32 | |
| %11 = spirv.FMul %10, %cst_f32_0 : f32 | |
| %12 = spirv.FSub %8, %11 : f32 | |
| %13 = spirv.ConvertFToS %10 : f32 to i32 | |
| %14 = spirv.BitwiseAnd %13, %cst3_i32 : i32 | |
// %18: q is 0 or 2 -> use the even (cosine-like) coefficient set with leading factor 1;
// otherwise use the odd (sine-like) set with leading factor r. %19: q is 1 or 2 -> negate.
| %15 = spirv.IEqual %14, %cst0_i32 : i32 | |
| %16 = spirv.IEqual %14, %cst1_i32 : i32 | |
| %17 = spirv.IEqual %14, %cst2_i32 : i32 | |
| %18 = spirv.LogicalOr %15, %17 : i1 | |
| %19 = spirv.LogicalOr %16, %17 : i1 | |
| %20 = spirv.FMul %12, %12 : f32 | |
| %21 = spirv.Select %18, %cst_f32_1, %12 : i1, f32 | |
| %22 = spirv.Select %18, %cst_f32_8, %cst_f32_3 : i1, f32 | |
| %23 = spirv.Select %18, %cst_f32_9, %cst_f32_4 : i1, f32 | |
| %24 = spirv.Select %18, %cst_f32_10, %cst_f32_5 : i1, f32 | |
| %25 = spirv.Select %18, %cst_f32_11, %cst_f32_6 : i1, f32 | |
| %26 = spirv.Select %18, %cst_f32_12, %cst_f32_7 : i1, f32 | |
// Horner evaluation in r^2 (%20) of the selected coefficients, ending with constant term 1,
// then multiplied by the leading factor (%21).
| %27 = spirv.GL.Fma %20, %26, %25 : f32 | |
| %28 = spirv.GL.Fma %20, %27, %24 : f32 | |
| %29 = spirv.GL.Fma %20, %28, %23 : f32 | |
| %30 = spirv.GL.Fma %20, %29, %22 : f32 | |
| %31 = spirv.GL.Fma %20, %30, %cst_f32_1 : f32 | |
| %32 = spirv.FMul %21, %31 : f32 | |
// Apply the sign chosen by %19 and narrow back to f16.
| %33 = spirv.FMul %32, %cst_f32_2 : f32 | |
| %34 = spirv.Select %19, %33, %32 : i1, f32 | |
| %35 = spirv.FConvert %34 : f32 to f16 | |
// Store at element i + 78112 of binding 1 (the index counts f16 elements).
| %36 = spirv.IAdd %5, %cst78112_i32 : i32 | |
| %37 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %36] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %37, %35 : f16 | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_2_generic_320, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_2_generic_320 "LocalSize", 32, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
// forward_dispatch_3: each invocation copies a single f16 element from the
// storage buffer at binding (0, 0) to the storage buffer at binding (0, 1).
// Both element indices include an offset read from a two-element i32 push
// constant block; each push constant is floor-divided by 2 before use
// (presumably converting a byte offset into an f16 element index - confirm
// against the host-side dispatch code).
| hal.executable private @forward_dispatch_3 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
| hal.executable.export public @forward_dispatch_3 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation, workgroup_size = [32 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index): | |
// Workgroup-count region: ignores its arguments and returns the constant
// triple (5, 2, 1).
| %c5 = arith.constant 5 : index | |
| %c2 = arith.constant 2 : index | |
| %c1 = arith.constant 1 : index | |
| hal.return %c5, %c2, %c1 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_3() "None" { | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst160_i32 = spirv.Constant 160 : i32 | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
// %1 = push constant [0], %3 = push constant [1].
| %0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %1 = spirv.Load "PushConstant" %0 : i32 | |
| %2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %3 = spirv.Load "PushConstant" %2 : i32 | |
// %9 = floor(%1 / 2): for negative x the sequence computes
// -1 - ((-1 - x) / 2), otherwise x / 2, so the quotient rounds toward
// negative infinity rather than toward zero.
| %4 = spirv.SLessThan %1, %cst0_i32 : i32 | |
| %5 = spirv.ISub %cst-1_i32, %1 : i32 | |
| %6 = spirv.Select %4, %5, %1 : i1, i32 | |
| %7 = spirv.SDiv %6, %cst2_i32 : i32 | |
| %8 = spirv.ISub %cst-1_i32, %7 : i32 | |
| %9 = spirv.Select %4, %8, %7 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
// %15 = floor(%3 / 2), using the same select/divide/select pattern as %9.
| %10 = spirv.SLessThan %3, %cst0_i32 : i32 | |
| %11 = spirv.ISub %cst-1_i32, %3 : i32 | |
| %12 = spirv.Select %10, %11, %3 : i1, i32 | |
| %13 = spirv.SDiv %12, %cst2_i32 : i32 | |
| %14 = spirv.ISub %cst-1_i32, %13 : i32 | |
| %15 = spirv.Select %10, %14, %13 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
// %17 = WorkgroupId.x, %19 = WorkgroupId.y,
// %21 = LocalInvocationId.x, %23 = LocalInvocationId.y.
| %16 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %17 = spirv.CompositeExtract %16[0 : i32] : vector<3xi32> | |
| %18 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %19 = spirv.CompositeExtract %18[1 : i32] : vector<3xi32> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %20 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %21 = spirv.CompositeExtract %20[0 : i32] : vector<3xi32> | |
| %22 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %23 = spirv.CompositeExtract %22[1 : i32] : vector<3xi32> | |
// Source element index (%30):
//   WorkgroupId.y * 160 + LocalInvocationId.y * 160 + LocalInvocationId.x
//   + WorkgroupId.x * 32 + floor(pc[0] / 2)
// NOTE(review): LocalSize is declared as 32, 1, 1 below, so the
// LocalInvocationId.y term is presumably always 0 - confirm.
| %24 = spirv.IMul %19, %cst160_i32 : i32 | |
| %25 = spirv.IMul %23, %cst160_i32 : i32 | |
| %26 = spirv.IAdd %24, %25 : i32 | |
| %27 = spirv.IAdd %26, %21 : i32 | |
| %28 = spirv.IMul %17, %cst32_i32 : i32 | |
| %29 = spirv.IAdd %27, %28 : i32 | |
| %30 = spirv.IAdd %29, %9 : i32 | |
| %31 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %30] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %32 = spirv.Load "StorageBuffer" %31 : f16 | |
// Destination element index (%38): same terms as the source index, but with
// a multiplier of 320 instead of 160 on the .y components and
// floor(pc[1] / 2) as the offset.
| %33 = spirv.IMul %19, %cst320_i32 : i32 | |
| %34 = spirv.IMul %23, %cst320_i32 : i32 | |
| %35 = spirv.IAdd %33, %34 : i32 | |
| %36 = spirv.IAdd %35, %21 : i32 | |
| %37 = spirv.IAdd %36, %28 : i32 | |
| %38 = spirv.IAdd %37, %15 : i32 | |
| %39 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %38] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %39, %32 : f16 | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_3, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_3 "LocalSize", 32, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
// forward_dispatch_4: each invocation copies a single f16 element from the
// storage buffer at binding (0, 0) to the storage buffer at binding (0, 1).
// Unlike forward_dispatch_3 there is no push constant block; the source and
// destination base offsets are the compile-time constants 78112 and 77632.
| hal.executable private @forward_dispatch_4 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
| hal.executable.export public @forward_dispatch_4 ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation, workgroup_size = [32 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index): | |
// Workgroup-count region: ignores its arguments and returns the constant
// triple (5, 2, 1).
| %c5 = arith.constant 5 : index | |
| %c2 = arith.constant 2 : index | |
| %c1 = arith.constant 1 : index | |
| hal.return %c5, %c2, %c1 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_4() "None" { | |
| %cst77632_i32 = spirv.Constant 77632 : i32 | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst160_i32 = spirv.Constant 160 : i32 | |
| %cst78112_i32 = spirv.Constant 78112 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
// %1 = WorkgroupId.x, %3 = WorkgroupId.y,
// %5 = LocalInvocationId.x, %7 = LocalInvocationId.y.
| %0 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %1 = spirv.CompositeExtract %0[0 : i32] : vector<3xi32> | |
| %2 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %3 = spirv.CompositeExtract %2[1 : i32] : vector<3xi32> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %4 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %5 = spirv.CompositeExtract %4[0 : i32] : vector<3xi32> | |
| %6 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %7 = spirv.CompositeExtract %6[1 : i32] : vector<3xi32> | |
// Source element index (%14):
//   WorkgroupId.y * 160 + LocalInvocationId.y * 160 + LocalInvocationId.x
//   + WorkgroupId.x * 32 + 78112
| %8 = spirv.IMul %3, %cst160_i32 : i32 | |
| %9 = spirv.IMul %7, %cst160_i32 : i32 | |
| %10 = spirv.IAdd %8, %9 : i32 | |
| %11 = spirv.IAdd %10, %5 : i32 | |
| %12 = spirv.IMul %1, %cst32_i32 : i32 | |
| %13 = spirv.IAdd %11, %12 : i32 | |
| %14 = spirv.IAdd %13, %cst78112_i32 : i32 | |
| %15 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %14] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %16 = spirv.Load "StorageBuffer" %15 : f16 | |
// Destination element index (%22):
//   WorkgroupId.y * 320 + LocalInvocationId.y * 320 + LocalInvocationId.x
//   + WorkgroupId.x * 32 + 77632
| %17 = spirv.IMul %3, %cst320_i32 : i32 | |
| %18 = spirv.IMul %7, %cst320_i32 : i32 | |
| %19 = spirv.IAdd %17, %18 : i32 | |
| %20 = spirv.IAdd %19, %5 : i32 | |
| %21 = spirv.IAdd %20, %12 : i32 | |
| %22 = spirv.IAdd %21, %cst77632_i32 : i32 | |
| %23 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %22] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %23, %16 : f16 | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_4, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_4 "LocalSize", 32, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
// forward_dispatch_6: each invocation copies a single f16 element from the
// storage buffer at binding (0, 0) to the storage buffer at binding (0, 1).
// Source and destination share one computed base index and differ only in
// the constant added to it (77472 for the load, 160 for the store).
| hal.executable private @forward_dispatch_6 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
| hal.executable.export public @forward_dispatch_6 ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation, workgroup_size = [32 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index): | |
// Workgroup-count region: ignores its arguments and returns the constant
// triple (5, 2, 1).
| %c5 = arith.constant 5 : index | |
| %c2 = arith.constant 2 : index | |
| %c1 = arith.constant 1 : index | |
| hal.return %c5, %c2, %c1 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_6() "None" { | |
| %cst160_i32 = spirv.Constant 160 : i32 | |
| %cst77472_i32 = spirv.Constant 77472 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
// %1 = WorkgroupId.x, %3 = WorkgroupId.y,
// %5 = LocalInvocationId.x, %7 = LocalInvocationId.y.
| %0 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %1 = spirv.CompositeExtract %0[0 : i32] : vector<3xi32> | |
| %2 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %3 = spirv.CompositeExtract %2[1 : i32] : vector<3xi32> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %4 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %5 = spirv.CompositeExtract %4[0 : i32] : vector<3xi32> | |
| %6 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %7 = spirv.CompositeExtract %6[1 : i32] : vector<3xi32> | |
// Shared base index (%13):
//   WorkgroupId.y * 320 + LocalInvocationId.y * 320 + LocalInvocationId.x
//   + WorkgroupId.x * 32
| %8 = spirv.IMul %3, %cst320_i32 : i32 | |
| %9 = spirv.IMul %7, %cst320_i32 : i32 | |
| %10 = spirv.IAdd %8, %9 : i32 | |
| %11 = spirv.IAdd %10, %5 : i32 | |
| %12 = spirv.IMul %1, %cst32_i32 : i32 | |
| %13 = spirv.IAdd %11, %12 : i32 | |
// Load from binding (0, 0) at base + 77472.
| %14 = spirv.IAdd %13, %cst77472_i32 : i32 | |
| %15 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %14] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %16 = spirv.Load "StorageBuffer" %15 : f16 | |
// Store to binding (0, 1) at base + 160.
| %17 = spirv.IAdd %13, %cst160_i32 : i32 | |
| %18 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %17] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %18, %16 : f16 | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_6, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_6 "LocalSize", 32, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
| hal.executable private @forward_dispatch_7 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
| hal.executable.export public @forward_dispatch_7_matmul_32x1280x320 ordinal(0) layout(#pipeline_layout2) attributes {subgroup_size = 32 : index, translation_info = #translation2, workgroup_size = [128 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
| %c5 = arith.constant 5 : index | |
| %c2 = arith.constant 2 : index | |
| %c1 = arith.constant 1 : index | |
| hal.return %c5, %c2, %c1 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, Float16, CooperativeMatrixNV], [SPV_KHR_storage_buffer_storage_class, SPV_NV_cooperative_matrix]> { | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup> | |
| spirv.GlobalVariable @__workgroup_mem__3 : !spirv.ptr<!spirv.struct<(!spirv.array<80 x vector<4xf32>>)>, Workgroup> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_7_matmul_32x1280x320() "None" attributes {spirv.entry_point_abi = #spirv.entry_point_abi<subgroup_size = 32>} { | |
| %false = spirv.Constant false | |
| %cst1392_i32 = spirv.Constant 1392 : i32 | |
| %cst1394_i32 = spirv.Constant 1394 : i32 | |
| %cst1396_i32 = spirv.Constant 1396 : i32 | |
| %cst1398_i32 = spirv.Constant 1398 : i32 | |
| %cst2560_i32 = spirv.Constant 2560 : i32 | |
| %cst534_i32 = spirv.Constant 534 : i32 | |
| %cst532_i32 = spirv.Constant 532 : i32 | |
| %cst530_i32 = spirv.Constant 530 : i32 | |
| %cst528_i32 = spirv.Constant 528 : i32 | |
| %cst6_i32 = spirv.Constant 6 : i32 | |
| %cst4_i32 = spirv.Constant 4 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst33_i32 = spirv.Constant 33 : i32 | |
| %cst160_i32 = spirv.Constant 160 : i32 | |
| %cst5_i32 = spirv.Constant 5 : i32 | |
| %cst80_i32 = spirv.Constant 80 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst640_i32 = spirv.Constant 640 : i32 | |
| %cst40_i32 = spirv.Constant 40 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
| %cst1024_i32 = spirv.Constant 1024 : i32 | |
| %cst256_i32 = spirv.Constant 256 : i32 | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst16_i32 = spirv.Constant 16 : i32 | |
| %cst_f16 = spirv.Constant 0.000000e+00 : f16 | |
| %0 = spirv.CompositeConstruct %cst_f16 : (f16) -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %__workgroup_mem__3_addr = spirv.mlir.addressof @__workgroup_mem__3 : !spirv.ptr<!spirv.struct<(!spirv.array<80 x vector<4xf32>>)>, Workgroup> | |
| %__workgroup_mem__4_addr = spirv.mlir.addressof @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup> | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %1 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %2 = spirv.CompositeExtract %1[1 : i32] : vector<3xi32> | |
| %3 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %4 = spirv.CompositeExtract %3[0 : i32] : vector<3xi32> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %5 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %6 = spirv.CompositeExtract %5[0 : i32] : vector<3xi32> | |
| %7 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %8 = spirv.CompositeExtract %7[1 : i32] : vector<3xi32> | |
| %9 = spirv.IMul %6, %cst8_i32 : i32 | |
| %10 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %11 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %12 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %13 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%cst0_i32, %0, %0, %0, %0 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>) | |
| ^bb1(%39: i32, %40: !spirv.coopmatrix<16x16xf16, Subgroup>, %41: !spirv.coopmatrix<16x16xf16, Subgroup>, %42: !spirv.coopmatrix<16x16xf16, Subgroup>, %43: !spirv.coopmatrix<16x16xf16, Subgroup>): // 2 preds: ^bb0, ^bb2 | |
| %44 = spirv.SLessThan %39, %cst320_i32 : i32 | |
| spirv.BranchConditional %44, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%8 : i32) | |
| ^bb1(%90: i32): // 2 preds: ^bb0, ^bb2 | |
| %91 = spirv.SLessThan %90, %cst16_i32 : i32 | |
| spirv.BranchConditional %91, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%9 : i32) | |
| ^bb1(%93: i32): // 2 preds: ^bb0, ^bb2 | |
| %94 = spirv.SLessThan %93, %cst32_i32 : i32 | |
| spirv.BranchConditional %94, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
| %95 = spirv.IMul %90, %cst40_i32 : i32 | |
| %96 = spirv.IMul %2, %cst640_i32 : i32 | |
| %97 = spirv.IAdd %95, %96 : i32 | |
| %98 = spirv.IAdd %39, %93 : i32 | |
| %99 = spirv.SLessThan %98, %cst0_i32 : i32 | |
| %100 = spirv.ISub %cst-1_i32, %98 : i32 | |
| %101 = spirv.Select %99, %100, %98 : i1, i32 | |
| %102 = spirv.SDiv %101, %cst8_i32 : i32 | |
| %103 = spirv.ISub %cst-1_i32, %102 : i32 | |
| %104 = spirv.Select %99, %103, %102 : i1, i32 | |
| %105 = spirv.IAdd %97, %104 : i32 | |
| %106 = spirv.IAdd %105, %cst80_i32 : i32 | |
| %107 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %106] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %108 = spirv.Load "StorageBuffer" %107 : vector<4xf32> | |
| %109 = spirv.IMul %90, %cst5_i32 : i32 | |
| %110 = spirv.SLessThan %93, %cst0_i32 : i32 | |
| %111 = spirv.ISub %cst-1_i32, %93 : i32 | |
| %112 = spirv.Select %110, %111, %93 : i1, i32 | |
| %113 = spirv.SDiv %112, %cst8_i32 : i32 | |
| %114 = spirv.ISub %cst-1_i32, %113 : i32 | |
| %115 = spirv.Select %110, %114, %113 : i1, i32 | |
| %116 = spirv.IAdd %109, %115 : i32 | |
| %117 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %116] : !spirv.ptr<!spirv.struct<(!spirv.array<80 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %117, %108 : vector<4xf32> | |
| %118 = spirv.IAdd %93, %cst1024_i32 : i32 | |
| spirv.Branch ^bb1(%118 : i32) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
| %92 = spirv.IAdd %90, %cst1_i32 : i32 | |
| spirv.Branch ^bb1(%92 : i32) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%8 : i32) | |
| ^bb1(%90: i32): // 2 preds: ^bb0, ^bb2 | |
| %91 = spirv.SLessThan %90, %cst32_i32 : i32 | |
| spirv.BranchConditional %91, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%9 : i32) | |
| ^bb1(%93: i32): // 2 preds: ^bb0, ^bb2 | |
| %94 = spirv.SLessThan %93, %cst256_i32 : i32 | |
| spirv.BranchConditional %94, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
| %95 = spirv.IMul %39, %cst160_i32 : i32 | |
| %96 = spirv.IMul %90, %cst160_i32 : i32 | |
| %97 = spirv.IAdd %95, %96 : i32 | |
| %98 = spirv.IMul %4, %cst32_i32 : i32 | |
| %99 = spirv.IAdd %97, %98 : i32 | |
| %100 = spirv.SLessThan %93, %cst0_i32 : i32 | |
| %101 = spirv.ISub %cst-1_i32, %93 : i32 | |
| %102 = spirv.Select %100, %101, %93 : i1, i32 | |
| %103 = spirv.SDiv %102, %cst8_i32 : i32 | |
| %104 = spirv.ISub %cst-1_i32, %103 : i32 | |
| %105 = spirv.Select %100, %104, %103 : i1, i32 | |
| %106 = spirv.IAdd %99, %105 : i32 | |
| %107 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %106] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %108 = spirv.Load "StorageBuffer" %107 : vector<4xf32> | |
| %109 = spirv.IMul %90, %cst33_i32 : i32 | |
| %110 = spirv.IAdd %109, %105 : i32 | |
| %111 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %110] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %111, %108 : vector<4xf32> | |
| %112 = spirv.IAdd %93, %cst1024_i32 : i32 | |
| spirv.Branch ^bb1(%112 : i32) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
| %92 = spirv.IAdd %90, %cst1_i32 : i32 | |
| spirv.Branch ^bb1(%92 : i32) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| %45 = spirv.IMul %8, %cst80_i32 : i32 | |
| %46 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %45] : !spirv.ptr<!spirv.struct<(!spirv.array<80 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %47 = spirv.NV.CooperativeMatrixLoad %46, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %48 = spirv.IAdd %45, %cst2_i32 : i32 | |
| %49 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %48] : !spirv.ptr<!spirv.struct<(!spirv.array<80 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %50 = spirv.NV.CooperativeMatrixLoad %49, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %51 = spirv.SLessThan %6, %cst0_i32 : i32 | |
| %52 = spirv.ISub %cst-1_i32, %6 : i32 | |
| %53 = spirv.Select %51, %52, %6 : i1, i32 | |
| %54 = spirv.SDiv %53, %cst32_i32 : i32 | |
| %55 = spirv.ISub %cst-1_i32, %54 : i32 | |
| %56 = spirv.Select %51, %55, %54 : i1, i32 | |
| %57 = spirv.IMul %56, %cst8_i32 : i32 | |
| %58 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %57] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %59 = spirv.NV.CooperativeMatrixLoad %58, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %60 = spirv.IAdd %57, %cst2_i32 : i32 | |
| %61 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %60] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %62 = spirv.NV.CooperativeMatrixLoad %61, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %63 = spirv.IAdd %57, %cst4_i32 : i32 | |
| %64 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %63] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %65 = spirv.NV.CooperativeMatrixLoad %64, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %66 = spirv.IAdd %57, %cst6_i32 : i32 | |
| %67 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %66] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %68 = spirv.NV.CooperativeMatrixLoad %67, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %69 = spirv.IAdd %57, %cst528_i32 : i32 | |
| %70 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %69] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %71 = spirv.NV.CooperativeMatrixLoad %70, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %72 = spirv.IAdd %57, %cst530_i32 : i32 | |
| %73 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %72] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %74 = spirv.NV.CooperativeMatrixLoad %73, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %75 = spirv.IAdd %57, %cst532_i32 : i32 | |
| %76 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %75] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %77 = spirv.NV.CooperativeMatrixLoad %76, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %78 = spirv.IAdd %57, %cst534_i32 : i32 | |
| %79 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %78] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %80 = spirv.NV.CooperativeMatrixLoad %79, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %81 = spirv.NV.CooperativeMatrixMulAdd %47, %59, %40 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %82 = spirv.NV.CooperativeMatrixMulAdd %50, %71, %81 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %83 = spirv.NV.CooperativeMatrixMulAdd %47, %62, %41 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %84 = spirv.NV.CooperativeMatrixMulAdd %50, %74, %83 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %85 = spirv.NV.CooperativeMatrixMulAdd %47, %65, %42 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %86 = spirv.NV.CooperativeMatrixMulAdd %50, %77, %85 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %87 = spirv.NV.CooperativeMatrixMulAdd %47, %68, %43 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %88 = spirv.NV.CooperativeMatrixMulAdd %50, %80, %87 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %10, %82 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %11, %84 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %12, %86 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %13, %88 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %89 = spirv.IAdd %39, %cst32_i32 : i32 | |
| spirv.Branch ^bb1(%89, %82, %84, %86, %88 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
| %14 = spirv.Load "Function" %13 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %15 = spirv.Load "Function" %12 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %16 = spirv.Load "Function" %11 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %17 = spirv.Load "Function" %10 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %18 = spirv.IMul %2, %cst2560_i32 : i32 | |
| %19 = spirv.IMul %8, %cst2560_i32 : i32 | |
| %20 = spirv.IAdd %18, %19 : i32 | |
| %21 = spirv.IMul %4, %cst32_i32 : i32 | |
| %22 = spirv.IAdd %20, %21 : i32 | |
| %23 = spirv.SLessThan %6, %cst0_i32 : i32 | |
| %24 = spirv.ISub %cst-1_i32, %6 : i32 | |
| %25 = spirv.Select %23, %24, %6 : i1, i32 | |
| %26 = spirv.SDiv %25, %cst32_i32 : i32 | |
| %27 = spirv.ISub %cst-1_i32, %26 : i32 | |
| %28 = spirv.Select %23, %27, %26 : i1, i32 | |
| %29 = spirv.IMul %28, %cst8_i32 : i32 | |
| %30 = spirv.IAdd %22, %29 : i32 | |
| %31 = spirv.IAdd %30, %cst1398_i32 : i32 | |
| %32 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %31] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %32, %14, %cst160_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %33 = spirv.IAdd %30, %cst1396_i32 : i32 | |
| %34 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %33] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %34, %15, %cst160_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %35 = spirv.IAdd %30, %cst1394_i32 : i32 | |
| %36 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %35] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %36, %16, %cst160_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %37 = spirv.IAdd %30, %cst1392_i32 : i32 | |
| %38 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %37] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %38, %17, %cst160_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_7_matmul_32x1280x320, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_7_matmul_32x1280x320 "LocalSize", 128, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
| // Executable for dispatch 8: a single variant (@vulkan_spirv_fb) exporting one element-wise f16 kernel. | |
| // Per 4-lane vector the kernel computes s = in1[i] + in0[j], then stores s * (1 / (1 + e)) where e is the | |
| // result of the inlined approximation applied to -s (see the function body below). | |
| hal.executable private @forward_dispatch_8 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
| // Export region: yields the workgroup count (10, 2, 1); the workgroup size attribute is 32 x 1 x 1. | |
| // 10 workgroups * 32 invocations * 4 lanes = 1280 and y = 2, which lines up with the "2x1280" in the name. | |
| hal.executable.export public @forward_dispatch_8_generic_2x1280 ordinal(0) layout(#pipeline_layout2) attributes {translation_info = #translation1, workgroup_size = [32 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index): | |
| %c10 = arith.constant 10 : index | |
| %c2 = arith.constant 2 : index | |
| %c1 = arith.constant 1 : index | |
| hal.return %c10, %c2, %c1 : index, index, index | |
| } | |
| // Wrapper module; its attribute records the SPIR-V target environment (version/capabilities/extensions, | |
| // api=Vulkan, resource limits and the supported cooperative-matrix shapes). | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
| // Built-in inputs used for indexing. | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| // Three storage buffers bound at (0, 0), (0, 1) and (0, 2); each is a runtime array of vector<4xf16> with | |
| // an 8-byte stride, so every index below counts 4-element f16 vectors rather than scalars. | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| // Kernel entry point: no arguments, no results; all I/O goes through the globals declared above. | |
| spirv.func @forward_dispatch_8_generic_2x1280() "None" { | |
| // Index constants: 2784 / 216398080 / 13024 are fixed offsets (in vector<4xf16> units) added to the | |
| // computed indices for bindings 0 / 1 / 2; 320 is the per-WorkgroupId.y stride; 32 matches the | |
| // workgroup size in x. | |
| %cst2784_i32 = spirv.Constant 2784 : i32 | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
| %cst216398080_i32 = spirv.Constant 216398080 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| // Float constants for the approximation: 0.693147182 ~= ln(2), 1.44269502 ~= log2(e), followed by the | |
| // polynomial coefficients used in the Fma chain. | |
| %cst_vec_4xf32 = spirv.Constant dense<0.693147182> : vector<4xf32> | |
| %cst_vec_4xf32_0 = spirv.Constant dense<1.44269502> : vector<4xf32> | |
| %cst_vec_4xf32_1 = spirv.Constant dense<1.000000e+00> : vector<4xf32> | |
| %cst_vec_4xf32_2 = spirv.Constant dense<0.499705136> : vector<4xf32> | |
| %cst_vec_4xf32_3 = spirv.Constant dense<0.168738902> : vector<4xf32> | |
| %cst_vec_4xf32_4 = spirv.Constant dense<0.0366896503> : vector<4xf32> | |
| %cst_vec_4xf32_5 = spirv.Constant dense<1.314350e-02> : vector<4xf32> | |
| // 23 and 127 are the shift amount and bias used to build a power of two directly in f32 bits. | |
| %cst_vec_4xi32 = spirv.Constant dense<23> : vector<4xi32> | |
| %cst_vec_4xi32_6 = spirv.Constant dense<127> : vector<4xi32> | |
| %cst_vec_4xf32_7 = spirv.Constant dense<0.000000e+00> : vector<4xf32> | |
| // 0x7F800000 / 0xFF800000 are the f32 bit patterns of +infinity / -infinity. | |
| %cst_vec_4xf32_8 = spirv.Constant dense<0x7F800000> : vector<4xf32> | |
| %cst_vec_4xf32_9 = spirv.Constant dense<0xFF800000> : vector<4xf32> | |
| %cst_vec_4xf32_10 = spirv.Constant dense<1.17549435E-38> : vector<4xf32> | |
| %cst_vec_4xi32_11 = spirv.Constant dense<-127> : vector<4xi32> | |
| %cst_vec_4xf16 = spirv.Constant dense<1.000000e+00> : vector<4xf16> | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst13024_i32 = spirv.Constant 13024 : i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| %__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| // %1 = WorkgroupId.y, %3 = WorkgroupId.x, %5 = LocalInvocationId.x (the builtin is loaded twice as emitted). | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %0 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %1 = spirv.CompositeExtract %0[1 : i32] : vector<3xi32> | |
| %2 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %3 = spirv.CompositeExtract %2[0 : i32] : vector<3xi32> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %4 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %5 = spirv.CompositeExtract %4[0 : i32] : vector<3xi32> | |
| // %7 = WorkgroupId.x * 32 + LocalInvocationId.x: this invocation's column, in vector<4xf16> units. | |
| %6 = spirv.IMul %3, %cst32_i32 : i32 | |
| %7 = spirv.IAdd %6, %5 : i32 | |
| // Operand A: binding 1 at column + 216398080. The index does not involve WorkgroupId.y, so both y | |
| // workgroups read the same vectors (presumably a broadcast bias row; confirm against the source model). | |
| %8 = spirv.IAdd %7, %cst216398080_i32 : i32 | |
| %9 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %8] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| %10 = spirv.Load "StorageBuffer" %9 : vector<4xf16> | |
| // Operand B: binding 0 at column + WorkgroupId.y * 320 + 2784. %12 (without the offset) is reused for the store. | |
| %11 = spirv.IMul %1, %cst320_i32 : i32 | |
| %12 = spirv.IAdd %7, %11 : i32 | |
| %13 = spirv.IAdd %12, %cst2784_i32 : i32 | |
| %14 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %13] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| %15 = spirv.Load "StorageBuffer" %14 : vector<4xf16> | |
| // s = A + B in f16; x = -s widened to f32 for the approximation below. | |
| %16 = spirv.FAdd %10, %15 : vector<4xf16> | |
| %17 = spirv.FNegate %16 : vector<4xf16> | |
| %18 = spirv.FConvert %17 : vector<4xf16> to vector<4xf32> | |
| // NaN mask for x; the LogicalOr of the mask with itself leaves it unchanged. | |
| %19 = spirv.IsNan %18 : vector<4xf32> | |
| %20 = spirv.LogicalOr %19, %19 : vector<4xi1> | |
| // Range reduction: k = floor(x * 1.44269502), r = x - k * 0.693147182. | |
| // NOTE(review): this has the shape of an inlined exp(x) = 2^k * P(r) expansion; confirm against the lowering. | |
| %21 = spirv.FMul %18, %cst_vec_4xf32_0 : vector<4xf32> | |
| %22 = spirv.GL.Floor %21 : vector<4xf32> | |
| %23 = spirv.FMul %22, %cst_vec_4xf32 : vector<4xf32> | |
| %24 = spirv.FSub %18, %23 : vector<4xf32> | |
| // %25 = r^2, %26 = r^4. | |
| %25 = spirv.FMul %24, %24 : vector<4xf32> | |
| %26 = spirv.FMul %25, %25 : vector<4xf32> | |
| // Polynomial P(r) = (1 + r) + r^2 * (0.499705136 + 0.168738902 r) + r^4 * (0.0366896503 + 0.0131435 r). | |
| %27 = spirv.GL.Fma %cst_vec_4xf32_1, %24, %cst_vec_4xf32_1 : vector<4xf32> | |
| %28 = spirv.GL.Fma %cst_vec_4xf32_3, %24, %cst_vec_4xf32_2 : vector<4xf32> | |
| %29 = spirv.GL.Fma %cst_vec_4xf32_5, %24, %cst_vec_4xf32_4 : vector<4xf32> | |
| %30 = spirv.GL.Fma %28, %25, %27 : vector<4xf32> | |
| %31 = spirv.GL.Fma %29, %26, %30 : vector<4xf32> | |
| // Build the scale factor from k: (int(k) + 127) << 23 reinterpreted as f32, then %36 = P(r) * scale. | |
| %32 = spirv.ConvertFToS %22 : vector<4xf32> to vector<4xi32> | |
| %33 = spirv.IAdd %32, %cst_vec_4xi32_6 : vector<4xi32> | |
| %34 = spirv.ShiftLeftLogical %33, %cst_vec_4xi32 : vector<4xi32>, vector<4xi32> | |
| %35 = spirv.Bitcast %34 : vector<4xi32> to vector<4xf32> | |
| %36 = spirv.FMul %31, %35 : vector<4xf32> | |
| // Masks for special cases: k within [-127, 127], x == -inf, x == +inf, x > 0. | |
| %37 = spirv.SLessThanEqual %32, %cst_vec_4xi32_6 : vector<4xi32> | |
| %38 = spirv.SGreaterThanEqual %32, %cst_vec_4xi32_11 : vector<4xi32> | |
| %39 = spirv.FOrdEqual %18, %cst_vec_4xf32_9 : vector<4xf32> | |
| %40 = spirv.FOrdEqual %18, %cst_vec_4xf32_8 : vector<4xf32> | |
| %41 = spirv.FOrdGreaterThan %18, %cst_vec_4xf32_7 : vector<4xf32> | |
| %42 = spirv.LogicalAnd %37, %38 : vector<4xi1> | |
| // Select chain, later selects taking priority: out-of-range k -> +inf if x > 0 else 1.17549435E-38; | |
| // x == +inf -> +inf; x == -inf -> 0; NaN x -> x itself. | |
| %43 = spirv.Select %41, %cst_vec_4xf32_8, %cst_vec_4xf32_10 : vector<4xi1>, vector<4xf32> | |
| %44 = spirv.Select %42, %36, %43 : vector<4xi1>, vector<4xf32> | |
| %45 = spirv.Select %40, %cst_vec_4xf32_8, %44 : vector<4xi1>, vector<4xf32> | |
| %46 = spirv.Select %39, %cst_vec_4xf32_7, %45 : vector<4xi1>, vector<4xf32> | |
| %47 = spirv.Select %20, %18, %46 : vector<4xi1>, vector<4xf32> | |
| // Back to f16: result = s * (1 / (1 + e)), with e the approximation result for x = -s. | |
| %48 = spirv.FConvert %47 : vector<4xf32> to vector<4xf16> | |
| %49 = spirv.FAdd %48, %cst_vec_4xf16 : vector<4xf16> | |
| %50 = spirv.FDiv %cst_vec_4xf16, %49 : vector<4xf16> | |
| %51 = spirv.FMul %50, %16 : vector<4xf16> | |
| // Store to binding 2 at column + WorkgroupId.y * 320 + 13024. | |
| %52 = spirv.IAdd %12, %cst13024_i32 : i32 | |
| %53 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %52] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %53, %51 : vector<4xf16> | |
| spirv.Return | |
| } | |
| // Entry-point declaration and local size; 32 x 1 x 1 matches the workgroup_size on the export op. | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_8_generic_2x1280, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_8_generic_2x1280 "LocalSize", 32, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
| hal.executable private @forward_dispatch_9 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
| hal.executable.export public @forward_dispatch_9_matmul_32x1280x1280 ordinal(0) layout(#pipeline_layout3) attributes {subgroup_size = 32 : index, translation_info = #translation2, workgroup_size = [128 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
| %c5 = arith.constant 5 : index | |
| %c2 = arith.constant 2 : index | |
| %c1 = arith.constant 1 : index | |
| hal.return %c5, %c2, %c1 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, Float16, CooperativeMatrixNV], [SPV_KHR_storage_buffer_storage_class, SPV_NV_cooperative_matrix]> { | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant> | |
| spirv.GlobalVariable @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup> | |
| spirv.GlobalVariable @__workgroup_mem__3 : !spirv.ptr<!spirv.struct<(!spirv.array<80 x vector<4xf32>>)>, Workgroup> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_9_matmul_32x1280x1280() "None" attributes {spirv.entry_point_abi = #spirv.entry_point_abi<subgroup_size = 32>} { | |
| %false = spirv.Constant false | |
| %cst534_i32 = spirv.Constant 534 : i32 | |
| %cst532_i32 = spirv.Constant 532 : i32 | |
| %cst530_i32 = spirv.Constant 530 : i32 | |
| %cst528_i32 = spirv.Constant 528 : i32 | |
| %cst6_i32 = spirv.Constant 6 : i32 | |
| %cst4_i32 = spirv.Constant 4 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst80_i32 = spirv.Constant 80 : i32 | |
| %cst33_i32 = spirv.Constant 33 : i32 | |
| %cst5_i32 = spirv.Constant 5 : i32 | |
| %cst2560_i32 = spirv.Constant 2560 : i32 | |
| %cst160_i32 = spirv.Constant 160 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst1280_i32 = spirv.Constant 1280 : i32 | |
| %cst1024_i32 = spirv.Constant 1024 : i32 | |
| %cst256_i32 = spirv.Constant 256 : i32 | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst16_i32 = spirv.Constant 16 : i32 | |
| %cst_f16 = spirv.Constant 0.000000e+00 : f16 | |
| %0 = spirv.CompositeConstruct %cst_f16 : (f16) -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %__workgroup_mem__3_addr = spirv.mlir.addressof @__workgroup_mem__3 : !spirv.ptr<!spirv.struct<(!spirv.array<80 x vector<4xf32>>)>, Workgroup> | |
| %__workgroup_mem__4_addr = spirv.mlir.addressof @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup> | |
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant> | |
| %1 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %2 = spirv.Load "PushConstant" %1 : i32 | |
| %3 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %4 = spirv.Load "PushConstant" %3 : i32 | |
| %5 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %6 = spirv.Load "PushConstant" %5 : i32 | |
| %7 = spirv.SLessThan %2, %cst0_i32 : i32 | |
| %8 = spirv.ISub %cst-1_i32, %2 : i32 | |
| %9 = spirv.Select %7, %8, %2 : i1, i32 | |
| %10 = spirv.SDiv %9, %cst16_i32 : i32 | |
| %11 = spirv.ISub %cst-1_i32, %10 : i32 | |
| %12 = spirv.Select %7, %11, %10 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %13 = spirv.SLessThan %4, %cst0_i32 : i32 | |
| %14 = spirv.ISub %cst-1_i32, %4 : i32 | |
| %15 = spirv.Select %13, %14, %4 : i1, i32 | |
| %16 = spirv.SDiv %15, %cst16_i32 : i32 | |
| %17 = spirv.ISub %cst-1_i32, %16 : i32 | |
| %18 = spirv.Select %13, %17, %16 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %19 = spirv.SLessThan %6, %cst0_i32 : i32 | |
| %20 = spirv.ISub %cst-1_i32, %6 : i32 | |
| %21 = spirv.Select %19, %20, %6 : i1, i32 | |
| %22 = spirv.SDiv %21, %cst16_i32 : i32 | |
| %23 = spirv.ISub %cst-1_i32, %22 : i32 | |
| %24 = spirv.Select %19, %23, %22 : i1, i32 | |
| %__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %25 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %26 = spirv.CompositeExtract %25[1 : i32] : vector<3xi32> | |
| %27 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %28 = spirv.CompositeExtract %27[0 : i32] : vector<3xi32> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %29 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %30 = spirv.CompositeExtract %29[0 : i32] : vector<3xi32> | |
| %31 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %32 = spirv.CompositeExtract %31[1 : i32] : vector<3xi32> | |
| %33 = spirv.IMul %30, %cst8_i32 : i32 | |
| %34 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %35 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %36 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %37 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%cst0_i32, %0, %0, %0, %0 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>) | |
| ^bb1(%63: i32, %64: !spirv.coopmatrix<16x16xf16, Subgroup>, %65: !spirv.coopmatrix<16x16xf16, Subgroup>, %66: !spirv.coopmatrix<16x16xf16, Subgroup>, %67: !spirv.coopmatrix<16x16xf16, Subgroup>): // 2 preds: ^bb0, ^bb2 | |
| %68 = spirv.SLessThan %63, %cst1280_i32 : i32 | |
| spirv.BranchConditional %68, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%32 : i32) | |
| ^bb1(%114: i32): // 2 preds: ^bb0, ^bb2 | |
| %115 = spirv.SLessThan %114, %cst16_i32 : i32 | |
| spirv.BranchConditional %115, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%33 : i32) | |
| ^bb1(%117: i32): // 2 preds: ^bb0, ^bb2 | |
| %118 = spirv.SLessThan %117, %cst32_i32 : i32 | |
| spirv.BranchConditional %118, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
| %119 = spirv.IMul %114, %cst160_i32 : i32 | |
| %120 = spirv.IMul %26, %cst2560_i32 : i32 | |
| %121 = spirv.IAdd %119, %120 : i32 | |
| %122 = spirv.IAdd %121, %12 : i32 | |
| %123 = spirv.IAdd %63, %117 : i32 | |
| %124 = spirv.SLessThan %123, %cst0_i32 : i32 | |
| %125 = spirv.ISub %cst-1_i32, %123 : i32 | |
| %126 = spirv.Select %124, %125, %123 : i1, i32 | |
| %127 = spirv.SDiv %126, %cst8_i32 : i32 | |
| %128 = spirv.ISub %cst-1_i32, %127 : i32 | |
| %129 = spirv.Select %124, %128, %127 : i1, i32 | |
| %130 = spirv.IAdd %122, %129 : i32 | |
| %131 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %130] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %132 = spirv.Load "StorageBuffer" %131 : vector<4xf32> | |
| %133 = spirv.IMul %114, %cst5_i32 : i32 | |
| %134 = spirv.SLessThan %117, %cst0_i32 : i32 | |
| %135 = spirv.ISub %cst-1_i32, %117 : i32 | |
| %136 = spirv.Select %134, %135, %117 : i1, i32 | |
| %137 = spirv.SDiv %136, %cst8_i32 : i32 | |
| %138 = spirv.ISub %cst-1_i32, %137 : i32 | |
| %139 = spirv.Select %134, %138, %137 : i1, i32 | |
| %140 = spirv.IAdd %133, %139 : i32 | |
| %141 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %140] : !spirv.ptr<!spirv.struct<(!spirv.array<80 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %141, %132 : vector<4xf32> | |
| %142 = spirv.IAdd %117, %cst1024_i32 : i32 | |
| spirv.Branch ^bb1(%142 : i32) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
| %116 = spirv.IAdd %114, %cst1_i32 : i32 | |
| spirv.Branch ^bb1(%116 : i32) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%32 : i32) | |
| ^bb1(%114: i32): // 2 preds: ^bb0, ^bb2 | |
| %115 = spirv.SLessThan %114, %cst32_i32 : i32 | |
| spirv.BranchConditional %115, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%33 : i32) | |
| ^bb1(%117: i32): // 2 preds: ^bb0, ^bb2 | |
| %118 = spirv.SLessThan %117, %cst256_i32 : i32 | |
| spirv.BranchConditional %118, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
| %119 = spirv.IMul %63, %cst160_i32 : i32 | |
| %120 = spirv.IMul %114, %cst160_i32 : i32 | |
| %121 = spirv.IAdd %119, %120 : i32 | |
| %122 = spirv.IMul %28, %cst32_i32 : i32 | |
| %123 = spirv.IAdd %121, %122 : i32 | |
| %124 = spirv.IAdd %123, %18 : i32 | |
| %125 = spirv.SLessThan %117, %cst0_i32 : i32 | |
| %126 = spirv.ISub %cst-1_i32, %117 : i32 | |
| %127 = spirv.Select %125, %126, %117 : i1, i32 | |
| %128 = spirv.SDiv %127, %cst8_i32 : i32 | |
| %129 = spirv.ISub %cst-1_i32, %128 : i32 | |
| %130 = spirv.Select %125, %129, %128 : i1, i32 | |
| %131 = spirv.IAdd %124, %130 : i32 | |
| %132 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %131] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %133 = spirv.Load "StorageBuffer" %132 : vector<4xf32> | |
| %134 = spirv.IMul %114, %cst33_i32 : i32 | |
| %135 = spirv.IAdd %134, %130 : i32 | |
| %136 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %135] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %136, %133 : vector<4xf32> | |
| %137 = spirv.IAdd %117, %cst1024_i32 : i32 | |
| spirv.Branch ^bb1(%137 : i32) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
| %116 = spirv.IAdd %114, %cst1_i32 : i32 | |
| spirv.Branch ^bb1(%116 : i32) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| %69 = spirv.IMul %32, %cst80_i32 : i32 | |
| %70 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %69] : !spirv.ptr<!spirv.struct<(!spirv.array<80 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %71 = spirv.NV.CooperativeMatrixLoad %70, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %72 = spirv.IAdd %69, %cst2_i32 : i32 | |
| %73 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %72] : !spirv.ptr<!spirv.struct<(!spirv.array<80 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %74 = spirv.NV.CooperativeMatrixLoad %73, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %75 = spirv.SLessThan %30, %cst0_i32 : i32 | |
| %76 = spirv.ISub %cst-1_i32, %30 : i32 | |
| %77 = spirv.Select %75, %76, %30 : i1, i32 | |
| %78 = spirv.SDiv %77, %cst32_i32 : i32 | |
| %79 = spirv.ISub %cst-1_i32, %78 : i32 | |
| %80 = spirv.Select %75, %79, %78 : i1, i32 | |
| %81 = spirv.IMul %80, %cst8_i32 : i32 | |
| %82 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %81] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %83 = spirv.NV.CooperativeMatrixLoad %82, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %84 = spirv.IAdd %81, %cst2_i32 : i32 | |
| %85 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %84] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %86 = spirv.NV.CooperativeMatrixLoad %85, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %87 = spirv.IAdd %81, %cst4_i32 : i32 | |
| %88 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %87] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %89 = spirv.NV.CooperativeMatrixLoad %88, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %90 = spirv.IAdd %81, %cst6_i32 : i32 | |
| %91 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %90] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %92 = spirv.NV.CooperativeMatrixLoad %91, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %93 = spirv.IAdd %81, %cst528_i32 : i32 | |
| %94 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %93] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %95 = spirv.NV.CooperativeMatrixLoad %94, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %96 = spirv.IAdd %81, %cst530_i32 : i32 | |
| %97 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %96] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %98 = spirv.NV.CooperativeMatrixLoad %97, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %99 = spirv.IAdd %81, %cst532_i32 : i32 | |
| %100 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %99] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %101 = spirv.NV.CooperativeMatrixLoad %100, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %102 = spirv.IAdd %81, %cst534_i32 : i32 | |
| %103 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %102] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %104 = spirv.NV.CooperativeMatrixLoad %103, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %105 = spirv.NV.CooperativeMatrixMulAdd %71, %83, %64 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %106 = spirv.NV.CooperativeMatrixMulAdd %74, %95, %105 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %107 = spirv.NV.CooperativeMatrixMulAdd %71, %86, %65 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %108 = spirv.NV.CooperativeMatrixMulAdd %74, %98, %107 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %109 = spirv.NV.CooperativeMatrixMulAdd %71, %89, %66 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %110 = spirv.NV.CooperativeMatrixMulAdd %74, %101, %109 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %111 = spirv.NV.CooperativeMatrixMulAdd %71, %92, %67 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %112 = spirv.NV.CooperativeMatrixMulAdd %74, %104, %111 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %34, %106 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %35, %108 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %36, %110 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %37, %112 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %113 = spirv.IAdd %63, %cst32_i32 : i32 | |
| spirv.Branch ^bb1(%113, %106, %108, %110, %112 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
| %38 = spirv.Load "Function" %37 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %39 = spirv.Load "Function" %36 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %40 = spirv.Load "Function" %35 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %41 = spirv.Load "Function" %34 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %42 = spirv.IMul %26, %cst2560_i32 : i32 | |
| %43 = spirv.IMul %32, %cst2560_i32 : i32 | |
| %44 = spirv.IAdd %42, %43 : i32 | |
| %45 = spirv.IMul %28, %cst32_i32 : i32 | |
| %46 = spirv.IAdd %44, %45 : i32 | |
| %47 = spirv.IAdd %46, %24 : i32 | |
| %48 = spirv.SLessThan %30, %cst0_i32 : i32 | |
| %49 = spirv.ISub %cst-1_i32, %30 : i32 | |
| %50 = spirv.Select %48, %49, %30 : i1, i32 | |
| %51 = spirv.SDiv %50, %cst32_i32 : i32 | |
| %52 = spirv.ISub %cst-1_i32, %51 : i32 | |
| %53 = spirv.Select %48, %52, %51 : i1, i32 | |
| %54 = spirv.IMul %53, %cst8_i32 : i32 | |
| %55 = spirv.IAdd %47, %54 : i32 | |
| %56 = spirv.IAdd %55, %cst6_i32 : i32 | |
| %57 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %56] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %57, %38, %cst160_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %58 = spirv.IAdd %55, %cst4_i32 : i32 | |
| %59 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %58] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %59, %39, %cst160_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %60 = spirv.IAdd %55, %cst2_i32 : i32 | |
| %61 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %60] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %61, %40, %cst160_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %62 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %55] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %62, %41, %cst160_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_9_matmul_32x1280x1280, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_9_matmul_32x1280x1280 "LocalSize", 128, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
// Executable @forward_dispatch_10: a per-element f16 copy from binding (0, 0) to binding (0, 1).
// Each invocation loads one f16 at a linear index built from 96 / 9216 strides and stores it
// at a linear index built from 98 / 9604 strides plus a constant offset of 419.
// NOTE(review): 9216 = 96*96 and 9604 = 98*98, so this looks like writing 96x96 planes into
// the interior of 98x98 (1-element border) planes -- confirm against the producing model.
| hal.executable private @forward_dispatch_10 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
// Export region: workgroup_size attribute is [32, 1, 1]; the region returns (3, 96, 8).
// Presumably these are the x/y/z workgroup counts -- confirm against the HAL export semantics.
| hal.executable.export public @forward_dispatch_10 ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation, workgroup_size = [32 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| %c3 = arith.constant 3 : index | |
| %c96 = arith.constant 96 : index | |
| %c8 = arith.constant 8 : index | |
| hal.return %c3, %c96, %c8 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
// Both bindings are viewed as runtime arrays of f16 with a 2-byte stride.
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_10() "None" { | |
| %cst419_i32 = spirv.Constant 419 : i32 | |
| %cst38416_i32 = spirv.Constant 38416 : i32 | |
| %cst98_i32 = spirv.Constant 98 : i32 | |
| %cst9604_i32 = spirv.Constant 9604 : i32 | |
| %cst36864_i32 = spirv.Constant 36864 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst96_i32 = spirv.Constant 96 : i32 | |
| %cst9216_i32 = spirv.Constant 9216 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst4_i32 = spirv.Constant 4 : i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
// %1 / %3 / %5 = WorkgroupId x / y / z.
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %0 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %1 = spirv.CompositeExtract %0[0 : i32] : vector<3xi32> | |
| %2 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %3 = spirv.CompositeExtract %2[1 : i32] : vector<3xi32> | |
| %4 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %5 = spirv.CompositeExtract %4[2 : i32] : vector<3xi32> | |
// Split the z workgroup id into two coordinates: %6 = z mod 4, %7 = z div 4.
| %6 = spirv.UMod %5, %cst4_i32 : i32 | |
| %7 = spirv.UDiv %5, %cst4_i32 : i32 | |
// %9 / %11 / %13 = LocalInvocationId x / y / z. The entry point declares LocalSize 32, 1, 1,
// so the y and z local ids should always be 0 and only %9 varies within a workgroup.
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %8 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %9 = spirv.CompositeExtract %8[0 : i32] : vector<3xi32> | |
| %10 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %11 = spirv.CompositeExtract %10[1 : i32] : vector<3xi32> | |
| %12 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %13 = spirv.CompositeExtract %12[2 : i32] : vector<3xi32> | |
// Source index %25 = (%6 + local_z) * 9216 + (wg_y + local_y) * 96 + local_x + wg_x * 32 + %7 * 36864.
// (36864 = 4 * 9216.)
| %14 = spirv.IMul %6, %cst9216_i32 : i32 | |
| %15 = spirv.IMul %13, %cst9216_i32 : i32 | |
| %16 = spirv.IAdd %14, %15 : i32 | |
| %17 = spirv.IMul %3, %cst96_i32 : i32 | |
| %18 = spirv.IAdd %16, %17 : i32 | |
| %19 = spirv.IMul %11, %cst96_i32 : i32 | |
| %20 = spirv.IAdd %18, %19 : i32 | |
| %21 = spirv.IAdd %20, %9 : i32 | |
| %22 = spirv.IMul %1, %cst32_i32 : i32 | |
| %23 = spirv.IAdd %21, %22 : i32 | |
| %24 = spirv.IMul %7, %cst36864_i32 : i32 | |
| %25 = spirv.IAdd %23, %24 : i32 | |
| %26 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %25] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %27 = spirv.Load "StorageBuffer" %26 : f16 | |
// Destination index %39 = (%6 + local_z) * 9604 + (wg_y + local_y) * 98 + local_x + wg_x * 32
//                         + %7 * 38416 + 419.   (38416 = 4 * 9604.)
// The wg_x * 32 term (%22) is reused from the source index computation.
// NOTE(review): 419 = 320 + 98 + 1; presumably a 320-element base offset plus one row and one
// column of border in the 98-wide layout -- confirm.
| %28 = spirv.IMul %6, %cst9604_i32 : i32 | |
| %29 = spirv.IMul %13, %cst9604_i32 : i32 | |
| %30 = spirv.IAdd %28, %29 : i32 | |
| %31 = spirv.IMul %3, %cst98_i32 : i32 | |
| %32 = spirv.IAdd %30, %31 : i32 | |
| %33 = spirv.IMul %11, %cst98_i32 : i32 | |
| %34 = spirv.IAdd %32, %33 : i32 | |
| %35 = spirv.IAdd %34, %9 : i32 | |
| %36 = spirv.IAdd %35, %22 : i32 | |
| %37 = spirv.IMul %7, %cst38416_i32 : i32 | |
| %38 = spirv.IAdd %36, %37 : i32 | |
| %39 = spirv.IAdd %38, %cst419_i32 : i32 | |
| %40 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %39] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
// Write the loaded f16 unchanged; no arithmetic is applied to the value.
| spirv.Store "StorageBuffer" %40, %27 : f16 | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_10, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_10 "LocalSize", 32, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
// Executable @forward_dispatch_11: a per-element f16 gather/copy from binding (0, 0) to binding (0, 1).
// The z workgroup id is decomposed into four coordinates that select the source offset (98-wide
// layout) and the destination slab (96-wide layout); each invocation moves exactly one f16.
// NOTE(review): the access pattern (row/column shifts of 0..2 in a 98-wide source, written to
// separate 96x96 destination planes) looks like unfolding 3x3 windows of a padded image -- confirm.
| hal.executable private @forward_dispatch_11 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
// Export region: workgroup_size attribute is [32, 1, 1]; the region returns (3, 96, 72).
// Presumably these are the x/y/z workgroup counts -- confirm against the HAL export semantics.
| hal.executable.export public @forward_dispatch_11_generic_2x4x3x3x96x96 ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation, workgroup_size = [32 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: index): | |
| %c3 = arith.constant 3 : index | |
| %c96 = arith.constant 96 : index | |
| %c72 = arith.constant 72 : index | |
| hal.return %c3, %c96, %c72 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
// Both bindings are viewed as runtime arrays of f16 with a 2-byte stride.
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_11_generic_2x4x3x3x96x96() "None" { | |
| %cst78432_i32 = spirv.Constant 78432 : i32 | |
| %cst27648_i32 = spirv.Constant 27648 : i32 | |
| %cst82944_i32 = spirv.Constant 82944 : i32 | |
| %cst331776_i32 = spirv.Constant 331776 : i32 | |
| %cst96_i32 = spirv.Constant 96 : i32 | |
| %cst9216_i32 = spirv.Constant 9216 : i32 | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
| %cst9604_i32 = spirv.Constant 9604 : i32 | |
| %cst38416_i32 = spirv.Constant 38416 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst98_i32 = spirv.Constant 98 : i32 | |
| %cst3_i32 = spirv.Constant 3 : i32 | |
| %cst4_i32 = spirv.Constant 4 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
// Mixed-radix split of the z workgroup id (%1), least-significant digit first:
//   %7 = z mod 3, %6 = (z div 3) mod 3, %5 = (z div 9) mod 4, %4 = (z div 9) div 4.
// i.e. z = ((%4 * 4 + %5) * 3 + %6) * 3 + %7.
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %0 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %1 = spirv.CompositeExtract %0[2 : i32] : vector<3xi32> | |
| %2 = spirv.UDiv %1, %cst3_i32 : i32 | |
| %3 = spirv.UDiv %2, %cst3_i32 : i32 | |
| %4 = spirv.UDiv %3, %cst4_i32 : i32 | |
| %5 = spirv.UMod %3, %cst4_i32 : i32 | |
| %6 = spirv.UMod %2, %cst3_i32 : i32 | |
| %7 = spirv.UMod %1, %cst3_i32 : i32 | |
// %9 = WorkgroupId y, %11 = WorkgroupId x.
| %8 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %9 = spirv.CompositeExtract %8[1 : i32] : vector<3xi32> | |
| %10 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %11 = spirv.CompositeExtract %10[0 : i32] : vector<3xi32> | |
// %13 / %15 / %17 = LocalInvocationId x / y / z. The entry point declares LocalSize 32, 1, 1,
// so the y and z local ids should always be 0 and only %13 varies within a workgroup.
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %12 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %13 = spirv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
| %14 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %15 = spirv.CompositeExtract %14[1 : i32] : vector<3xi32> | |
| %16 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %17 = spirv.CompositeExtract %16[2 : i32] : vector<3xi32> | |
// Source index %32 = (local_y + %6 + wg_y) * 98 + %7 + wg_x * 32 + local_z + local_x
//                    + %4 * 38416 + %5 * 9604 + 320.
// (9604 = 98 * 98, 38416 = 4 * 9604.) %6 shifts the row and %7 shifts the column of the read.
// NOTE(review): the trailing 320 is a constant base offset whose origin is not visible here.
| %18 = spirv.IMul %15, %cst98_i32 : i32 | |
| %19 = spirv.IMul %6, %cst98_i32 : i32 | |
| %20 = spirv.IAdd %18, %19 : i32 | |
| %21 = spirv.IMul %9, %cst98_i32 : i32 | |
| %22 = spirv.IAdd %20, %21 : i32 | |
| %23 = spirv.IAdd %22, %7 : i32 | |
| %24 = spirv.IMul %11, %cst32_i32 : i32 | |
| %25 = spirv.IAdd %23, %24 : i32 | |
| %26 = spirv.IAdd %25, %17 : i32 | |
| %27 = spirv.IAdd %26, %13 : i32 | |
| %28 = spirv.IMul %4, %cst38416_i32 : i32 | |
| %29 = spirv.IAdd %27, %28 : i32 | |
| %30 = spirv.IMul %5, %cst9604_i32 : i32 | |
| %31 = spirv.IAdd %29, %30 : i32 | |
| %32 = spirv.IAdd %31, %cst320_i32 : i32 | |
| %33 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %32] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %34 = spirv.Load "StorageBuffer" %33 : f16 | |
// Destination index %50 = (%7 + local_z) * 9216 + (wg_y + local_y) * 96 + local_x + wg_x * 32
//                         + %4 * 331776 + %5 * 82944 + %6 * 27648 + 78432.
// (9216 = 96 * 96, 27648 = 3 * 9216, 82944 = 9 * 9216, 331776 = 36 * 9216.)
// The wg_x * 32 term (%24) is reused from the source index computation.
// NOTE(review): 78432 is a constant base offset into binding (0, 1); its origin is not visible here.
| %35 = spirv.IMul %7, %cst9216_i32 : i32 | |
| %36 = spirv.IMul %17, %cst9216_i32 : i32 | |
| %37 = spirv.IAdd %35, %36 : i32 | |
| %38 = spirv.IMul %9, %cst96_i32 : i32 | |
| %39 = spirv.IAdd %37, %38 : i32 | |
| %40 = spirv.IMul %15, %cst96_i32 : i32 | |
| %41 = spirv.IAdd %39, %40 : i32 | |
| %42 = spirv.IAdd %41, %13 : i32 | |
| %43 = spirv.IAdd %42, %24 : i32 | |
| %44 = spirv.IMul %4, %cst331776_i32 : i32 | |
| %45 = spirv.IAdd %43, %44 : i32 | |
| %46 = spirv.IMul %5, %cst82944_i32 : i32 | |
| %47 = spirv.IAdd %45, %46 : i32 | |
| %48 = spirv.IMul %6, %cst27648_i32 : i32 | |
| %49 = spirv.IAdd %47, %48 : i32 | |
| %50 = spirv.IAdd %49, %cst78432_i32 : i32 | |
| %51 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %50] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
// Write the loaded f16 unchanged; no arithmetic is applied to the value.
| spirv.Store "StorageBuffer" %51, %34 : f16 | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_11_generic_2x4x3x3x96x96, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_11_generic_2x4x3x3x96x96 "LocalSize", 32, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
| hal.executable private @forward_dispatch_12 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
| hal.executable.export public @forward_dispatch_12_generic_2x320x9216x36 ordinal(0) layout(#pipeline_layout2) attributes {translation_info = #translation1, workgroup_size = [32 : index, 8 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
| %c36 = arith.constant 36 : index | |
| %c5 = arith.constant 5 : index | |
| %c2 = arith.constant 2 : index | |
| hal.return %c36, %c5, %c2 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__resource_var_0_0__0 bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_12_generic_2x320x9216x36() "None" { | |
| %cst100812_i32 = spirv.Constant 100812 : i32 | |
| %cst99660_i32 = spirv.Constant 99660 : i32 | |
| %cst98508_i32 = spirv.Constant 98508 : i32 | |
| %cst97356_i32 = spirv.Constant 97356 : i32 | |
| %cst96204_i32 = spirv.Constant 96204 : i32 | |
| %cst95052_i32 = spirv.Constant 95052 : i32 | |
| %cst93900_i32 = spirv.Constant 93900 : i32 | |
| %cst368640_i32 = spirv.Constant 368640 : i32 | |
| %cst9216_i32 = spirv.Constant 9216 : i32 | |
| %cst73728_i32 = spirv.Constant 73728 : i32 | |
| %cst108199360_i32 = spirv.Constant 108199360 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst13260_i32 = spirv.Constant 13260 : i32 | |
| %cst12108_i32 = spirv.Constant 12108 : i32 | |
| %cst10956_i32 = spirv.Constant 10956 : i32 | |
| %cst9804_i32 = spirv.Constant 9804 : i32 | |
| %cst41472_i32 = spirv.Constant 41472 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst1152_i32 = spirv.Constant 1152 : i32 | |
| %cst75336383_i32 = spirv.Constant 75336383 : i32 | |
| %cst75336374_i32 = spirv.Constant 75336374 : i32 | |
| %cst75336365_i32 = spirv.Constant 75336365 : i32 | |
| %cst75336356_i32 = spirv.Constant 75336356 : i32 | |
| %cst75336347_i32 = spirv.Constant 75336347 : i32 | |
| %cst75336338_i32 = spirv.Constant 75336338 : i32 | |
| %cst75336329_i32 = spirv.Constant 75336329 : i32 | |
| %cst75336320_i32 = spirv.Constant 75336320 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst72_i32 = spirv.Constant 72 : i32 | |
| %cst576_i32 = spirv.Constant 576 : i32 | |
| %cst92748_i32 = spirv.Constant 92748 : i32 | |
| %cst_vec_4xf32 = spirv.Constant dense<0.000000e+00> : vector<4xf32> | |
| %cst36_i32 = spirv.Constant 36 : i32 | |
| %cst4_i32 = spirv.Constant 4 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %__resource_var_0_0__0_addr = spirv.mlir.addressof @__resource_var_0_0__0 : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %0 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %1 = spirv.CompositeExtract %0[2 : i32] : vector<3xi32> | |
| %2 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %3 = spirv.CompositeExtract %2[1 : i32] : vector<3xi32> | |
| %4 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %5 = spirv.CompositeExtract %4[0 : i32] : vector<3xi32> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %6 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %7 = spirv.CompositeExtract %6[1 : i32] : vector<3xi32> | |
| %8 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %9 = spirv.CompositeExtract %8[0 : i32] : vector<3xi32> | |
| %10 = spirv.Variable : !spirv.ptr<vector<4xf32>, Function> | |
| %11 = spirv.Variable : !spirv.ptr<vector<4xf32>, Function> | |
| %12 = spirv.Variable : !spirv.ptr<vector<4xf32>, Function> | |
| %13 = spirv.Variable : !spirv.ptr<vector<4xf32>, Function> | |
| %14 = spirv.Variable : !spirv.ptr<vector<4xf32>, Function> | |
| %15 = spirv.Variable : !spirv.ptr<vector<4xf32>, Function> | |
| %16 = spirv.Variable : !spirv.ptr<vector<4xf32>, Function> | |
| %17 = spirv.Variable : !spirv.ptr<vector<4xf32>, Function> | |
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%cst0_i32, %cst_vec_4xf32, %cst_vec_4xf32, %cst_vec_4xf32, %cst_vec_4xf32, %cst_vec_4xf32, %cst_vec_4xf32, %cst_vec_4xf32, %cst_vec_4xf32 : i32, vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>) | |
| ^bb1(%159: i32, %160: vector<4xf32>, %161: vector<4xf32>, %162: vector<4xf32>, %163: vector<4xf32>, %164: vector<4xf32>, %165: vector<4xf32>, %166: vector<4xf32>, %167: vector<4xf32>): // 2 preds: ^bb0, ^bb2 | |
| %168 = spirv.SLessThan %159, %cst36_i32 : i32 | |
| spirv.BranchConditional %168, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
| %169 = spirv.IMul %3, %cst576_i32 : i32 | |
| %170 = spirv.IMul %7, %cst72_i32 : i32 | |
| %171 = spirv.IAdd %169, %170 : i32 | |
| %172 = spirv.SLessThan %159, %cst0_i32 : i32 | |
| %173 = spirv.ISub %cst-1_i32, %159 : i32 | |
| %174 = spirv.Select %172, %173, %159 : i1, i32 | |
| %175 = spirv.SDiv %174, %cst4_i32 : i32 | |
| %176 = spirv.ISub %cst-1_i32, %175 : i32 | |
| %177 = spirv.Select %172, %176, %175 : i1, i32 | |
| %178 = spirv.IAdd %171, %177 : i32 | |
| %179 = spirv.IAdd %178, %cst75336320_i32 : i32 | |
| %180 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %179] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| %181 = spirv.Load "StorageBuffer" %180 : vector<4xf16> | |
| %182 = spirv.IAdd %178, %cst75336329_i32 : i32 | |
| %183 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %182] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| %184 = spirv.Load "StorageBuffer" %183 : vector<4xf16> | |
| %185 = spirv.IAdd %178, %cst75336338_i32 : i32 | |
| %186 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %185] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| %187 = spirv.Load "StorageBuffer" %186 : vector<4xf16> | |
| %188 = spirv.IAdd %178, %cst75336347_i32 : i32 | |
| %189 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %188] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| %190 = spirv.Load "StorageBuffer" %189 : vector<4xf16> | |
| %191 = spirv.IAdd %178, %cst75336356_i32 : i32 | |
| %192 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %191] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| %193 = spirv.Load "StorageBuffer" %192 : vector<4xf16> | |
| %194 = spirv.IAdd %178, %cst75336365_i32 : i32 | |
| %195 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %194] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| %196 = spirv.Load "StorageBuffer" %195 : vector<4xf16> | |
| %197 = spirv.IAdd %178, %cst75336374_i32 : i32 | |
| %198 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %197] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| %199 = spirv.Load "StorageBuffer" %198 : vector<4xf16> | |
| %200 = spirv.IAdd %178, %cst75336383_i32 : i32 | |
| %201 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %200] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| %202 = spirv.Load "StorageBuffer" %201 : vector<4xf16> | |
| %203 = spirv.IMul %159, %cst1152_i32 : i32 | |
| %204 = spirv.IMul %5, %cst32_i32 : i32 | |
| %205 = spirv.IAdd %203, %204 : i32 | |
| %206 = spirv.IAdd %205, %9 : i32 | |
| %207 = spirv.IMul %1, %cst41472_i32 : i32 | |
| %208 = spirv.IAdd %206, %207 : i32 | |
| %209 = spirv.IAdd %208, %cst9804_i32 : i32 | |
| %210 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %209] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %211 = spirv.Load "StorageBuffer" %210 : vector<4xf32> | |
| %212 = spirv.IAdd %208, %cst10956_i32 : i32 | |
| %213 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %212] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %214 = spirv.Load "StorageBuffer" %213 : vector<4xf32> | |
| %215 = spirv.IAdd %208, %cst12108_i32 : i32 | |
| %216 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %215] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %217 = spirv.Load "StorageBuffer" %216 : vector<4xf32> | |
| %218 = spirv.IAdd %208, %cst13260_i32 : i32 | |
| %219 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %218] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %220 = spirv.Load "StorageBuffer" %219 : vector<4xf32> | |
| %221 = spirv.VectorShuffle [0 : i32, 1 : i32] %211 : vector<4xf32>, %211 : vector<4xf32> -> vector<2xf32> | |
| %222 = spirv.Bitcast %221 : vector<2xf32> to vector<4xf16> | |
| %223 = spirv.VectorShuffle [0 : i32, 1 : i32] %167 : vector<4xf32>, %167 : vector<4xf32> -> vector<2xf32> | |
| %224 = spirv.Bitcast %223 : vector<2xf32> to vector<4xf16> | |
| %225 = spirv.CompositeExtract %181[0 : i32] : vector<4xf16> | |
| %226 = spirv.CompositeConstruct %225, %225, %225, %225 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %227 = spirv.GL.Fma %226, %222, %224 : vector<4xf16> | |
| %228 = spirv.VectorShuffle [0 : i32, 1 : i32] %214 : vector<4xf32>, %214 : vector<4xf32> -> vector<2xf32> | |
| %229 = spirv.Bitcast %228 : vector<2xf32> to vector<4xf16> | |
| %230 = spirv.CompositeExtract %181[1 : i32] : vector<4xf16> | |
| %231 = spirv.CompositeConstruct %230, %230, %230, %230 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %232 = spirv.GL.Fma %231, %229, %227 : vector<4xf16> | |
| %233 = spirv.VectorShuffle [0 : i32, 1 : i32] %217 : vector<4xf32>, %217 : vector<4xf32> -> vector<2xf32> | |
| %234 = spirv.Bitcast %233 : vector<2xf32> to vector<4xf16> | |
| %235 = spirv.CompositeExtract %181[2 : i32] : vector<4xf16> | |
| %236 = spirv.CompositeConstruct %235, %235, %235, %235 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %237 = spirv.GL.Fma %236, %234, %232 : vector<4xf16> | |
| %238 = spirv.VectorShuffle [0 : i32, 1 : i32] %220 : vector<4xf32>, %220 : vector<4xf32> -> vector<2xf32> | |
| %239 = spirv.Bitcast %238 : vector<2xf32> to vector<4xf16> | |
| %240 = spirv.CompositeExtract %181[3 : i32] : vector<4xf16> | |
| %241 = spirv.CompositeConstruct %240, %240, %240, %240 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %242 = spirv.GL.Fma %241, %239, %237 : vector<4xf16> | |
| %243 = spirv.VectorShuffle [2 : i32, 3 : i32] %211 : vector<4xf32>, %211 : vector<4xf32> -> vector<2xf32> | |
| %244 = spirv.Bitcast %243 : vector<2xf32> to vector<4xf16> | |
| %245 = spirv.VectorShuffle [2 : i32, 3 : i32] %167 : vector<4xf32>, %167 : vector<4xf32> -> vector<2xf32> | |
| %246 = spirv.Bitcast %245 : vector<2xf32> to vector<4xf16> | |
| %247 = spirv.GL.Fma %226, %244, %246 : vector<4xf16> | |
| %248 = spirv.VectorShuffle [2 : i32, 3 : i32] %214 : vector<4xf32>, %214 : vector<4xf32> -> vector<2xf32> | |
| %249 = spirv.Bitcast %248 : vector<2xf32> to vector<4xf16> | |
| %250 = spirv.GL.Fma %231, %249, %247 : vector<4xf16> | |
| %251 = spirv.VectorShuffle [2 : i32, 3 : i32] %217 : vector<4xf32>, %217 : vector<4xf32> -> vector<2xf32> | |
| %252 = spirv.Bitcast %251 : vector<2xf32> to vector<4xf16> | |
| %253 = spirv.GL.Fma %236, %252, %250 : vector<4xf16> | |
| %254 = spirv.VectorShuffle [2 : i32, 3 : i32] %220 : vector<4xf32>, %220 : vector<4xf32> -> vector<2xf32> | |
| %255 = spirv.Bitcast %254 : vector<2xf32> to vector<4xf16> | |
| %256 = spirv.GL.Fma %241, %255, %253 : vector<4xf16> | |
| %257 = spirv.VectorShuffle [0 : i32, 1 : i32] %166 : vector<4xf32>, %166 : vector<4xf32> -> vector<2xf32> | |
| %258 = spirv.Bitcast %257 : vector<2xf32> to vector<4xf16> | |
| %259 = spirv.CompositeExtract %184[0 : i32] : vector<4xf16> | |
| %260 = spirv.CompositeConstruct %259, %259, %259, %259 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %261 = spirv.GL.Fma %260, %222, %258 : vector<4xf16> | |
| %262 = spirv.CompositeExtract %184[1 : i32] : vector<4xf16> | |
| %263 = spirv.CompositeConstruct %262, %262, %262, %262 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %264 = spirv.GL.Fma %263, %229, %261 : vector<4xf16> | |
| %265 = spirv.CompositeExtract %184[2 : i32] : vector<4xf16> | |
| %266 = spirv.CompositeConstruct %265, %265, %265, %265 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %267 = spirv.GL.Fma %266, %234, %264 : vector<4xf16> | |
| %268 = spirv.CompositeExtract %184[3 : i32] : vector<4xf16> | |
| %269 = spirv.CompositeConstruct %268, %268, %268, %268 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %270 = spirv.GL.Fma %269, %239, %267 : vector<4xf16> | |
| %271 = spirv.VectorShuffle [2 : i32, 3 : i32] %166 : vector<4xf32>, %166 : vector<4xf32> -> vector<2xf32> | |
| %272 = spirv.Bitcast %271 : vector<2xf32> to vector<4xf16> | |
| %273 = spirv.GL.Fma %260, %244, %272 : vector<4xf16> | |
| %274 = spirv.GL.Fma %263, %249, %273 : vector<4xf16> | |
| %275 = spirv.GL.Fma %266, %252, %274 : vector<4xf16> | |
| %276 = spirv.GL.Fma %269, %255, %275 : vector<4xf16> | |
| %277 = spirv.VectorShuffle [0 : i32, 1 : i32] %165 : vector<4xf32>, %165 : vector<4xf32> -> vector<2xf32> | |
| %278 = spirv.Bitcast %277 : vector<2xf32> to vector<4xf16> | |
| %279 = spirv.CompositeExtract %187[0 : i32] : vector<4xf16> | |
| %280 = spirv.CompositeConstruct %279, %279, %279, %279 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %281 = spirv.GL.Fma %280, %222, %278 : vector<4xf16> | |
| %282 = spirv.CompositeExtract %187[1 : i32] : vector<4xf16> | |
| %283 = spirv.CompositeConstruct %282, %282, %282, %282 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %284 = spirv.GL.Fma %283, %229, %281 : vector<4xf16> | |
| %285 = spirv.CompositeExtract %187[2 : i32] : vector<4xf16> | |
| %286 = spirv.CompositeConstruct %285, %285, %285, %285 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %287 = spirv.GL.Fma %286, %234, %284 : vector<4xf16> | |
| %288 = spirv.CompositeExtract %187[3 : i32] : vector<4xf16> | |
| %289 = spirv.CompositeConstruct %288, %288, %288, %288 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %290 = spirv.GL.Fma %289, %239, %287 : vector<4xf16> | |
| %291 = spirv.VectorShuffle [2 : i32, 3 : i32] %165 : vector<4xf32>, %165 : vector<4xf32> -> vector<2xf32> | |
| %292 = spirv.Bitcast %291 : vector<2xf32> to vector<4xf16> | |
| %293 = spirv.GL.Fma %280, %244, %292 : vector<4xf16> | |
| %294 = spirv.GL.Fma %283, %249, %293 : vector<4xf16> | |
| %295 = spirv.GL.Fma %286, %252, %294 : vector<4xf16> | |
| %296 = spirv.GL.Fma %289, %255, %295 : vector<4xf16> | |
| %297 = spirv.VectorShuffle [0 : i32, 1 : i32] %164 : vector<4xf32>, %164 : vector<4xf32> -> vector<2xf32> | |
| %298 = spirv.Bitcast %297 : vector<2xf32> to vector<4xf16> | |
| %299 = spirv.CompositeExtract %190[0 : i32] : vector<4xf16> | |
| %300 = spirv.CompositeConstruct %299, %299, %299, %299 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %301 = spirv.GL.Fma %300, %222, %298 : vector<4xf16> | |
| %302 = spirv.CompositeExtract %190[1 : i32] : vector<4xf16> | |
| %303 = spirv.CompositeConstruct %302, %302, %302, %302 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %304 = spirv.GL.Fma %303, %229, %301 : vector<4xf16> | |
| %305 = spirv.CompositeExtract %190[2 : i32] : vector<4xf16> | |
| %306 = spirv.CompositeConstruct %305, %305, %305, %305 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %307 = spirv.GL.Fma %306, %234, %304 : vector<4xf16> | |
| %308 = spirv.CompositeExtract %190[3 : i32] : vector<4xf16> | |
| %309 = spirv.CompositeConstruct %308, %308, %308, %308 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %310 = spirv.GL.Fma %309, %239, %307 : vector<4xf16> | |
| %311 = spirv.VectorShuffle [2 : i32, 3 : i32] %164 : vector<4xf32>, %164 : vector<4xf32> -> vector<2xf32> | |
| %312 = spirv.Bitcast %311 : vector<2xf32> to vector<4xf16> | |
| %313 = spirv.GL.Fma %300, %244, %312 : vector<4xf16> | |
| %314 = spirv.GL.Fma %303, %249, %313 : vector<4xf16> | |
| %315 = spirv.GL.Fma %306, %252, %314 : vector<4xf16> | |
| %316 = spirv.GL.Fma %309, %255, %315 : vector<4xf16> | |
| %317 = spirv.VectorShuffle [0 : i32, 1 : i32] %163 : vector<4xf32>, %163 : vector<4xf32> -> vector<2xf32> | |
| %318 = spirv.Bitcast %317 : vector<2xf32> to vector<4xf16> | |
| %319 = spirv.CompositeExtract %193[0 : i32] : vector<4xf16> | |
| %320 = spirv.CompositeConstruct %319, %319, %319, %319 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %321 = spirv.GL.Fma %320, %222, %318 : vector<4xf16> | |
| %322 = spirv.CompositeExtract %193[1 : i32] : vector<4xf16> | |
| %323 = spirv.CompositeConstruct %322, %322, %322, %322 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %324 = spirv.GL.Fma %323, %229, %321 : vector<4xf16> | |
| %325 = spirv.CompositeExtract %193[2 : i32] : vector<4xf16> | |
| %326 = spirv.CompositeConstruct %325, %325, %325, %325 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %327 = spirv.GL.Fma %326, %234, %324 : vector<4xf16> | |
| %328 = spirv.CompositeExtract %193[3 : i32] : vector<4xf16> | |
| %329 = spirv.CompositeConstruct %328, %328, %328, %328 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %330 = spirv.GL.Fma %329, %239, %327 : vector<4xf16> | |
| %331 = spirv.VectorShuffle [2 : i32, 3 : i32] %163 : vector<4xf32>, %163 : vector<4xf32> -> vector<2xf32> | |
| %332 = spirv.Bitcast %331 : vector<2xf32> to vector<4xf16> | |
| %333 = spirv.GL.Fma %320, %244, %332 : vector<4xf16> | |
| %334 = spirv.GL.Fma %323, %249, %333 : vector<4xf16> | |
| %335 = spirv.GL.Fma %326, %252, %334 : vector<4xf16> | |
| %336 = spirv.GL.Fma %329, %255, %335 : vector<4xf16> | |
| %337 = spirv.VectorShuffle [0 : i32, 1 : i32] %162 : vector<4xf32>, %162 : vector<4xf32> -> vector<2xf32> | |
| %338 = spirv.Bitcast %337 : vector<2xf32> to vector<4xf16> | |
| %339 = spirv.CompositeExtract %196[0 : i32] : vector<4xf16> | |
| %340 = spirv.CompositeConstruct %339, %339, %339, %339 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %341 = spirv.GL.Fma %340, %222, %338 : vector<4xf16> | |
| %342 = spirv.CompositeExtract %196[1 : i32] : vector<4xf16> | |
| %343 = spirv.CompositeConstruct %342, %342, %342, %342 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %344 = spirv.GL.Fma %343, %229, %341 : vector<4xf16> | |
| %345 = spirv.CompositeExtract %196[2 : i32] : vector<4xf16> | |
| %346 = spirv.CompositeConstruct %345, %345, %345, %345 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %347 = spirv.GL.Fma %346, %234, %344 : vector<4xf16> | |
| %348 = spirv.CompositeExtract %196[3 : i32] : vector<4xf16> | |
| %349 = spirv.CompositeConstruct %348, %348, %348, %348 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %350 = spirv.GL.Fma %349, %239, %347 : vector<4xf16> | |
| %351 = spirv.VectorShuffle [2 : i32, 3 : i32] %162 : vector<4xf32>, %162 : vector<4xf32> -> vector<2xf32> | |
| %352 = spirv.Bitcast %351 : vector<2xf32> to vector<4xf16> | |
| %353 = spirv.GL.Fma %340, %244, %352 : vector<4xf16> | |
| %354 = spirv.GL.Fma %343, %249, %353 : vector<4xf16> | |
| %355 = spirv.GL.Fma %346, %252, %354 : vector<4xf16> | |
| %356 = spirv.GL.Fma %349, %255, %355 : vector<4xf16> | |
| %357 = spirv.VectorShuffle [0 : i32, 1 : i32] %161 : vector<4xf32>, %161 : vector<4xf32> -> vector<2xf32> | |
| %358 = spirv.Bitcast %357 : vector<2xf32> to vector<4xf16> | |
| %359 = spirv.CompositeExtract %199[0 : i32] : vector<4xf16> | |
| %360 = spirv.CompositeConstruct %359, %359, %359, %359 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %361 = spirv.GL.Fma %360, %222, %358 : vector<4xf16> | |
| %362 = spirv.CompositeExtract %199[1 : i32] : vector<4xf16> | |
| %363 = spirv.CompositeConstruct %362, %362, %362, %362 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %364 = spirv.GL.Fma %363, %229, %361 : vector<4xf16> | |
| %365 = spirv.CompositeExtract %199[2 : i32] : vector<4xf16> | |
| %366 = spirv.CompositeConstruct %365, %365, %365, %365 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %367 = spirv.GL.Fma %366, %234, %364 : vector<4xf16> | |
| %368 = spirv.CompositeExtract %199[3 : i32] : vector<4xf16> | |
| %369 = spirv.CompositeConstruct %368, %368, %368, %368 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %370 = spirv.GL.Fma %369, %239, %367 : vector<4xf16> | |
| %371 = spirv.VectorShuffle [2 : i32, 3 : i32] %161 : vector<4xf32>, %161 : vector<4xf32> -> vector<2xf32> | |
| %372 = spirv.Bitcast %371 : vector<2xf32> to vector<4xf16> | |
| %373 = spirv.GL.Fma %360, %244, %372 : vector<4xf16> | |
| %374 = spirv.GL.Fma %363, %249, %373 : vector<4xf16> | |
| %375 = spirv.GL.Fma %366, %252, %374 : vector<4xf16> | |
| %376 = spirv.GL.Fma %369, %255, %375 : vector<4xf16> | |
| %377 = spirv.VectorShuffle [0 : i32, 1 : i32] %160 : vector<4xf32>, %160 : vector<4xf32> -> vector<2xf32> | |
| %378 = spirv.Bitcast %377 : vector<2xf32> to vector<4xf16> | |
| %379 = spirv.CompositeExtract %202[0 : i32] : vector<4xf16> | |
| %380 = spirv.CompositeConstruct %379, %379, %379, %379 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %381 = spirv.GL.Fma %380, %222, %378 : vector<4xf16> | |
| %382 = spirv.CompositeExtract %202[1 : i32] : vector<4xf16> | |
| %383 = spirv.CompositeConstruct %382, %382, %382, %382 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %384 = spirv.GL.Fma %383, %229, %381 : vector<4xf16> | |
| %385 = spirv.CompositeExtract %202[2 : i32] : vector<4xf16> | |
| %386 = spirv.CompositeConstruct %385, %385, %385, %385 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %387 = spirv.GL.Fma %386, %234, %384 : vector<4xf16> | |
| %388 = spirv.CompositeExtract %202[3 : i32] : vector<4xf16> | |
| %389 = spirv.CompositeConstruct %388, %388, %388, %388 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %390 = spirv.GL.Fma %389, %239, %387 : vector<4xf16> | |
| %391 = spirv.VectorShuffle [2 : i32, 3 : i32] %160 : vector<4xf32>, %160 : vector<4xf32> -> vector<2xf32> | |
| %392 = spirv.Bitcast %391 : vector<2xf32> to vector<4xf16> | |
| %393 = spirv.GL.Fma %380, %244, %392 : vector<4xf16> | |
| %394 = spirv.GL.Fma %383, %249, %393 : vector<4xf16> | |
| %395 = spirv.GL.Fma %386, %252, %394 : vector<4xf16> | |
| %396 = spirv.GL.Fma %389, %255, %395 : vector<4xf16> | |
| %397 = spirv.Bitcast %396 : vector<4xf16> to vector<2xf32> | |
| %398 = spirv.Bitcast %390 : vector<4xf16> to vector<2xf32> | |
| %399 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %398 : vector<2xf32> -> vector<4xf32> | |
| %400 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %399 : vector<4xf32>, %397 : vector<2xf32> -> vector<4xf32> | |
| %401 = spirv.Bitcast %376 : vector<4xf16> to vector<2xf32> | |
| %402 = spirv.Bitcast %370 : vector<4xf16> to vector<2xf32> | |
| %403 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %402 : vector<2xf32> -> vector<4xf32> | |
| %404 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %403 : vector<4xf32>, %401 : vector<2xf32> -> vector<4xf32> | |
| %405 = spirv.Bitcast %356 : vector<4xf16> to vector<2xf32> | |
| %406 = spirv.Bitcast %350 : vector<4xf16> to vector<2xf32> | |
| %407 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %406 : vector<2xf32> -> vector<4xf32> | |
| %408 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %407 : vector<4xf32>, %405 : vector<2xf32> -> vector<4xf32> | |
| %409 = spirv.Bitcast %336 : vector<4xf16> to vector<2xf32> | |
| %410 = spirv.Bitcast %330 : vector<4xf16> to vector<2xf32> | |
| %411 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %410 : vector<2xf32> -> vector<4xf32> | |
| %412 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %411 : vector<4xf32>, %409 : vector<2xf32> -> vector<4xf32> | |
| %413 = spirv.Bitcast %316 : vector<4xf16> to vector<2xf32> | |
| %414 = spirv.Bitcast %310 : vector<4xf16> to vector<2xf32> | |
| %415 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %414 : vector<2xf32> -> vector<4xf32> | |
| %416 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %415 : vector<4xf32>, %413 : vector<2xf32> -> vector<4xf32> | |
| %417 = spirv.Bitcast %296 : vector<4xf16> to vector<2xf32> | |
| %418 = spirv.Bitcast %290 : vector<4xf16> to vector<2xf32> | |
| %419 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %418 : vector<2xf32> -> vector<4xf32> | |
| %420 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %419 : vector<4xf32>, %417 : vector<2xf32> -> vector<4xf32> | |
| %421 = spirv.Bitcast %276 : vector<4xf16> to vector<2xf32> | |
| %422 = spirv.Bitcast %270 : vector<4xf16> to vector<2xf32> | |
| %423 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %422 : vector<2xf32> -> vector<4xf32> | |
| %424 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %423 : vector<4xf32>, %421 : vector<2xf32> -> vector<4xf32> | |
| %425 = spirv.Bitcast %256 : vector<4xf16> to vector<2xf32> | |
| %426 = spirv.Bitcast %242 : vector<4xf16> to vector<2xf32> | |
| %427 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %426 : vector<2xf32> -> vector<4xf32> | |
| %428 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %427 : vector<4xf32>, %425 : vector<2xf32> -> vector<4xf32> | |
| spirv.Store "Function" %10, %400 : vector<4xf32> | |
| spirv.Store "Function" %11, %404 : vector<4xf32> | |
| spirv.Store "Function" %12, %408 : vector<4xf32> | |
| spirv.Store "Function" %13, %412 : vector<4xf32> | |
| spirv.Store "Function" %14, %416 : vector<4xf32> | |
| spirv.Store "Function" %15, %420 : vector<4xf32> | |
| spirv.Store "Function" %16, %424 : vector<4xf32> | |
| spirv.Store "Function" %17, %428 : vector<4xf32> | |
| %429 = spirv.IAdd %159, %cst4_i32 : i32 | |
| spirv.Branch ^bb1(%429, %400, %404, %408, %412, %416, %420, %424, %428 : i32, vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
| %18 = spirv.Load "Function" %17 : vector<4xf32> | |
| %19 = spirv.Load "Function" %16 : vector<4xf32> | |
| %20 = spirv.Load "Function" %15 : vector<4xf32> | |
| %21 = spirv.Load "Function" %14 : vector<4xf32> | |
| %22 = spirv.Load "Function" %13 : vector<4xf32> | |
| %23 = spirv.Load "Function" %12 : vector<4xf32> | |
| %24 = spirv.Load "Function" %11 : vector<4xf32> | |
| %25 = spirv.Load "Function" %10 : vector<4xf32> | |
| %26 = spirv.IMul %3, %cst8_i32 : i32 | |
| %27 = spirv.IAdd %26, %7 : i32 | |
| %28 = spirv.IAdd %27, %cst108199360_i32 : i32 | |
| %29 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %28] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %30 = spirv.Load "StorageBuffer" %29 : vector<4xf32> | |
| %31 = spirv.CompositeExtract %30[0 : i32] : vector<4xf32> | |
| %32 = spirv.Bitcast %31 : f32 to vector<2xf16> | |
| %33 = spirv.CompositeExtract %32[0 : i32] : vector<2xf16> | |
| %34 = spirv.CompositeConstruct %33, %33, %33, %33 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %35 = spirv.CompositeExtract %32[1 : i32] : vector<2xf16> | |
| %36 = spirv.CompositeConstruct %35, %35, %35, %35 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %37 = spirv.CompositeExtract %30[1 : i32] : vector<4xf32> | |
| %38 = spirv.Bitcast %37 : f32 to vector<2xf16> | |
| %39 = spirv.CompositeExtract %38[0 : i32] : vector<2xf16> | |
| %40 = spirv.CompositeConstruct %39, %39, %39, %39 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %41 = spirv.CompositeExtract %38[1 : i32] : vector<2xf16> | |
| %42 = spirv.CompositeConstruct %41, %41, %41, %41 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %43 = spirv.CompositeExtract %30[2 : i32] : vector<4xf32> | |
| %44 = spirv.Bitcast %43 : f32 to vector<2xf16> | |
| %45 = spirv.CompositeExtract %44[0 : i32] : vector<2xf16> | |
| %46 = spirv.CompositeConstruct %45, %45, %45, %45 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %47 = spirv.CompositeExtract %44[1 : i32] : vector<2xf16> | |
| %48 = spirv.CompositeConstruct %47, %47, %47, %47 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %49 = spirv.CompositeExtract %30[3 : i32] : vector<4xf32> | |
| %50 = spirv.Bitcast %49 : f32 to vector<2xf16> | |
| %51 = spirv.CompositeExtract %50[0 : i32] : vector<2xf16> | |
| %52 = spirv.CompositeConstruct %51, %51, %51, %51 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %53 = spirv.CompositeExtract %50[1 : i32] : vector<2xf16> | |
| %54 = spirv.CompositeConstruct %53, %53, %53, %53 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %55 = spirv.VectorShuffle [0 : i32, 1 : i32] %18 : vector<4xf32>, %18 : vector<4xf32> -> vector<2xf32> | |
| %56 = spirv.Bitcast %55 : vector<2xf32> to vector<4xf16> | |
| %57 = spirv.FAdd %56, %34 : vector<4xf16> | |
| %58 = spirv.VectorShuffle [2 : i32, 3 : i32] %18 : vector<4xf32>, %18 : vector<4xf32> -> vector<2xf32> | |
| %59 = spirv.Bitcast %58 : vector<2xf32> to vector<4xf16> | |
| %60 = spirv.FAdd %59, %34 : vector<4xf16> | |
| %61 = spirv.VectorShuffle [0 : i32, 1 : i32] %19 : vector<4xf32>, %19 : vector<4xf32> -> vector<2xf32> | |
| %62 = spirv.Bitcast %61 : vector<2xf32> to vector<4xf16> | |
| %63 = spirv.FAdd %62, %36 : vector<4xf16> | |
| %64 = spirv.VectorShuffle [2 : i32, 3 : i32] %19 : vector<4xf32>, %19 : vector<4xf32> -> vector<2xf32> | |
| %65 = spirv.Bitcast %64 : vector<2xf32> to vector<4xf16> | |
| %66 = spirv.FAdd %65, %36 : vector<4xf16> | |
| %67 = spirv.VectorShuffle [0 : i32, 1 : i32] %20 : vector<4xf32>, %20 : vector<4xf32> -> vector<2xf32> | |
| %68 = spirv.Bitcast %67 : vector<2xf32> to vector<4xf16> | |
| %69 = spirv.FAdd %68, %40 : vector<4xf16> | |
| %70 = spirv.VectorShuffle [2 : i32, 3 : i32] %20 : vector<4xf32>, %20 : vector<4xf32> -> vector<2xf32> | |
| %71 = spirv.Bitcast %70 : vector<2xf32> to vector<4xf16> | |
| %72 = spirv.FAdd %71, %40 : vector<4xf16> | |
| %73 = spirv.VectorShuffle [0 : i32, 1 : i32] %21 : vector<4xf32>, %21 : vector<4xf32> -> vector<2xf32> | |
| %74 = spirv.Bitcast %73 : vector<2xf32> to vector<4xf16> | |
| %75 = spirv.FAdd %74, %42 : vector<4xf16> | |
| %76 = spirv.VectorShuffle [2 : i32, 3 : i32] %21 : vector<4xf32>, %21 : vector<4xf32> -> vector<2xf32> | |
| %77 = spirv.Bitcast %76 : vector<2xf32> to vector<4xf16> | |
| %78 = spirv.FAdd %77, %42 : vector<4xf16> | |
| %79 = spirv.VectorShuffle [0 : i32, 1 : i32] %22 : vector<4xf32>, %22 : vector<4xf32> -> vector<2xf32> | |
| %80 = spirv.Bitcast %79 : vector<2xf32> to vector<4xf16> | |
| %81 = spirv.FAdd %80, %46 : vector<4xf16> | |
| %82 = spirv.VectorShuffle [2 : i32, 3 : i32] %22 : vector<4xf32>, %22 : vector<4xf32> -> vector<2xf32> | |
| %83 = spirv.Bitcast %82 : vector<2xf32> to vector<4xf16> | |
| %84 = spirv.FAdd %83, %46 : vector<4xf16> | |
| %85 = spirv.VectorShuffle [0 : i32, 1 : i32] %23 : vector<4xf32>, %23 : vector<4xf32> -> vector<2xf32> | |
| %86 = spirv.Bitcast %85 : vector<2xf32> to vector<4xf16> | |
| %87 = spirv.FAdd %86, %48 : vector<4xf16> | |
| %88 = spirv.VectorShuffle [2 : i32, 3 : i32] %23 : vector<4xf32>, %23 : vector<4xf32> -> vector<2xf32> | |
| %89 = spirv.Bitcast %88 : vector<2xf32> to vector<4xf16> | |
| %90 = spirv.FAdd %89, %48 : vector<4xf16> | |
| %91 = spirv.VectorShuffle [0 : i32, 1 : i32] %24 : vector<4xf32>, %24 : vector<4xf32> -> vector<2xf32> | |
| %92 = spirv.Bitcast %91 : vector<2xf32> to vector<4xf16> | |
| %93 = spirv.FAdd %92, %52 : vector<4xf16> | |
| %94 = spirv.VectorShuffle [2 : i32, 3 : i32] %24 : vector<4xf32>, %24 : vector<4xf32> -> vector<2xf32> | |
| %95 = spirv.Bitcast %94 : vector<2xf32> to vector<4xf16> | |
| %96 = spirv.FAdd %95, %52 : vector<4xf16> | |
| %97 = spirv.VectorShuffle [0 : i32, 1 : i32] %25 : vector<4xf32>, %25 : vector<4xf32> -> vector<2xf32> | |
| %98 = spirv.Bitcast %97 : vector<2xf32> to vector<4xf16> | |
| %99 = spirv.FAdd %98, %54 : vector<4xf16> | |
| %100 = spirv.VectorShuffle [2 : i32, 3 : i32] %25 : vector<4xf32>, %25 : vector<4xf32> -> vector<2xf32> | |
| %101 = spirv.Bitcast %100 : vector<2xf32> to vector<4xf16> | |
| %102 = spirv.FAdd %101, %54 : vector<4xf16> | |
| %103 = spirv.Bitcast %60 : vector<4xf16> to vector<2xf32> | |
| %104 = spirv.Bitcast %57 : vector<4xf16> to vector<2xf32> | |
| %105 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %104 : vector<2xf32> -> vector<4xf32> | |
| %106 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %105 : vector<4xf32>, %103 : vector<2xf32> -> vector<4xf32> | |
| %107 = spirv.IMul %3, %cst73728_i32 : i32 | |
| %108 = spirv.IMul %7, %cst9216_i32 : i32 | |
| %109 = spirv.IAdd %107, %108 : i32 | |
| %110 = spirv.IMul %5, %cst32_i32 : i32 | |
| %111 = spirv.IAdd %109, %110 : i32 | |
| %112 = spirv.IAdd %111, %9 : i32 | |
| %113 = spirv.IMul %1, %cst368640_i32 : i32 | |
| %114 = spirv.IAdd %112, %113 : i32 | |
| %115 = spirv.IAdd %114, %cst92748_i32 : i32 | |
| %116 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %115] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %116, %106 : vector<4xf32> | |
| %117 = spirv.Bitcast %66 : vector<4xf16> to vector<2xf32> | |
| %118 = spirv.Bitcast %63 : vector<4xf16> to vector<2xf32> | |
| %119 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %118 : vector<2xf32> -> vector<4xf32> | |
| %120 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %119 : vector<4xf32>, %117 : vector<2xf32> -> vector<4xf32> | |
| %121 = spirv.IAdd %114, %cst93900_i32 : i32 | |
| %122 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %121] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %122, %120 : vector<4xf32> | |
| %123 = spirv.Bitcast %72 : vector<4xf16> to vector<2xf32> | |
| %124 = spirv.Bitcast %69 : vector<4xf16> to vector<2xf32> | |
| %125 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %124 : vector<2xf32> -> vector<4xf32> | |
| %126 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %125 : vector<4xf32>, %123 : vector<2xf32> -> vector<4xf32> | |
| %127 = spirv.IAdd %114, %cst95052_i32 : i32 | |
| %128 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %127] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %128, %126 : vector<4xf32> | |
| %129 = spirv.Bitcast %78 : vector<4xf16> to vector<2xf32> | |
| %130 = spirv.Bitcast %75 : vector<4xf16> to vector<2xf32> | |
| %131 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %130 : vector<2xf32> -> vector<4xf32> | |
| %132 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %131 : vector<4xf32>, %129 : vector<2xf32> -> vector<4xf32> | |
| %133 = spirv.IAdd %114, %cst96204_i32 : i32 | |
| %134 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %133] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %134, %132 : vector<4xf32> | |
| %135 = spirv.Bitcast %84 : vector<4xf16> to vector<2xf32> | |
| %136 = spirv.Bitcast %81 : vector<4xf16> to vector<2xf32> | |
| %137 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %136 : vector<2xf32> -> vector<4xf32> | |
| %138 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %137 : vector<4xf32>, %135 : vector<2xf32> -> vector<4xf32> | |
| %139 = spirv.IAdd %114, %cst97356_i32 : i32 | |
| %140 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %139] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %140, %138 : vector<4xf32> | |
| %141 = spirv.Bitcast %90 : vector<4xf16> to vector<2xf32> | |
| %142 = spirv.Bitcast %87 : vector<4xf16> to vector<2xf32> | |
| %143 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %142 : vector<2xf32> -> vector<4xf32> | |
| %144 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %143 : vector<4xf32>, %141 : vector<2xf32> -> vector<4xf32> | |
| %145 = spirv.IAdd %114, %cst98508_i32 : i32 | |
| %146 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %145] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %146, %144 : vector<4xf32> | |
| %147 = spirv.Bitcast %96 : vector<4xf16> to vector<2xf32> | |
| %148 = spirv.Bitcast %93 : vector<4xf16> to vector<2xf32> | |
| %149 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %148 : vector<2xf32> -> vector<4xf32> | |
| %150 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %149 : vector<4xf32>, %147 : vector<2xf32> -> vector<4xf32> | |
| %151 = spirv.IAdd %114, %cst99660_i32 : i32 | |
| %152 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %151] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %152, %150 : vector<4xf32> | |
| %153 = spirv.Bitcast %102 : vector<4xf16> to vector<2xf32> | |
| %154 = spirv.Bitcast %99 : vector<4xf16> to vector<2xf32> | |
| %155 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %154 : vector<2xf32> -> vector<4xf32> | |
| %156 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %155 : vector<4xf32>, %153 : vector<2xf32> -> vector<4xf32> | |
| %157 = spirv.IAdd %114, %cst100812_i32 : i32 | |
| %158 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %157] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %158, %156 : vector<4xf32> | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_12_generic_2x320x9216x36, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_12_generic_2x320x9216x36 "LocalSize", 32, 8, 1 | |
| } | |
| } | |
| } | |
| } | |
| hal.executable private @forward_dispatch_13 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
| // Export record for entry point @forward_dispatch_13_generic_5898240 (ordinal 0, layout #pipeline_layout1). | |
| // The attributes fix the workgroup size at 32 x 1 x 1 invocations. | |
| // The region below ignores both of its arguments (%arg0, %arg1) and always returns the constant triple (46080, 1, 1). | |
| // NOTE(review): the returned triple looks like the dispatch workgroup count (x, y, z) -- confirm against the IREE HAL dialect docs. | |
| // Sanity check on the constant: 46080 * 32 invocations * 4 lanes (the kernel indexes vector<4x...> elements) = 5898240, the suffix of the entry-point name. | |
| hal.executable.export public @forward_dispatch_13_generic_5898240 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation1, workgroup_size = [32 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index): | |
| %c46080 = arith.constant 46080 : index | |
| %c1 = arith.constant 1 : index | |
| hal.return %c46080, %c1, %c1 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
| // Module-level interface variables used by @forward_dispatch_13_generic_5898240. | |
| // Built-in inputs: the kernel loads both and extracts component 0 of each to form its linear index (WorkgroupId.x * 32 + LocalInvocationId.x). | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| // Push-constant block holding two i32 values (array stride 4). The kernel floor-divides element 0 by 8 and element 1 by 16 and adds the results to the buffer indices. | |
| // NOTE(review): 8 and 16 equal the element strides of the two buffers below, so these are presumably byte offsets converted to element offsets -- confirm. | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| // bind(0, 0): runtime array of vector<4xf16> (stride 8). The kernel loads from it and widens the value to vector<4xf32> via FConvert. | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| // bind(0, 1): runtime array of vector<4xf32> (stride 16). NOTE(review): presumably the destination of the converted values -- the store is outside the visible part of the kernel. | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_13_generic_5898240() "None" { | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst16_i32 = spirv.Constant 16 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| %0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %1 = spirv.Load "PushConstant" %0 : i32 | |
| %2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %3 = spirv.Load "PushConstant" %2 : i32 | |
| %4 = spirv.SLessThan %1, %cst0_i32 : i32 | |
| %5 = spirv.ISub %cst-1_i32, %1 : i32 | |
| %6 = spirv.Select %4, %5, %1 : i1, i32 | |
| %7 = spirv.SDiv %6, %cst8_i32 : i32 | |
| %8 = spirv.ISub %cst-1_i32, %7 : i32 | |
| %9 = spirv.Select %4, %8, %7 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| %10 = spirv.SLessThan %3, %cst0_i32 : i32 | |
| %11 = spirv.ISub %cst-1_i32, %3 : i32 | |
| %12 = spirv.Select %10, %11, %3 : i1, i32 | |
| %13 = spirv.SDiv %12, %cst16_i32 : i32 | |
| %14 = spirv.ISub %cst-1_i32, %13 : i32 | |
| %15 = spirv.Select %10, %14, %13 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %16 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %17 = spirv.CompositeExtract %16[0 : i32] : vector<3xi32> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %18 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %19 = spirv.CompositeExtract %18[0 : i32] : vector<3xi32> | |
| %20 = spirv.IMul %17, %cst32_i32 : i32 | |
| %21 = spirv.IAdd %20, %19 : i32 | |
| %22 = spirv.IAdd %21, %9 : i32 | |
| %23 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %22] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| %24 = spirv.Load "StorageBuffer" %23 : vector<4xf16> | |
| %25 = spirv.FConvert %24 : vector<4xf16> to vector<4xf32> | |
| %26 = spirv.IAdd %21, %15 : i32 | |
| %27 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %26] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %27, %25 : vector<4xf32> | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_13_generic_5898240, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_13_generic_5898240 "LocalSize", 32, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
| hal.executable private @forward_dispatch_14 { | |
| // Reduction kernel: each 512-invocation workgroup sums 92160 f32 values read from binding (0, 0) | |
| // and a single invocation stores sum / 92160.0 to binding (0, 1). | |
| // NOTE(review): presumably a per-group mean (e.g. for a normalization layer) - confirm against the producing model. | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
| // Export region returns the workgroup count for this entry point: (32, 2, 1). | |
| hal.executable.export public @forward_dispatch_14_generic_2x32x92160 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation3, workgroup_size = [512 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index): | |
| %c32 = arith.constant 32 : index | |
| %c2 = arith.constant 2 : index | |
| %c1 = arith.constant 1 : index | |
| hal.return %c32, %c2, %c1 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.3, [Shader, GroupNonUniformShuffle], [SPV_KHR_storage_buffer_storage_class]> { | |
| // 16 f32 scratch slots in Workgroup storage: one per group of 32 invocations (512 / 32 = 16). | |
| spirv.GlobalVariable @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32>)>, Workgroup> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| // Binding (0, 0) is read as vector<4xf32>; binding (0, 1) is written as scalar f32. | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_14_generic_2x32x92160() "None" { | |
| %cst23040_i32 = spirv.Constant 23040 : i32 | |
| %cst737280_i32 = spirv.Constant 737280 : i32 | |
| %cst4_i32 = spirv.Constant 4 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst16_i32 = spirv.Constant 16 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst_vec_4xf32 = spirv.Constant dense<0.000000e+00> : vector<4xf32> | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst15_i32 = spirv.Constant 15 : i32 | |
| %cst_f32 = spirv.Constant 9.216000e+04 : f32 | |
| %cst2048_i32 = spirv.Constant 2048 : i32 | |
| %cst92160_i32 = spirv.Constant 92160 : i32 | |
| %cst_f32_0 = spirv.Constant 0.000000e+00 : f32 | |
| // %1 = x component of the local invocation id. | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %0 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %1 = spirv.CompositeExtract %0[0 : i32] : vector<3xi32> | |
| // Load the two i32 push constants (%3 and %5). | |
| // NOTE(review): presumably byte offsets of the input and output bindings - confirm against the host-side dispatch. | |
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| %2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %3 = spirv.Load "PushConstant" %2 : i32 | |
| %4 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %5 = spirv.Load "PushConstant" %4 : i32 | |
| // %11 = floor(%3 / 16): negative inputs are mapped through -1 - x before and after SDiv so the result rounds toward -inf. | |
| %6 = spirv.SLessThan %3, %cst0_i32 : i32 | |
| %7 = spirv.ISub %cst-1_i32, %3 : i32 | |
| %8 = spirv.Select %6, %7, %3 : i1, i32 | |
| %9 = spirv.SDiv %8, %cst16_i32 : i32 | |
| %10 = spirv.ISub %cst-1_i32, %9 : i32 | |
| %11 = spirv.Select %6, %10, %9 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| // %17 = floor(%5 / 4), same floor-division pattern; used as the base index into the f32 output array. | |
| %12 = spirv.SLessThan %5, %cst0_i32 : i32 | |
| %13 = spirv.ISub %cst-1_i32, %5 : i32 | |
| %14 = spirv.Select %12, %13, %5 : i1, i32 | |
| %15 = spirv.SDiv %14, %cst4_i32 : i32 | |
| %16 = spirv.ISub %cst-1_i32, %15 : i32 | |
| %17 = spirv.Select %12, %16, %15 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer> | |
| // %19 = workgroup id y, %21 = workgroup id x. | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %18 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %19 = spirv.CompositeExtract %18[1 : i32] : vector<3xi32> | |
| %20 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %21 = spirv.CompositeExtract %20[0 : i32] : vector<3xi32> | |
| // Function-local slot that carries the loop's running vector sum out of the loop region. | |
| %22 = spirv.Variable : !spirv.ptr<vector<4xf32>, Function> | |
| // Per-invocation accumulation: induction value %59 runs 0, 2048, ... while < 92160; %60 is the running vector<4xf32> sum. | |
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%cst0_i32, %cst_vec_4xf32 : i32, vector<4xf32>) | |
| ^bb1(%59: i32, %60: vector<4xf32>): // 2 preds: ^bb0, ^bb2 | |
| %61 = spirv.SLessThan %59, %cst92160_i32 : i32 | |
| spirv.BranchConditional %61, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
| // %67 = floor(%59 / 4): the induction value expressed as a vector<4xf32> element index. | |
| %62 = spirv.SLessThan %59, %cst0_i32 : i32 | |
| %63 = spirv.ISub %cst-1_i32, %59 : i32 | |
| %64 = spirv.Select %62, %63, %59 : i1, i32 | |
| %65 = spirv.SDiv %64, %cst4_i32 : i32 | |
| %66 = spirv.ISub %cst-1_i32, %65 : i32 | |
| %67 = spirv.Select %62, %66, %65 : i1, i32 | |
| // Element index %73 = %67 + local_id.x + workgroup_id.y * 737280 + workgroup_id.x * 23040 + %11. | |
| %68 = spirv.IMul %19, %cst737280_i32 : i32 | |
| %69 = spirv.IAdd %1, %68 : i32 | |
| %70 = spirv.IMul %21, %cst23040_i32 : i32 | |
| %71 = spirv.IAdd %69, %70 : i32 | |
| %72 = spirv.IAdd %67, %71 : i32 | |
| %73 = spirv.IAdd %72, %11 : i32 | |
| %74 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %73] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %75 = spirv.Load "StorageBuffer" %74 : vector<4xf32> | |
| %76 = spirv.FAdd %75, %60 : vector<4xf32> | |
| // Written every iteration; the constant bounds (0 < 92160) guarantee at least one store before the load after the loop. | |
| spirv.Store "Function" %22, %76 : vector<4xf32> | |
| %77 = spirv.IAdd %59, %cst2048_i32 : i32 | |
| spirv.Branch ^bb1(%77, %76 : i32, vector<4xf32>) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
| // Collapse the four vector lanes into one scalar partial sum (%30). | |
| %23 = spirv.Load "Function" %22 : vector<4xf32> | |
| %24 = spirv.CompositeExtract %23[0 : i32] : vector<4xf32> | |
| %25 = spirv.CompositeExtract %23[1 : i32] : vector<4xf32> | |
| %26 = spirv.CompositeExtract %23[2 : i32] : vector<4xf32> | |
| %27 = spirv.CompositeExtract %23[3 : i32] : vector<4xf32> | |
| %28 = spirv.FAdd %24, %25 : f32 | |
| %29 = spirv.FAdd %28, %26 : f32 | |
| %30 = spirv.FAdd %29, %27 : f32 | |
| // Subgroup-level sum via XOR shuffles with masks 1, 2, 4, 8, 16. | |
| // Relies on a 32-wide subgroup, which the target env pins (min_subgroup_size = max_subgroup_size = 32). | |
| %31 = spirv.GroupNonUniformShuffleXor <Subgroup> %30, %cst1_i32 : f32, i32 | |
| %32 = spirv.FAdd %30, %31 : f32 | |
| %33 = spirv.GroupNonUniformShuffleXor <Subgroup> %32, %cst2_i32 : f32, i32 | |
| %34 = spirv.FAdd %32, %33 : f32 | |
| %35 = spirv.GroupNonUniformShuffleXor <Subgroup> %34, %cst4_i32 : f32, i32 | |
| %36 = spirv.FAdd %34, %35 : f32 | |
| %37 = spirv.GroupNonUniformShuffleXor <Subgroup> %36, %cst8_i32 : f32, i32 | |
| %38 = spirv.FAdd %36, %37 : f32 | |
| %39 = spirv.GroupNonUniformShuffleXor <Subgroup> %38, %cst16_i32 : f32, i32 | |
| %40 = spirv.FAdd %38, %39 : f32 | |
| %__workgroup_mem__5_addr = spirv.mlir.addressof @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32>)>, Workgroup> | |
| // %41 = local_id.x / 32 (scratch slot), %42 = local_id.x % 32; invocations with %42 == 0 publish their sum to slot %41. | |
| %41 = spirv.UDiv %1, %cst32_i32 : i32 | |
| %42 = spirv.UMod %1, %cst32_i32 : i32 | |
| %43 = spirv.IEqual %42, %cst0_i32 : i32 | |
| spirv.mlir.selection { | |
| spirv.BranchConditional %43, ^bb1, ^bb2 | |
| ^bb1: // pred: ^bb0 | |
| %59 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %41] : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %59, %40 : f32 | |
| spirv.Branch ^bb2 | |
| ^bb2: // 2 preds: ^bb0, ^bb1 | |
| spirv.mlir.merge | |
| } | |
| // Workgroup-scope barrier between the scratch stores above and the scratch loads below. | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| // Every invocation reads slot min(%42, 15), keeping the index inside the 16-element array. | |
| %44 = spirv.GL.UMin %42, %cst15_i32 : i32 | |
| %45 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %44] : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32>)>, Workgroup>, i32, i32 | |
| %46 = spirv.Load "Workgroup" %45 : f32 | |
| // Second reduction over the 16 slot values with XOR masks 1, 2, 4, 8. | |
| %47 = spirv.GroupNonUniformShuffleXor <Subgroup> %46, %cst1_i32 : f32, i32 | |
| %48 = spirv.FAdd %46, %47 : f32 | |
| %49 = spirv.GroupNonUniformShuffleXor <Subgroup> %48, %cst2_i32 : f32, i32 | |
| %50 = spirv.FAdd %48, %49 : f32 | |
| %51 = spirv.GroupNonUniformShuffleXor <Subgroup> %50, %cst4_i32 : f32, i32 | |
| %52 = spirv.FAdd %50, %51 : f32 | |
| %53 = spirv.GroupNonUniformShuffleXor <Subgroup> %52, %cst8_i32 : f32, i32 | |
| %54 = spirv.FAdd %52, %53 : f32 | |
| // Take the value held by subgroup invocation 0, add the 0.0 constant, then divide by 92160.0. | |
| %55 = spirv.GroupNonUniformShuffle <Subgroup> %54, %cst0_i32 : f32, i32 | |
| %56 = spirv.FAdd %55, %cst_f32_0 : f32 | |
| %57 = spirv.FDiv %56, %cst_f32 : f32 | |
| // Only the invocation with local_id.x == 0 writes the result, at index workgroup_id.y * 32 + workgroup_id.x + %17. | |
| %58 = spirv.IEqual %1, %cst0_i32 : i32 | |
| spirv.mlir.selection { | |
| spirv.BranchConditional %58, ^bb1, ^bb2 | |
| ^bb1: // pred: ^bb0 | |
| %59 = spirv.IMul %19, %cst32_i32 : i32 | |
| %60 = spirv.IAdd %59, %21 : i32 | |
| %61 = spirv.IAdd %60, %17 : i32 | |
| %62 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %61] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %62, %57 : f32 | |
| spirv.Branch ^bb2 | |
| ^bb2: // 2 preds: ^bb0, ^bb1 | |
| spirv.mlir.merge | |
| } | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_14_generic_2x32x92160, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__ | |
| spirv.ExecutionMode @forward_dispatch_14_generic_2x32x92160 "LocalSize", 512, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
| hal.executable private @forward_dispatch_15 { | |
| // Reduction kernel: each 512-invocation workgroup sums (x - m)^2 over 92160 f32 values x, where m is a | |
| // scalar read from the same binding, and adds that sum to the f32 already stored in binding (0, 1). | |
| // NOTE(review): presumably the sum of squared deviations for a variance computation - confirm against the producing model. | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
| // Export region returns the workgroup count for this entry point: (64, 1, 1). | |
| hal.executable.export public @forward_dispatch_15_generic_64x92160 ordinal(0) layout(#pipeline_layout4) attributes {translation_info = #translation3, workgroup_size = [512 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index): | |
| %c64 = arith.constant 64 : index | |
| %c1 = arith.constant 1 : index | |
| hal.return %c64, %c1, %c1 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.3, [Shader, GroupNonUniformShuffle], [SPV_KHR_storage_buffer_storage_class]> { | |
| // 16 f32 scratch slots in Workgroup storage: one per group of 32 invocations (512 / 32 = 16). | |
| spirv.GlobalVariable @__workgroup_mem__6 : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32>)>, Workgroup> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant> | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| // Binding (0, 0) is declared twice and marked aliased: once as vector<4xf32> elements, once as scalar f32 elements. | |
| spirv.GlobalVariable @__resource_var_0_0__0 bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer> | |
| // Binding (0, 1) is both read and written by this kernel (scalar f32 elements). | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_15_generic_64x92160() "None" { | |
| %cst23040_i32 = spirv.Constant 23040 : i32 | |
| %cst4_i32 = spirv.Constant 4 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst16_i32 = spirv.Constant 16 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst_vec_4xf32 = spirv.Constant dense<0.000000e+00> : vector<4xf32> | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst15_i32 = spirv.Constant 15 : i32 | |
| %cst2048_i32 = spirv.Constant 2048 : i32 | |
| %cst92160_i32 = spirv.Constant 92160 : i32 | |
| // %1 = x component of the local invocation id. | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %0 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %1 = spirv.CompositeExtract %0[0 : i32] : vector<3xi32> | |
| // Load the three i32 push constants (%3, %5, %7). | |
| // NOTE(review): presumably byte offsets for the three buffer views - confirm against the host-side dispatch. | |
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant> | |
| %2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %3 = spirv.Load "PushConstant" %2 : i32 | |
| %4 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %5 = spirv.Load "PushConstant" %4 : i32 | |
| %6 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %7 = spirv.Load "PushConstant" %6 : i32 | |
| // %13 = floor(%3 / 16): negative inputs are mapped through -1 - x before and after SDiv so the result rounds toward -inf. | |
| %8 = spirv.SLessThan %3, %cst0_i32 : i32 | |
| %9 = spirv.ISub %cst-1_i32, %3 : i32 | |
| %10 = spirv.Select %8, %9, %3 : i1, i32 | |
| %11 = spirv.SDiv %10, %cst16_i32 : i32 | |
| %12 = spirv.ISub %cst-1_i32, %11 : i32 | |
| %13 = spirv.Select %8, %12, %11 : i1, i32 | |
| %__resource_var_0_0__0_addr = spirv.mlir.addressof @__resource_var_0_0__0 : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| // %19 = floor(%5 / 4), same floor-division pattern; base index for the scalar view of binding (0, 0). | |
| %14 = spirv.SLessThan %5, %cst0_i32 : i32 | |
| %15 = spirv.ISub %cst-1_i32, %5 : i32 | |
| %16 = spirv.Select %14, %15, %5 : i1, i32 | |
| %17 = spirv.SDiv %16, %cst4_i32 : i32 | |
| %18 = spirv.ISub %cst-1_i32, %17 : i32 | |
| %19 = spirv.Select %14, %18, %17 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer> | |
| // %25 = floor(%7 / 4), same floor-division pattern; base index into binding (0, 1). | |
| %20 = spirv.SLessThan %7, %cst0_i32 : i32 | |
| %21 = spirv.ISub %cst-1_i32, %7 : i32 | |
| %22 = spirv.Select %20, %21, %7 : i1, i32 | |
| %23 = spirv.SDiv %22, %cst4_i32 : i32 | |
| %24 = spirv.ISub %cst-1_i32, %23 : i32 | |
| %25 = spirv.Select %20, %24, %23 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer> | |
| // %27 = workgroup id x. | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %26 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %27 = spirv.CompositeExtract %26[0 : i32] : vector<3xi32> | |
| // Function-local slot that carries the loop's running vector sum out of the loop region. | |
| %28 = spirv.Variable : !spirv.ptr<vector<4xf32>, Function> | |
| // Per-invocation accumulation: induction value %67 runs 0, 2048, ... while < 92160; %68 is the running vector<4xf32> sum. | |
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%cst0_i32, %cst_vec_4xf32 : i32, vector<4xf32>) | |
| ^bb1(%67: i32, %68: vector<4xf32>): // 2 preds: ^bb0, ^bb2 | |
| %69 = spirv.SLessThan %67, %cst92160_i32 : i32 | |
| spirv.BranchConditional %69, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
| // Scalar at index workgroup_id.x + %19, splatted to four lanes. The index does not depend on %67, | |
| // so the same element is reloaded on every iteration. | |
| %70 = spirv.IAdd %27, %19 : i32 | |
| %71 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %70] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
| %72 = spirv.Load "StorageBuffer" %71 : f32 | |
| %73 = spirv.CompositeConstruct %72, %72, %72, %72 : (f32, f32, f32, f32) -> vector<4xf32> | |
| // %79 = floor(%67 / 4): the induction value expressed as a vector<4xf32> element index. | |
| %74 = spirv.SLessThan %67, %cst0_i32 : i32 | |
| %75 = spirv.ISub %cst-1_i32, %67 : i32 | |
| %76 = spirv.Select %74, %75, %67 : i1, i32 | |
| %77 = spirv.SDiv %76, %cst4_i32 : i32 | |
| %78 = spirv.ISub %cst-1_i32, %77 : i32 | |
| %79 = spirv.Select %74, %78, %77 : i1, i32 | |
| // Element index %83 = %79 + local_id.x + workgroup_id.x * 23040 + %13. | |
| %80 = spirv.IMul %27, %cst23040_i32 : i32 | |
| %81 = spirv.IAdd %1, %80 : i32 | |
| %82 = spirv.IAdd %79, %81 : i32 | |
| %83 = spirv.IAdd %82, %13 : i32 | |
| %84 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %83] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %85 = spirv.Load "StorageBuffer" %84 : vector<4xf32> | |
| // Accumulate the squared difference (x - m)^2 lane-wise. | |
| %86 = spirv.FSub %85, %73 : vector<4xf32> | |
| %87 = spirv.FMul %86, %86 : vector<4xf32> | |
| %88 = spirv.FAdd %87, %68 : vector<4xf32> | |
| // Written every iteration; the constant bounds (0 < 92160) guarantee at least one store before the load after the loop. | |
| spirv.Store "Function" %28, %88 : vector<4xf32> | |
| %89 = spirv.IAdd %67, %cst2048_i32 : i32 | |
| spirv.Branch ^bb1(%89, %88 : i32, vector<4xf32>) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
| %29 = spirv.Load "Function" %28 : vector<4xf32> | |
| // %32 = value currently stored in binding (0, 1) at index workgroup_id.x + %25; the reduced sum is added to it below. | |
| %30 = spirv.IAdd %27, %25 : i32 | |
| %31 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %30] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
| %32 = spirv.Load "StorageBuffer" %31 : f32 | |
| // Collapse the four vector lanes into one scalar partial sum (%39). | |
| %33 = spirv.CompositeExtract %29[0 : i32] : vector<4xf32> | |
| %34 = spirv.CompositeExtract %29[1 : i32] : vector<4xf32> | |
| %35 = spirv.CompositeExtract %29[2 : i32] : vector<4xf32> | |
| %36 = spirv.CompositeExtract %29[3 : i32] : vector<4xf32> | |
| %37 = spirv.FAdd %33, %34 : f32 | |
| %38 = spirv.FAdd %37, %35 : f32 | |
| %39 = spirv.FAdd %38, %36 : f32 | |
| // Subgroup-level sum via XOR shuffles with masks 1, 2, 4, 8, 16. | |
| // Relies on a 32-wide subgroup, which the target env pins (min_subgroup_size = max_subgroup_size = 32). | |
| %40 = spirv.GroupNonUniformShuffleXor <Subgroup> %39, %cst1_i32 : f32, i32 | |
| %41 = spirv.FAdd %39, %40 : f32 | |
| %42 = spirv.GroupNonUniformShuffleXor <Subgroup> %41, %cst2_i32 : f32, i32 | |
| %43 = spirv.FAdd %41, %42 : f32 | |
| %44 = spirv.GroupNonUniformShuffleXor <Subgroup> %43, %cst4_i32 : f32, i32 | |
| %45 = spirv.FAdd %43, %44 : f32 | |
| %46 = spirv.GroupNonUniformShuffleXor <Subgroup> %45, %cst8_i32 : f32, i32 | |
| %47 = spirv.FAdd %45, %46 : f32 | |
| %48 = spirv.GroupNonUniformShuffleXor <Subgroup> %47, %cst16_i32 : f32, i32 | |
| %49 = spirv.FAdd %47, %48 : f32 | |
| %__workgroup_mem__6_addr = spirv.mlir.addressof @__workgroup_mem__6 : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32>)>, Workgroup> | |
| // %50 = local_id.x / 32 (scratch slot), %51 = local_id.x % 32; invocations with %51 == 0 publish their sum to slot %50. | |
| %50 = spirv.UDiv %1, %cst32_i32 : i32 | |
| %51 = spirv.UMod %1, %cst32_i32 : i32 | |
| %52 = spirv.IEqual %51, %cst0_i32 : i32 | |
| spirv.mlir.selection { | |
| spirv.BranchConditional %52, ^bb1, ^bb2 | |
| ^bb1: // pred: ^bb0 | |
| %67 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %50] : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %67, %49 : f32 | |
| spirv.Branch ^bb2 | |
| ^bb2: // 2 preds: ^bb0, ^bb1 | |
| spirv.mlir.merge | |
| } | |
| // Workgroup-scope barrier between the scratch stores above and the scratch loads below. | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| // Every invocation reads slot min(%51, 15), keeping the index inside the 16-element array. | |
| %53 = spirv.GL.UMin %51, %cst15_i32 : i32 | |
| %54 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %53] : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32>)>, Workgroup>, i32, i32 | |
| %55 = spirv.Load "Workgroup" %54 : f32 | |
| // Second reduction over the 16 slot values with XOR masks 1, 2, 4, 8. | |
| %56 = spirv.GroupNonUniformShuffleXor <Subgroup> %55, %cst1_i32 : f32, i32 | |
| %57 = spirv.FAdd %55, %56 : f32 | |
| %58 = spirv.GroupNonUniformShuffleXor <Subgroup> %57, %cst2_i32 : f32, i32 | |
| %59 = spirv.FAdd %57, %58 : f32 | |
| %60 = spirv.GroupNonUniformShuffleXor <Subgroup> %59, %cst4_i32 : f32, i32 | |
| %61 = spirv.FAdd %59, %60 : f32 | |
| %62 = spirv.GroupNonUniformShuffleXor <Subgroup> %61, %cst8_i32 : f32, i32 | |
| %63 = spirv.FAdd %61, %62 : f32 | |
| // Take the value held by subgroup invocation 0 and add the previously stored output value %32. | |
| %64 = spirv.GroupNonUniformShuffle <Subgroup> %63, %cst0_i32 : f32, i32 | |
| %65 = spirv.FAdd %64, %32 : f32 | |
| // Only the invocation with local_id.x == 0 writes the result back to the element loaded as %32. | |
| %66 = spirv.IEqual %1, %cst0_i32 : i32 | |
| spirv.mlir.selection { | |
| spirv.BranchConditional %66, ^bb1, ^bb2 | |
| ^bb1: // pred: ^bb0 | |
| spirv.Store "StorageBuffer" %31, %65 : f32 | |
| spirv.Branch ^bb2 | |
| ^bb2: // 2 preds: ^bb0, ^bb1 | |
| spirv.mlir.merge | |
| } | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_15_generic_64x92160, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__ | |
| spirv.ExecutionMode @forward_dispatch_15_generic_64x92160 "LocalSize", 512, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
| hal.executable private @forward_dispatch_16 { | |
| // Elementwise kernel: each invocation loads one vector<4xf16> x plus two f32 scalars a and b, and stores | |
| // (x - f16(a)) * inversesqrt(f16(b / 92160.0) + f16(9.99999974E-6)) as vector<4xf16> to binding (0, 1). | |
| // NOTE(review): presumably the normalize step that consumes a mean (a) and a sum of squared deviations (b) - confirm. | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
| // Export region returns the workgroup count for this entry point: (72, 10, 64). | |
| hal.executable.export public @forward_dispatch_16_generic_64x10x9216 ordinal(0) layout(#pipeline_layout5) attributes {translation_info = #translation1, workgroup_size = [32 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
| %c72 = arith.constant 72 : index | |
| %c10 = arith.constant 10 : index | |
| %c64 = arith.constant 64 : index | |
| hal.return %c72, %c10, %c64 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant> | |
| // Binding (0, 0) is declared twice and marked aliased: once as vector<4xf16> elements, once as scalar f32 elements. | |
| spirv.GlobalVariable @__resource_var_0_0__0 bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer> | |
| // Binding (0, 1) receives the vector<4xf16> results. | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_16_generic_64x10x9216() "None" { | |
| %cst3_i32 = spirv.Constant 3 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst2304_i32 = spirv.Constant 2304 : i32 | |
| %cst23040_i32 = spirv.Constant 23040 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst4_i32 = spirv.Constant 4 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst_f32 = spirv.Constant 9.216000e+04 : f32 | |
| %cst_f32_0 = spirv.Constant 9.99999974E-6 : f32 | |
| // Load the four i32 push constants (%1, %3, %5, %7). | |
| // NOTE(review): presumably byte offsets for the four buffer accesses below - confirm against the host-side dispatch. | |
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant> | |
| %0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %1 = spirv.Load "PushConstant" %0 : i32 | |
| %2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %3 = spirv.Load "PushConstant" %2 : i32 | |
| %4 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %5 = spirv.Load "PushConstant" %4 : i32 | |
| %6 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst3_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %7 = spirv.Load "PushConstant" %6 : i32 | |
| // %13 = floor(%1 / 8): negative inputs are mapped through -1 - x before and after SDiv so the result rounds toward -inf. | |
| %8 = spirv.SLessThan %1, %cst0_i32 : i32 | |
| %9 = spirv.ISub %cst-1_i32, %1 : i32 | |
| %10 = spirv.Select %8, %9, %1 : i1, i32 | |
| %11 = spirv.SDiv %10, %cst8_i32 : i32 | |
| %12 = spirv.ISub %cst-1_i32, %11 : i32 | |
| %13 = spirv.Select %8, %12, %11 : i1, i32 | |
| %__resource_var_0_0__0_addr = spirv.mlir.addressof @__resource_var_0_0__0 : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| // %19 = floor(%3 / 4), same floor-division pattern. | |
| %14 = spirv.SLessThan %3, %cst0_i32 : i32 | |
| %15 = spirv.ISub %cst-1_i32, %3 : i32 | |
| %16 = spirv.Select %14, %15, %3 : i1, i32 | |
| %17 = spirv.SDiv %16, %cst4_i32 : i32 | |
| %18 = spirv.ISub %cst-1_i32, %17 : i32 | |
| %19 = spirv.Select %14, %18, %17 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer> | |
| // %25 = floor(%5 / 4), same floor-division pattern. | |
| %20 = spirv.SLessThan %5, %cst0_i32 : i32 | |
| %21 = spirv.ISub %cst-1_i32, %5 : i32 | |
| %22 = spirv.Select %20, %21, %5 : i1, i32 | |
| %23 = spirv.SDiv %22, %cst4_i32 : i32 | |
| %24 = spirv.ISub %cst-1_i32, %23 : i32 | |
| %25 = spirv.Select %20, %24, %23 : i1, i32 | |
| // %31 = floor(%7 / 8), same floor-division pattern; base index into the output binding. | |
| %26 = spirv.SLessThan %7, %cst0_i32 : i32 | |
| %27 = spirv.ISub %cst-1_i32, %7 : i32 | |
| %28 = spirv.Select %26, %27, %7 : i1, i32 | |
| %29 = spirv.SDiv %28, %cst8_i32 : i32 | |
| %30 = spirv.ISub %cst-1_i32, %29 : i32 | |
| %31 = spirv.Select %26, %30, %29 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| // %33 = workgroup id z, %35 = workgroup id y, %37 = workgroup id x. | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %32 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %33 = spirv.CompositeExtract %32[2 : i32] : vector<3xi32> | |
| %34 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %35 = spirv.CompositeExtract %34[1 : i32] : vector<3xi32> | |
| %36 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %37 = spirv.CompositeExtract %36[0 : i32] : vector<3xi32> | |
| // %39 = the f32 constant 9.99999974E-6 converted to f16 and splatted to four lanes. | |
| %38 = spirv.FConvert %cst_f32_0 : f32 to f16 | |
| %39 = spirv.CompositeConstruct %38, %38, %38, %38 : (f16, f16, f16, f16) -> vector<4xf16> | |
| // %41 = x component of the local invocation id. | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %40 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %41 = spirv.CompositeExtract %40[0 : i32] : vector<3xi32> | |
| // Shared element index %47 = workgroup_id.x * 32 + local_id.x + workgroup_id.z * 23040 + workgroup_id.y * 2304. | |
| %42 = spirv.IMul %37, %cst32_i32 : i32 | |
| %43 = spirv.IAdd %42, %41 : i32 | |
| %44 = spirv.IMul %33, %cst23040_i32 : i32 | |
| %45 = spirv.IAdd %43, %44 : i32 | |
| %46 = spirv.IMul %35, %cst2304_i32 : i32 | |
| %47 = spirv.IAdd %45, %46 : i32 | |
| // %50 = input vector<4xf16> at index %47 + %13. | |
| %48 = spirv.IAdd %47, %13 : i32 | |
| %49 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %48] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| %50 = spirv.Load "StorageBuffer" %49 : vector<4xf16> | |
| // %53 and %56 are f32 scalars read through the same scalar view, at workgroup_id.z + %19 and workgroup_id.z + %25. | |
| %51 = spirv.IAdd %33, %19 : i32 | |
| %52 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %51] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
| %53 = spirv.Load "StorageBuffer" %52 : f32 | |
| %54 = spirv.IAdd %33, %25 : i32 | |
| %55 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %54] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
| %56 = spirv.Load "StorageBuffer" %55 : f32 | |
| // %62 = inversesqrt(f16(%56 / 92160.0) + %39); the division is done in f32, the add and rsqrt in f16. | |
| %57 = spirv.FDiv %56, %cst_f32 : f32 | |
| %58 = spirv.FConvert %57 : f32 to f16 | |
| %59 = spirv.CompositeConstruct %58, %58, %58, %58 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %60 = spirv.FAdd %59, %39 : vector<4xf16> | |
| %61 = spirv.FConvert %53 : f32 to f16 | |
| %62 = spirv.GL.InverseSqrt %60 : vector<4xf16> | |
| // %65 = (%50 - splat(f16(%53))) * %62. | |
| %63 = spirv.CompositeConstruct %61, %61, %61, %61 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %64 = spirv.FSub %50, %63 : vector<4xf16> | |
| %65 = spirv.FMul %64, %62 : vector<4xf16> | |
| // Store the result at index %47 + %31 of the output binding. | |
| %66 = spirv.IAdd %47, %31 : i32 | |
| %67 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %66] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %67, %65 : vector<4xf16> | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_16_generic_64x10x9216, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_16_generic_64x10x9216 "LocalSize", 32, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
// Executable forward_dispatch_17: a single vulkan_spirv_fb variant exporting the
// kernel forward_dispatch_17_generic_2x320x96x96.
// Per invocation the kernel loads one vector<4xf16> `a` from binding 0 and two f16
// scalars `s` and `b` from binding 1, forms y = a * s + b, and stores
// y * (1 / (1 + E)) to binding 2, where E is the value produced from -y by the
// f32 sequence %68..%97 (looks like an approximation of exp(-y), i.e. the whole
// kernel would be y * sigmoid(y) -- TODO confirm against the originating op).
| hal.executable private @forward_dispatch_17 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
// Export entry: declares workgroup_size = [8, 4, 1]; its region ignores the
// arguments and returns the constants (3, 24, 640), presumably the x/y/z
// workgroup counts for the dispatch.
| hal.executable.export public @forward_dispatch_17_generic_2x320x96x96 ordinal(0) layout(#pipeline_layout6) attributes {translation_info = #translation1, workgroup_size = [8 : index, 4 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| %c3 = arith.constant 3 : index | |
| %c24 = arith.constant 24 : index | |
| %c640 = arith.constant 640 : index | |
| hal.return %c3, %c24, %c640 : index, index, index | |
| } | |
// Inner module; the attribute records the SPIR-V target environment
// (v1.6 capability/extension list, api=Vulkan, NVIDIA:DiscreteGPU, resource limits).
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
// Module globals: the two built-in ID vectors, a push-constant block of four
// i32 values, and three storage buffers in set 0:
//   binding 0: runtime array of vector<4xf16> (stride 8), read
//   binding 1: runtime array of f16 (stride 2), read
//   binding 2: runtime array of vector<4xf16> (stride 8), written
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_17_generic_2x320x96x96() "None" { | |
// Scalar constants used for indexing, followed by the f32/i32 vector constants
// consumed by the %68..%97 sequence (1.44269502 and 0.693147182 are close to
// log2(e) and ln(2); 0x7F800000 / 0xFF800000 are the f32 +inf / -inf bit patterns).
| %cst3_i32 = spirv.Constant 3 : i32 | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst2304_i32 = spirv.Constant 2304 : i32 | |
| %cst737280_i32 = spirv.Constant 737280 : i32 | |
| %cst96_i32 = spirv.Constant 96 : i32 | |
| %cst24_i32 = spirv.Constant 24 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst_vec_4xf32 = spirv.Constant dense<0.693147182> : vector<4xf32> | |
| %cst_vec_4xf32_0 = spirv.Constant dense<1.44269502> : vector<4xf32> | |
| %cst_vec_4xf32_1 = spirv.Constant dense<1.000000e+00> : vector<4xf32> | |
| %cst_vec_4xf32_2 = spirv.Constant dense<0.499705136> : vector<4xf32> | |
| %cst_vec_4xf32_3 = spirv.Constant dense<0.168738902> : vector<4xf32> | |
| %cst_vec_4xf32_4 = spirv.Constant dense<0.0366896503> : vector<4xf32> | |
| %cst_vec_4xf32_5 = spirv.Constant dense<1.314350e-02> : vector<4xf32> | |
| %cst_vec_4xi32 = spirv.Constant dense<23> : vector<4xi32> | |
| %cst_vec_4xi32_6 = spirv.Constant dense<127> : vector<4xi32> | |
| %cst_vec_4xf32_7 = spirv.Constant dense<0.000000e+00> : vector<4xf32> | |
| %cst_vec_4xf32_8 = spirv.Constant dense<0x7F800000> : vector<4xf32> | |
| %cst_vec_4xf32_9 = spirv.Constant dense<0xFF800000> : vector<4xf32> | |
| %cst_vec_4xf32_10 = spirv.Constant dense<1.17549435E-38> : vector<4xf32> | |
| %cst_vec_4xi32_11 = spirv.Constant dense<-127> : vector<4xi32> | |
| %cst_vec_4xf16 = spirv.Constant dense<1.000000e+00> : vector<4xf16> | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
// Load the four i32 push constants into %1, %3, %5, %7.
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant> | |
| %0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %1 = spirv.Load "PushConstant" %0 : i32 | |
| %2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %3 = spirv.Load "PushConstant" %2 : i32 | |
| %4 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %5 = spirv.Load "PushConstant" %4 : i32 | |
| %6 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst3_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %7 = spirv.Load "PushConstant" %6 : i32 | |
// Each push constant is floor-divided by a constant using the pattern
//   x < 0 ? -1 - ((-1 - x) / c) : x / c
// so the result rounds toward negative infinity even though SDiv is applied
// only to a non-negative operand. The divisors equal the element strides of
// the buffers the results index (presumably byte offsets turned into element
// offsets -- verify against the host-side dispatch code).
// %13 = floordiv(%1, 8): extra element offset into binding 0.
| %8 = spirv.SLessThan %1, %cst0_i32 : i32 | |
| %9 = spirv.ISub %cst-1_i32, %1 : i32 | |
| %10 = spirv.Select %8, %9, %1 : i1, i32 | |
| %11 = spirv.SDiv %10, %cst8_i32 : i32 | |
| %12 = spirv.ISub %cst-1_i32, %11 : i32 | |
| %13 = spirv.Select %8, %12, %11 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
// %19 = floordiv(%3, 2): element offset for the first scalar read from binding 1.
| %14 = spirv.SLessThan %3, %cst0_i32 : i32 | |
| %15 = spirv.ISub %cst-1_i32, %3 : i32 | |
| %16 = spirv.Select %14, %15, %3 : i1, i32 | |
| %17 = spirv.SDiv %16, %cst2_i32 : i32 | |
| %18 = spirv.ISub %cst-1_i32, %17 : i32 | |
| %19 = spirv.Select %14, %18, %17 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
// %25 = floordiv(%5, 2): element offset for the second scalar read from binding 1.
| %20 = spirv.SLessThan %5, %cst0_i32 : i32 | |
| %21 = spirv.ISub %cst-1_i32, %5 : i32 | |
| %22 = spirv.Select %20, %21, %5 : i1, i32 | |
| %23 = spirv.SDiv %22, %cst2_i32 : i32 | |
| %24 = spirv.ISub %cst-1_i32, %23 : i32 | |
| %25 = spirv.Select %20, %24, %23 : i1, i32 | |
// %31 = floordiv(%7, 8): extra element offset into binding 2.
| %26 = spirv.SLessThan %7, %cst0_i32 : i32 | |
| %27 = spirv.ISub %cst-1_i32, %7 : i32 | |
| %28 = spirv.Select %26, %27, %7 : i1, i32 | |
| %29 = spirv.SDiv %28, %cst8_i32 : i32 | |
| %30 = spirv.ISub %cst-1_i32, %29 : i32 | |
| %31 = spirv.Select %26, %30, %29 : i1, i32 | |
| %__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
// Workgroup / invocation IDs. WorkgroupId.z (%33) is split into
// %34 = z / 320 and %35 = z % 320; %37 = WorkgroupId.y, %39 = WorkgroupId.x,
// %41 = LocalInvocationId.y, %43 = LocalInvocationId.x.
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %32 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %33 = spirv.CompositeExtract %32[2 : i32] : vector<3xi32> | |
| %34 = spirv.UDiv %33, %cst320_i32 : i32 | |
| %35 = spirv.UMod %33, %cst320_i32 : i32 | |
| %36 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %37 = spirv.CompositeExtract %36[1 : i32] : vector<3xi32> | |
| %38 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %39 = spirv.CompositeExtract %38[0 : i32] : vector<3xi32> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %40 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %41 = spirv.CompositeExtract %40[1 : i32] : vector<3xi32> | |
| %42 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %43 = spirv.CompositeExtract %42[0 : i32] : vector<3xi32> | |
// Linear vector index shared by the load and the store:
//   %53 = local.y*24 + wg.y*96 + wg.x*8 + local.x + %34*737280 + %35*2304
// (in units of vector<4xf16>; 2304 = 96*24 and 737280 = 320*2304).
| %44 = spirv.IMul %41, %cst24_i32 : i32 | |
| %45 = spirv.IMul %37, %cst96_i32 : i32 | |
| %46 = spirv.IAdd %44, %45 : i32 | |
| %47 = spirv.IMul %39, %cst8_i32 : i32 | |
| %48 = spirv.IAdd %46, %47 : i32 | |
| %49 = spirv.IAdd %48, %43 : i32 | |
| %50 = spirv.IMul %34, %cst737280_i32 : i32 | |
| %51 = spirv.IAdd %49, %50 : i32 | |
| %52 = spirv.IMul %35, %cst2304_i32 : i32 | |
| %53 = spirv.IAdd %51, %52 : i32 | |
// %56 = binding0[%53 + %13] (the input vector `a`).
| %54 = spirv.IAdd %53, %13 : i32 | |
| %55 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %54] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| %56 = spirv.Load "StorageBuffer" %55 : vector<4xf16> | |
// %59 = binding1[%35 + %19] (scale `s`), %62 = binding1[%35 + %25] (offset `b`);
// both are indexed by %35 = WorkgroupId.z % 320.
| %57 = spirv.IAdd %35, %19 : i32 | |
| %58 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %57] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %59 = spirv.Load "StorageBuffer" %58 : f16 | |
| %60 = spirv.IAdd %35, %25 : i32 | |
| %61 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %60] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %62 = spirv.Load "StorageBuffer" %61 : f16 | |
// y = %66 = a * splat(s) + splat(b); %67 = -y; %68 = -y widened to f32.
| %63 = spirv.CompositeConstruct %59, %59, %59, %59 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %64 = spirv.FMul %56, %63 : vector<4xf16> | |
| %65 = spirv.CompositeConstruct %62, %62, %62, %62 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %66 = spirv.FAdd %64, %65 : vector<4xf16> | |
| %67 = spirv.FNegate %66 : vector<4xf16> | |
| %68 = spirv.FConvert %67 : vector<4xf16> to vector<4xf32> | |
// %70 marks NaN lanes of %68 (the LogicalOr of %69 with itself equals %69).
| %69 = spirv.IsNan %68 : vector<4xf32> | |
| %70 = spirv.LogicalOr %69, %69 : vector<4xi1> | |
// Range reduction: k = %72 = floor(%68 * 1.44269502),
// r = %74 = %68 - k * 0.693147182.
| %71 = spirv.FMul %68, %cst_vec_4xf32_0 : vector<4xf32> | |
| %72 = spirv.GL.Floor %71 : vector<4xf32> | |
| %73 = spirv.FMul %72, %cst_vec_4xf32 : vector<4xf32> | |
| %74 = spirv.FSub %68, %73 : vector<4xf32> | |
// Polynomial in r (with %75 = r^2, %76 = r^4):
//   %81 = (1 + r) + (0.499705136 + 0.168738902*r)*r^2
//                 + (0.0366896503 + 1.314350e-02*r)*r^4
| %75 = spirv.FMul %74, %74 : vector<4xf32> | |
| %76 = spirv.FMul %75, %75 : vector<4xf32> | |
| %77 = spirv.GL.Fma %cst_vec_4xf32_1, %74, %cst_vec_4xf32_1 : vector<4xf32> | |
| %78 = spirv.GL.Fma %cst_vec_4xf32_3, %74, %cst_vec_4xf32_2 : vector<4xf32> | |
| %79 = spirv.GL.Fma %cst_vec_4xf32_5, %74, %cst_vec_4xf32_4 : vector<4xf32> | |
| %80 = spirv.GL.Fma %78, %75, %77 : vector<4xf32> | |
| %81 = spirv.GL.Fma %79, %76, %80 : vector<4xf32> | |
// Scale factor: %85 is the f32 whose bits are (int(k) + 127) << 23, i.e. the
// biased exponent field is set from k; %86 = %81 * %85.
| %82 = spirv.ConvertFToS %72 : vector<4xf32> to vector<4xi32> | |
| %83 = spirv.IAdd %82, %cst_vec_4xi32_6 : vector<4xi32> | |
| %84 = spirv.ShiftLeftLogical %83, %cst_vec_4xi32 : vector<4xi32>, vector<4xi32> | |
| %85 = spirv.Bitcast %84 : vector<4xi32> to vector<4xf32> | |
| %86 = spirv.FMul %81, %85 : vector<4xf32> | |
// Special-case selection, applied innermost to outermost:
//   %92 (-127 <= int(k) <= 127) -> %86, otherwise +inf if %68 > 0 else 1.17549435E-38
//   %68 == +inf -> +inf;  %68 == -inf -> 0;  NaN lanes (%70) -> %68 itself.
| %87 = spirv.SLessThanEqual %82, %cst_vec_4xi32_6 : vector<4xi32> | |
| %88 = spirv.SGreaterThanEqual %82, %cst_vec_4xi32_11 : vector<4xi32> | |
| %89 = spirv.FOrdEqual %68, %cst_vec_4xf32_9 : vector<4xf32> | |
| %90 = spirv.FOrdEqual %68, %cst_vec_4xf32_8 : vector<4xf32> | |
| %91 = spirv.FOrdGreaterThan %68, %cst_vec_4xf32_7 : vector<4xf32> | |
| %92 = spirv.LogicalAnd %87, %88 : vector<4xi1> | |
| %93 = spirv.Select %91, %cst_vec_4xf32_8, %cst_vec_4xf32_10 : vector<4xi1>, vector<4xf32> | |
| %94 = spirv.Select %92, %86, %93 : vector<4xi1>, vector<4xf32> | |
| %95 = spirv.Select %90, %cst_vec_4xf32_8, %94 : vector<4xi1>, vector<4xf32> | |
| %96 = spirv.Select %89, %cst_vec_4xf32_7, %95 : vector<4xi1>, vector<4xf32> | |
| %97 = spirv.Select %70, %68, %96 : vector<4xi1>, vector<4xf32> | |
// Back in f16: %101 = (1 / (%98 + 1)) * y, stored to binding2[%53 + %31].
| %98 = spirv.FConvert %97 : vector<4xf32> to vector<4xf16> | |
| %99 = spirv.FAdd %98, %cst_vec_4xf16 : vector<4xf16> | |
| %100 = spirv.FDiv %cst_vec_4xf16, %99 : vector<4xf16> | |
| %101 = spirv.FMul %100, %66 : vector<4xf16> | |
| %102 = spirv.IAdd %53, %31 : i32 | |
| %103 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %102] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %103, %101 : vector<4xf16> | |
| spirv.Return | |
| } | |
// GLCompute entry point with LocalSize 8x4x1, matching workgroup_size on the export.
| spirv.EntryPoint "GLCompute" @forward_dispatch_17_generic_2x320x96x96, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_17_generic_2x320x96x96 "LocalSize", 8, 4, 1 | |
| } | |
| } | |
| } | |
| } | |
// Executable forward_dispatch_18: a single vulkan_spirv_fb variant exporting the
// kernel forward_dispatch_18.
// Each invocation copies exactly one f16 element from binding 0 to binding 1.
// The source index uses strides 96 / 9216 / 2949120 and the destination index
// uses strides 98 / 9604 / 3073280 plus a constant 99 (= 98 + 1). This is
// consistent with placing 96x96 planes into the interior of 98x98 planes with a
// one-element border -- presumably a padding copy; confirm against the
// originating op.
| hal.executable private @forward_dispatch_18 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
// Export entry: declares workgroup_size = [32, 1, 1]; its region ignores the
// arguments and returns the constants (3, 96, 640), presumably the x/y/z
// workgroup counts for the dispatch.
| hal.executable.export public @forward_dispatch_18 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation, workgroup_size = [32 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| %c3 = arith.constant 3 : index | |
| %c96 = arith.constant 96 : index | |
| %c640 = arith.constant 640 : index | |
| hal.return %c3, %c96, %c640 : index, index, index | |
| } | |
// Inner module; the attribute records the SPIR-V target environment
// (v1.6 capability/extension list, api=Vulkan, NVIDIA:DiscreteGPU, resource limits).
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
// Module globals: the two built-in ID vectors, a push-constant block of two
// i32 values, and two storage buffers in set 0, both runtime arrays of f16
// (stride 2): binding 0 is read, binding 1 is written.
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_18() "None" { | |
// Index constants: 9216 = 96*96, 2949120 = 320*9216, 9604 = 98*98,
// 3073280 = 320*9604, 99 = 98 + 1.
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst99_i32 = spirv.Constant 99 : i32 | |
| %cst3073280_i32 = spirv.Constant 3073280 : i32 | |
| %cst98_i32 = spirv.Constant 98 : i32 | |
| %cst9604_i32 = spirv.Constant 9604 : i32 | |
| %cst2949120_i32 = spirv.Constant 2949120 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst96_i32 = spirv.Constant 96 : i32 | |
| %cst9216_i32 = spirv.Constant 9216 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
// Load the two i32 push constants into %1 and %3.
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| %0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %1 = spirv.Load "PushConstant" %0 : i32 | |
| %2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %3 = spirv.Load "PushConstant" %2 : i32 | |
// Floor division by 2 via  x < 0 ? -1 - ((-1 - x) / 2) : x / 2.
// The divisor equals the f16 element stride of both buffers (presumably a byte
// offset turned into an element offset -- verify against the host side).
// %9 = floordiv(%1, 2): extra element offset into binding 0.
| %4 = spirv.SLessThan %1, %cst0_i32 : i32 | |
| %5 = spirv.ISub %cst-1_i32, %1 : i32 | |
| %6 = spirv.Select %4, %5, %1 : i1, i32 | |
| %7 = spirv.SDiv %6, %cst2_i32 : i32 | |
| %8 = spirv.ISub %cst-1_i32, %7 : i32 | |
| %9 = spirv.Select %4, %8, %7 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
// %15 = floordiv(%3, 2): extra element offset into binding 1.
| %10 = spirv.SLessThan %3, %cst0_i32 : i32 | |
| %11 = spirv.ISub %cst-1_i32, %3 : i32 | |
| %12 = spirv.Select %10, %11, %3 : i1, i32 | |
| %13 = spirv.SDiv %12, %cst2_i32 : i32 | |
| %14 = spirv.ISub %cst-1_i32, %13 : i32 | |
| %15 = spirv.Select %10, %14, %13 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
// IDs: %17 = WorkgroupId.x, %19 = WorkgroupId.y, %21 = WorkgroupId.z, split
// into %22 = z % 320 and %23 = z / 320; %25 / %27 / %29 are
// LocalInvocationId.x / .y / .z.
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %16 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %17 = spirv.CompositeExtract %16[0 : i32] : vector<3xi32> | |
| %18 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %19 = spirv.CompositeExtract %18[1 : i32] : vector<3xi32> | |
| %20 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %21 = spirv.CompositeExtract %20[2 : i32] : vector<3xi32> | |
| %22 = spirv.UMod %21, %cst320_i32 : i32 | |
| %23 = spirv.UDiv %21, %cst320_i32 : i32 | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %24 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %25 = spirv.CompositeExtract %24[0 : i32] : vector<3xi32> | |
| %26 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %27 = spirv.CompositeExtract %26[1 : i32] : vector<3xi32> | |
| %28 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %29 = spirv.CompositeExtract %28[2 : i32] : vector<3xi32> | |
// Source element index:
//   %42 = %22*9216 + local.z*9216 + wg.y*96 + local.y*96 + local.x + wg.x*32
//         + %23*2949120 + %9
| %30 = spirv.IMul %22, %cst9216_i32 : i32 | |
| %31 = spirv.IMul %29, %cst9216_i32 : i32 | |
| %32 = spirv.IAdd %30, %31 : i32 | |
| %33 = spirv.IMul %19, %cst96_i32 : i32 | |
| %34 = spirv.IAdd %32, %33 : i32 | |
| %35 = spirv.IMul %27, %cst96_i32 : i32 | |
| %36 = spirv.IAdd %34, %35 : i32 | |
| %37 = spirv.IAdd %36, %25 : i32 | |
| %38 = spirv.IMul %17, %cst32_i32 : i32 | |
| %39 = spirv.IAdd %37, %38 : i32 | |
| %40 = spirv.IMul %23, %cst2949120_i32 : i32 | |
| %41 = spirv.IAdd %39, %40 : i32 | |
| %42 = spirv.IAdd %41, %9 : i32 | |
| %43 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %42] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %44 = spirv.Load "StorageBuffer" %43 : f16 | |
// Destination element index (same ID terms, 98-based strides, +99 at the end):
//   %57 = %22*9604 + local.z*9604 + wg.y*98 + local.y*98 + local.x + wg.x*32
//         + %23*3073280 + %15 + 99
// %38 (wg.x*32) is reused from the source computation.
| %45 = spirv.IMul %22, %cst9604_i32 : i32 | |
| %46 = spirv.IMul %29, %cst9604_i32 : i32 | |
| %47 = spirv.IAdd %45, %46 : i32 | |
| %48 = spirv.IMul %19, %cst98_i32 : i32 | |
| %49 = spirv.IAdd %47, %48 : i32 | |
| %50 = spirv.IMul %27, %cst98_i32 : i32 | |
| %51 = spirv.IAdd %49, %50 : i32 | |
| %52 = spirv.IAdd %51, %25 : i32 | |
| %53 = spirv.IAdd %52, %38 : i32 | |
| %54 = spirv.IMul %23, %cst3073280_i32 : i32 | |
| %55 = spirv.IAdd %53, %54 : i32 | |
| %56 = spirv.IAdd %55, %15 : i32 | |
| %57 = spirv.IAdd %56, %cst99_i32 : i32 | |
| %58 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %57] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %58, %44 : f16 | |
| spirv.Return | |
| } | |
// GLCompute entry point with LocalSize 32x1x1, matching workgroup_size on the export.
| spirv.EntryPoint "GLCompute" @forward_dispatch_18, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_18 "LocalSize", 32, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
// Executable forward_dispatch_19: a single vulkan_spirv_fb variant exporting the
// kernel forward_dispatch_19_generic_2x320x3x3x96x96.
// Each invocation copies exactly one f16 element from binding 0 to binding 1.
// WorkgroupId.z is decomposed into four indices (two of them in 0..2); the
// source is addressed with 98-based strides shifted by those two small indices,
// and the destination with dense 96x96 planes. This looks like gathering 3x3
// shifted windows of a 98x98 plane into separate 96x96 planes -- presumably the
// window-expansion step of a 3x3 convolution; confirm against the originating op.
| hal.executable private @forward_dispatch_19 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
// Export entry: declares workgroup_size = [32, 1, 1]; its region ignores the
// arguments and returns the constants (3, 96, 5760), presumably the x/y/z
// workgroup counts for the dispatch (5760 = 2*320*3*3).
| hal.executable.export public @forward_dispatch_19_generic_2x320x3x3x96x96 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation, workgroup_size = [32 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: index): | |
| %c3 = arith.constant 3 : index | |
| %c96 = arith.constant 96 : index | |
| %c5760 = arith.constant 5760 : index | |
| hal.return %c3, %c96, %c5760 : index, index, index | |
| } | |
// Inner module; the attribute records the SPIR-V target environment
// (v1.6 capability/extension list, api=Vulkan, NVIDIA:DiscreteGPU, resource limits).
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
// Module globals: the two built-in ID vectors, a push-constant block of two
// i32 values, and two storage buffers in set 0, both runtime arrays of f16
// (stride 2): binding 0 is read, binding 1 is written.
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_19_generic_2x320x3x3x96x96() "None" { | |
// Index constants: 9216 = 96*96, 27648 = 3*9216, 82944 = 9*9216,
// 26542080 = 320*82944, 9604 = 98*98, 3073280 = 320*9604.
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst27648_i32 = spirv.Constant 27648 : i32 | |
| %cst82944_i32 = spirv.Constant 82944 : i32 | |
| %cst26542080_i32 = spirv.Constant 26542080 : i32 | |
| %cst96_i32 = spirv.Constant 96 : i32 | |
| %cst9216_i32 = spirv.Constant 9216 : i32 | |
| %cst9604_i32 = spirv.Constant 9604 : i32 | |
| %cst3073280_i32 = spirv.Constant 3073280 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst98_i32 = spirv.Constant 98 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst3_i32 = spirv.Constant 3 : i32 | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
// Load the two i32 push constants into %1 and %3.
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| %0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %1 = spirv.Load "PushConstant" %0 : i32 | |
| %2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %3 = spirv.Load "PushConstant" %2 : i32 | |
// Floor division by 2 via  x < 0 ? -1 - ((-1 - x) / 2) : x / 2.
// The divisor equals the f16 element stride of both buffers (presumably a byte
// offset turned into an element offset -- verify against the host side).
// %9 = floordiv(%1, 2): extra element offset into binding 0.
| %4 = spirv.SLessThan %1, %cst0_i32 : i32 | |
| %5 = spirv.ISub %cst-1_i32, %1 : i32 | |
| %6 = spirv.Select %4, %5, %1 : i1, i32 | |
| %7 = spirv.SDiv %6, %cst2_i32 : i32 | |
| %8 = spirv.ISub %cst-1_i32, %7 : i32 | |
| %9 = spirv.Select %4, %8, %7 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
// %15 = floordiv(%3, 2): extra element offset into binding 1.
| %10 = spirv.SLessThan %3, %cst0_i32 : i32 | |
| %11 = spirv.ISub %cst-1_i32, %3 : i32 | |
| %12 = spirv.Select %10, %11, %3 : i1, i32 | |
| %13 = spirv.SDiv %12, %cst2_i32 : i32 | |
| %14 = spirv.ISub %cst-1_i32, %13 : i32 | |
| %15 = spirv.Select %10, %14, %13 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
// Decompose z = WorkgroupId.z (%17) as a mixed-radix number, innermost first:
//   %23 = z % 3, %22 = (z / 3) % 3, %21 = (z / 9) % 320, %20 = (z / 9) / 320.
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %16 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %17 = spirv.CompositeExtract %16[2 : i32] : vector<3xi32> | |
| %18 = spirv.UDiv %17, %cst3_i32 : i32 | |
| %19 = spirv.UDiv %18, %cst3_i32 : i32 | |
| %20 = spirv.UDiv %19, %cst320_i32 : i32 | |
| %21 = spirv.UMod %19, %cst320_i32 : i32 | |
| %22 = spirv.UMod %18, %cst3_i32 : i32 | |
| %23 = spirv.UMod %17, %cst3_i32 : i32 | |
// Remaining IDs: %25 = WorkgroupId.y, %27 = WorkgroupId.x,
// %29 / %31 / %33 = LocalInvocationId.x / .y / .z.
| %24 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %25 = spirv.CompositeExtract %24[1 : i32] : vector<3xi32> | |
| %26 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %27 = spirv.CompositeExtract %26[0 : i32] : vector<3xi32> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %28 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %29 = spirv.CompositeExtract %28[0 : i32] : vector<3xi32> | |
| %30 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %31 = spirv.CompositeExtract %30[1 : i32] : vector<3xi32> | |
| %32 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %33 = spirv.CompositeExtract %32[2 : i32] : vector<3xi32> | |
// Source element index:
//   %48 = (local.y + %22 + wg.y)*98 + %23 + wg.x*32 + local.z + local.x
//         + %20*3073280 + %21*9604 + %9
// i.e. %22 shifts the row and %23 shifts the column within a 98-wide plane.
| %34 = spirv.IMul %31, %cst98_i32 : i32 | |
| %35 = spirv.IMul %22, %cst98_i32 : i32 | |
| %36 = spirv.IAdd %34, %35 : i32 | |
| %37 = spirv.IMul %25, %cst98_i32 : i32 | |
| %38 = spirv.IAdd %36, %37 : i32 | |
| %39 = spirv.IAdd %38, %23 : i32 | |
| %40 = spirv.IMul %27, %cst32_i32 : i32 | |
| %41 = spirv.IAdd %39, %40 : i32 | |
| %42 = spirv.IAdd %41, %33 : i32 | |
| %43 = spirv.IAdd %42, %29 : i32 | |
| %44 = spirv.IMul %20, %cst3073280_i32 : i32 | |
| %45 = spirv.IAdd %43, %44 : i32 | |
| %46 = spirv.IMul %21, %cst9604_i32 : i32 | |
| %47 = spirv.IAdd %45, %46 : i32 | |
| %48 = spirv.IAdd %47, %9 : i32 | |
| %49 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %48] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %50 = spirv.Load "StorageBuffer" %49 : f16 | |
// Destination element index:
//   %66 = %23*9216 + local.z*9216 + wg.y*96 + local.y*96 + local.x + wg.x*32
//         + %20*26542080 + %21*82944 + %22*27648 + %15
// %40 (wg.x*32) is reused from the source computation.
| %51 = spirv.IMul %23, %cst9216_i32 : i32 | |
| %52 = spirv.IMul %33, %cst9216_i32 : i32 | |
| %53 = spirv.IAdd %51, %52 : i32 | |
| %54 = spirv.IMul %25, %cst96_i32 : i32 | |
| %55 = spirv.IAdd %53, %54 : i32 | |
| %56 = spirv.IMul %31, %cst96_i32 : i32 | |
| %57 = spirv.IAdd %55, %56 : i32 | |
| %58 = spirv.IAdd %57, %29 : i32 | |
| %59 = spirv.IAdd %58, %40 : i32 | |
| %60 = spirv.IMul %20, %cst26542080_i32 : i32 | |
| %61 = spirv.IAdd %59, %60 : i32 | |
| %62 = spirv.IMul %21, %cst82944_i32 : i32 | |
| %63 = spirv.IAdd %61, %62 : i32 | |
| %64 = spirv.IMul %22, %cst27648_i32 : i32 | |
| %65 = spirv.IAdd %63, %64 : i32 | |
| %66 = spirv.IAdd %65, %15 : i32 | |
| %67 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %66] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %67, %50 : f16 | |
| spirv.Return | |
| } | |
// GLCompute entry point with LocalSize 32x1x1, matching workgroup_size on the export.
| spirv.EntryPoint "GLCompute" @forward_dispatch_19_generic_2x320x3x3x96x96, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_19_generic_2x320x3x3x96x96 "LocalSize", 32, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
| // Executable forward_dispatch_20: a single Vulkan/SPIR-V variant exporting the entry point | |
| // forward_dispatch_20_generic_2x1280, an element-wise kernel over vector<4xf16> values: | |
| // s = binding1[...] + binding0[...]; binding2[...] = s * (1 / (1 + E(-s))), where E is the | |
| // polynomial/exponent-bit approximation built below (consistent with an exp() expansion). | |
| hal.executable private @forward_dispatch_20 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
| // Export with workgroup_size 32x1x1. The region returns three constant index values (10, 2, 1); | |
| // presumably the x/y/z workgroup counts of the dispatch - confirm against the IREE HAL dialect docs. | |
| hal.executable.export public @forward_dispatch_20_generic_2x1280 ordinal(0) layout(#pipeline_layout2) attributes {translation_info = #translation1, workgroup_size = [32 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index): | |
| %c10 = arith.constant 10 : index | |
| %c2 = arith.constant 2 : index | |
| %c1 = arith.constant 1 : index | |
| hal.return %c10, %c2, %c1 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
| // Built-in inputs plus three storage buffers (set 0, bindings 0..2), each declared as a | |
| // runtime array of vector<4xf16> with an 8-byte stride. | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_20_generic_2x1280() "None" { | |
| // Constants: buffer offsets (in vector<4xf16> elements), polynomial coefficients, and the | |
| // integer/float limits used by the approximation's range handling. | |
| %cst23264_i32 = spirv.Constant 23264 : i32 | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
| %cst216398400_i32 = spirv.Constant 216398400 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst_vec_4xf32 = spirv.Constant dense<0.693147182> : vector<4xf32> | |
| %cst_vec_4xf32_0 = spirv.Constant dense<1.44269502> : vector<4xf32> | |
| %cst_vec_4xf32_1 = spirv.Constant dense<1.000000e+00> : vector<4xf32> | |
| %cst_vec_4xf32_2 = spirv.Constant dense<0.499705136> : vector<4xf32> | |
| %cst_vec_4xf32_3 = spirv.Constant dense<0.168738902> : vector<4xf32> | |
| %cst_vec_4xf32_4 = spirv.Constant dense<0.0366896503> : vector<4xf32> | |
| %cst_vec_4xf32_5 = spirv.Constant dense<1.314350e-02> : vector<4xf32> | |
| %cst_vec_4xi32 = spirv.Constant dense<23> : vector<4xi32> | |
| %cst_vec_4xi32_6 = spirv.Constant dense<127> : vector<4xi32> | |
| %cst_vec_4xf32_7 = spirv.Constant dense<0.000000e+00> : vector<4xf32> | |
| %cst_vec_4xf32_8 = spirv.Constant dense<0x7F800000> : vector<4xf32> | |
| %cst_vec_4xf32_9 = spirv.Constant dense<0xFF800000> : vector<4xf32> | |
| %cst_vec_4xf32_10 = spirv.Constant dense<1.17549435E-38> : vector<4xf32> | |
| %cst_vec_4xi32_11 = spirv.Constant dense<-127> : vector<4xi32> | |
| %cst_vec_4xf16 = spirv.Constant dense<1.000000e+00> : vector<4xf16> | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| %__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| // %1 = WorkgroupId.y, %3 = WorkgroupId.x, %5 = LocalInvocationId.x. | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %0 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %1 = spirv.CompositeExtract %0[1 : i32] : vector<3xi32> | |
| %2 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %3 = spirv.CompositeExtract %2[0 : i32] : vector<3xi32> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %4 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %5 = spirv.CompositeExtract %4[0 : i32] : vector<3xi32> | |
| // %7 = WorkgroupId.x * 32 + LocalInvocationId.x: this invocation's column, in vector<4xf16> units. | |
| %6 = spirv.IMul %3, %cst32_i32 : i32 | |
| %7 = spirv.IAdd %6, %5 : i32 | |
| // Operand A (%10): binding 1 at a fixed base offset of 216398400 elements plus the column; it does | |
| // not depend on WorkgroupId.y. | |
| %8 = spirv.IAdd %7, %cst216398400_i32 : i32 | |
| %9 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %8] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| %10 = spirv.Load "StorageBuffer" %9 : vector<4xf16> | |
| // Operand B (%14): binding 0 at column + WorkgroupId.y * 320. | |
| %11 = spirv.IMul %1, %cst320_i32 : i32 | |
| %12 = spirv.IAdd %7, %11 : i32 | |
| %13 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %12] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| %14 = spirv.Load "StorageBuffer" %13 : vector<4xf16> | |
| // s = A + B (%15); x = -s widened to f32 (%17) is the input of the approximation below. | |
| %15 = spirv.FAdd %10, %14 : vector<4xf16> | |
| %16 = spirv.FNegate %15 : vector<4xf16> | |
| %17 = spirv.FConvert %16 : vector<4xf16> to vector<4xf32> | |
| // NaN mask; both LogicalOr operands are %18, so %19 equals %18. | |
| %18 = spirv.IsNan %17 : vector<4xf32> | |
| %19 = spirv.LogicalOr %18, %18 : vector<4xi1> | |
| // Range reduction: k = floor(x * 1.44269502) (%21), r = x - k * 0.693147182 (%23). | |
| %20 = spirv.FMul %17, %cst_vec_4xf32_0 : vector<4xf32> | |
| %21 = spirv.GL.Floor %20 : vector<4xf32> | |
| %22 = spirv.FMul %21, %cst_vec_4xf32 : vector<4xf32> | |
| %23 = spirv.FSub %17, %22 : vector<4xf32> | |
| // Degree-5 polynomial in r, evaluated with r^2 (%24) and r^4 (%25): | |
| // %30 = (1 + r) + (0.499705136 + 0.168738902*r)*r^2 + (0.0366896503 + 0.0131435*r)*r^4. | |
| %24 = spirv.FMul %23, %23 : vector<4xf32> | |
| %25 = spirv.FMul %24, %24 : vector<4xf32> | |
| %26 = spirv.GL.Fma %cst_vec_4xf32_1, %23, %cst_vec_4xf32_1 : vector<4xf32> | |
| %27 = spirv.GL.Fma %cst_vec_4xf32_3, %23, %cst_vec_4xf32_2 : vector<4xf32> | |
| %28 = spirv.GL.Fma %cst_vec_4xf32_5, %23, %cst_vec_4xf32_4 : vector<4xf32> | |
| %29 = spirv.GL.Fma %27, %24, %26 : vector<4xf32> | |
| %30 = spirv.GL.Fma %28, %25, %29 : vector<4xf32> | |
| // Scale factor: (k + 127) shifted left by 23 and bitcast to f32, i.e. k+127 placed in the f32 | |
| // exponent field (2^k for in-range k); %35 = polynomial * scale. | |
| %31 = spirv.ConvertFToS %21 : vector<4xf32> to vector<4xi32> | |
| %32 = spirv.IAdd %31, %cst_vec_4xi32_6 : vector<4xi32> | |
| %33 = spirv.ShiftLeftLogical %32, %cst_vec_4xi32 : vector<4xi32>, vector<4xi32> | |
| %34 = spirv.Bitcast %33 : vector<4xi32> to vector<4xf32> | |
| %35 = spirv.FMul %30, %34 : vector<4xf32> | |
| // Predicates for the special cases: k <= 127 (%36), k >= -127 (%37), x == -inf (%38), | |
| // x == +inf (%39), x > 0 (%40), k within [-127, 127] (%41). | |
| %36 = spirv.SLessThanEqual %31, %cst_vec_4xi32_6 : vector<4xi32> | |
| %37 = spirv.SGreaterThanEqual %31, %cst_vec_4xi32_11 : vector<4xi32> | |
| %38 = spirv.FOrdEqual %17, %cst_vec_4xf32_9 : vector<4xf32> | |
| %39 = spirv.FOrdEqual %17, %cst_vec_4xf32_8 : vector<4xf32> | |
| %40 = spirv.FOrdGreaterThan %17, %cst_vec_4xf32_7 : vector<4xf32> | |
| %41 = spirv.LogicalAnd %36, %37 : vector<4xi1> | |
| // Select chain, later selects taking precedence: out-of-range k gives +inf for x > 0, else | |
| // 1.17549435E-38; x == +inf gives +inf; x == -inf gives 0; NaN input is passed through unchanged. | |
| %42 = spirv.Select %40, %cst_vec_4xf32_8, %cst_vec_4xf32_10 : vector<4xi1>, vector<4xf32> | |
| %43 = spirv.Select %41, %35, %42 : vector<4xi1>, vector<4xf32> | |
| %44 = spirv.Select %39, %cst_vec_4xf32_8, %43 : vector<4xi1>, vector<4xf32> | |
| %45 = spirv.Select %38, %cst_vec_4xf32_7, %44 : vector<4xi1>, vector<4xf32> | |
| %46 = spirv.Select %19, %17, %45 : vector<4xi1>, vector<4xf32> | |
| // Back to f16: %50 = s * (1 / (1 + E)), with E = %47 the approximation evaluated at -s. | |
| %47 = spirv.FConvert %46 : vector<4xf32> to vector<4xf16> | |
| %48 = spirv.FAdd %47, %cst_vec_4xf16 : vector<4xf16> | |
| %49 = spirv.FDiv %cst_vec_4xf16, %48 : vector<4xf16> | |
| %50 = spirv.FMul %49, %15 : vector<4xf16> | |
| // Result goes to binding 2 at the operand-B index (%12) plus a fixed offset of 23264 elements. | |
| %51 = spirv.IAdd %12, %cst23264_i32 : i32 | |
| %52 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %51] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %52, %50 : vector<4xf16> | |
| spirv.Return | |
| } | |
| // GLCompute entry point; LocalSize 32x1x1 matches the workgroup_size attribute on the export. | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_20_generic_2x1280, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_20_generic_2x1280 "LocalSize", 32, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
| // Executable forward_dispatch_21: a single Vulkan/SPIR-V variant exporting the entry point | |
| // forward_dispatch_21_matmul_32x320x1280. The kernel stages tiles from storage buffers 0 and 1 into | |
| // Workgroup memory, accumulates two 16x16xf16 cooperative matrices (Subgroup scope) with | |
| // NV cooperative-matrix multiply-adds, and stores both accumulators to storage buffer 2. | |
| hal.executable private @forward_dispatch_21 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
| // Export with workgroup_size 128x1x1 and subgroup_size 32. The region returns the constant index | |
| // values (5, 1, 1); presumably the x/y/z workgroup counts - confirm against the IREE HAL dialect docs. | |
| hal.executable.export public @forward_dispatch_21_matmul_32x320x1280 ordinal(0) layout(#pipeline_layout7) attributes {subgroup_size = 32 : index, translation_info = #translation4, workgroup_size = [128 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
| %c5 = arith.constant 5 : index | |
| %c1 = arith.constant 1 : index | |
| hal.return %c5, %c1, %c1 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, Float16, CooperativeMatrixNV], [SPV_KHR_storage_buffer_storage_class, SPV_NV_cooperative_matrix]> { | |
| // Module-level state: built-in ids, one i32 push constant, two Workgroup arrays of vector<4xf32> | |
| // (320 and 576 elements), and three storage buffers (set 0, bindings 0..2) declared as runtime | |
| // arrays of vector<4xf32> with a 16-byte stride. | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<1 x i32, stride=4> [0])>, PushConstant> | |
| spirv.GlobalVariable @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup> | |
| spirv.GlobalVariable @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup> | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_21_matmul_32x320x1280() "None" attributes {spirv.entry_point_abi = #spirv.entry_point_abi<subgroup_size = 32>} { | |
| // Integer constants used for index arithmetic, cooperative-matrix strides and the loop bound. | |
| %cst9_i32 = spirv.Constant 9 : i32 | |
| %false = spirv.Constant false | |
| %cst5_i32 = spirv.Constant 5 : i32 | |
| %cst-33_i32 = spirv.Constant -33 : i32 | |
| %cst-576_i32 = spirv.Constant -576 : i32 | |
| %cst40_i32 = spirv.Constant 40 : i32 | |
| %cst-320_i32 = spirv.Constant -320 : i32 | |
| %cst288_i32 = spirv.Constant 288 : i32 | |
| %cst82_i32 = spirv.Constant 82 : i32 | |
| %cst80_i32 = spirv.Constant 80 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst144_i32 = spirv.Constant 144 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst640_i32 = spirv.Constant 640 : i32 | |
| %cst160_i32 = spirv.Constant 160 : i32 | |
| %cst11632_i32 = spirv.Constant 11632 : i32 | |
| %cst156_i32 = spirv.Constant 156 : i32 | |
| %cst4_i32 = spirv.Constant 4 : i32 | |
| %cst5120_i32 = spirv.Constant 5120 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst16_i32 = spirv.Constant 16 : i32 | |
| %cst1280_i32 = spirv.Constant 1280 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst1248_i32 = spirv.Constant 1248 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| // %0: zero-filled 16x16xf16 cooperative matrix, the initial value of both accumulators. | |
| %cst_f16 = spirv.Constant 0.000000e+00 : f16 | |
| %0 = spirv.CompositeConstruct %cst_f16 : (f16) -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| // %2, %4, %6 = LocalInvocationId x, y, z. | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %1 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %2 = spirv.CompositeExtract %1[0 : i32] : vector<3xi32> | |
| %3 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %4 = spirv.CompositeExtract %3[1 : i32] : vector<3xi32> | |
| %5 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %6 = spirv.CompositeExtract %5[2 : i32] : vector<3xi32> | |
| %__workgroup_mem__4_addr = spirv.mlir.addressof @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup> | |
| %__workgroup_mem__5_addr = spirv.mlir.addressof @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup> | |
| // %8: the single push-constant word. %14 = floor(%8 / 16), built from the sign-aware | |
| // select / SDiv / select sequence (rounds toward negative infinity). | |
| // NOTE(review): %8 looks like a byte offset converted to 16-byte vector units - confirm with the caller. | |
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<1 x i32, stride=4> [0])>, PushConstant> | |
| %7 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<1 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %8 = spirv.Load "PushConstant" %7 : i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %9 = spirv.SLessThan %8, %cst0_i32 : i32 | |
| %10 = spirv.ISub %cst-1_i32, %8 : i32 | |
| %11 = spirv.Select %9, %10, %8 : i1, i32 | |
| %12 = spirv.SDiv %11, %cst16_i32 : i32 | |
| %13 = spirv.ISub %cst-1_i32, %12 : i32 | |
| %14 = spirv.Select %9, %13, %12 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| // %16 = WorkgroupId.y, %18 = WorkgroupId.x. | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %15 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %16 = spirv.CompositeExtract %15[1 : i32] : vector<3xi32> | |
| %17 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %18 = spirv.CompositeExtract %17[0 : i32] : vector<3xi32> | |
| // ---- Prologue: stage the first tiles into Workgroup memory ---- | |
| // Binding 0 source index %31 = lid.x + 5120*(lid.y + lid.z) + 156*floor(lid.x / 4) + 11632, | |
| // where %28 = floor(lid.x / 4) uses the same sign-aware division sequence as above. | |
| %19 = spirv.IMul %4, %cst5120_i32 : i32 | |
| %20 = spirv.IAdd %2, %19 : i32 | |
| %21 = spirv.IMul %6, %cst5120_i32 : i32 | |
| %22 = spirv.IAdd %20, %21 : i32 | |
| %23 = spirv.SLessThan %2, %cst0_i32 : i32 | |
| %24 = spirv.ISub %cst-1_i32, %2 : i32 | |
| %25 = spirv.Select %23, %24, %2 : i1, i32 | |
| %26 = spirv.SDiv %25, %cst4_i32 : i32 | |
| %27 = spirv.ISub %cst-1_i32, %26 : i32 | |
| %28 = spirv.Select %23, %27, %26 : i1, i32 | |
| %29 = spirv.IMul %28, %cst156_i32 : i32 | |
| %30 = spirv.IAdd %22, %29 : i32 | |
| %31 = spirv.IAdd %30, %cst11632_i32 : i32 | |
| %32 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %31] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %33 = spirv.Load "StorageBuffer" %32 : vector<4xf32> | |
| // Destination in @__workgroup_mem__4: %38 = lid.x + 160*(lid.y + lid.z) + floor(lid.x / 4). | |
| %34 = spirv.IMul %4, %cst160_i32 : i32 | |
| %35 = spirv.IAdd %2, %34 : i32 | |
| %36 = spirv.IMul %6, %cst160_i32 : i32 | |
| %37 = spirv.IAdd %35, %36 : i32 | |
| %38 = spirv.IAdd %37, %28 : i32 | |
| %39 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %38] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %39, %33 : vector<4xf32> | |
| // Binding 1 source index %51 = lid.x + 640*(lid.y + lid.z) + 8*WorkgroupId.x + %14 + 32*floor(lid.x / 8), | |
| // with %49 = floor(lid.x / 8). | |
| %40 = spirv.IMul %4, %cst640_i32 : i32 | |
| %41 = spirv.IAdd %2, %40 : i32 | |
| %42 = spirv.IMul %6, %cst640_i32 : i32 | |
| %43 = spirv.IAdd %41, %42 : i32 | |
| %44 = spirv.IMul %18, %cst8_i32 : i32 | |
| %45 = spirv.IAdd %43, %44 : i32 | |
| %46 = spirv.IAdd %45, %14 : i32 | |
| %47 = spirv.SDiv %25, %cst8_i32 : i32 | |
| %48 = spirv.ISub %cst-1_i32, %47 : i32 | |
| %49 = spirv.Select %23, %48, %47 : i1, i32 | |
| %50 = spirv.IMul %49, %cst32_i32 : i32 | |
| %51 = spirv.IAdd %46, %50 : i32 | |
| %52 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %51] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %53 = spirv.Load "StorageBuffer" %52 : vector<4xf32> | |
| // Destination in @__workgroup_mem__5: %58 = lid.x + 144*(lid.y + lid.z) + floor(lid.x / 8). | |
| %54 = spirv.IMul %4, %cst144_i32 : i32 | |
| %55 = spirv.IAdd %2, %54 : i32 | |
| %56 = spirv.IMul %6, %cst144_i32 : i32 | |
| %57 = spirv.IAdd %55, %56 : i32 | |
| %58 = spirv.IAdd %57, %49 : i32 | |
| %59 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %58] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %59, %53 : vector<4xf32> | |
| // Second binding 1 element per invocation: source index +640, destination index +144. | |
| %60 = spirv.IAdd %51, %cst640_i32 : i32 | |
| %61 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %60] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %62 = spirv.Load "StorageBuffer" %61 : vector<4xf32> | |
| %63 = spirv.IAdd %58, %cst144_i32 : i32 | |
| %64 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %63] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %64, %62 : vector<4xf32> | |
| // Workgroup-scope barrier so the staged tiles are visible before any cooperative-matrix load. | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| // Function-storage variables that carry the loop's final values out of the spirv.mlir.loop region: | |
| // %65 / %66 hold the two accumulators, %67 holds the Workgroup-memory half selector. | |
| %65 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %66 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %67 = spirv.Variable : !spirv.ptr<i32, Function> | |
| // ---- Main loop ---- | |
| // Header arguments: %106 = counter (starts at 0, advances by 32, runs while < 1248), | |
| // %107 / %108 = accumulators (start as the zero matrix), %109 = half selector (starts at 0). | |
| // Each iteration multiplies the tiles currently in Workgroup memory, then stages the tiles for | |
| // counter + 32 into the half chosen by %161 and synchronizes. | |
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%cst0_i32, %0, %0, %cst0_i32 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32) | |
| ^bb1(%106: i32, %107: !spirv.coopmatrix<16x16xf16, Subgroup>, %108: !spirv.coopmatrix<16x16xf16, Subgroup>, %109: i32): // 2 preds: ^bb0, ^bb2 | |
| %110 = spirv.SLessThan %106, %cst1248_i32 : i32 | |
| spirv.BranchConditional %110, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
| // Four cooperative-matrix loads from @__workgroup_mem__4 at base %109 * 160 plus offsets | |
| // 0, 2, 80 and 82, each with stride operand 5 and the column-major flag false. | |
| %111 = spirv.IMul %109, %cst160_i32 : i32 | |
| %112 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %111] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %113 = spirv.NV.CooperativeMatrixLoad %112, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %114 = spirv.IAdd %111, %cst2_i32 : i32 | |
| %115 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %114] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %116 = spirv.NV.CooperativeMatrixLoad %115, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %117 = spirv.IAdd %111, %cst80_i32 : i32 | |
| %118 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %117] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %119 = spirv.NV.CooperativeMatrixLoad %118, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %120 = spirv.IAdd %111, %cst82_i32 : i32 | |
| %121 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %120] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %122 = spirv.NV.CooperativeMatrixLoad %121, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| // Two cooperative-matrix loads from @__workgroup_mem__5 at %109 * 288 + 2 * floor(lid.x / 32) | |
| // and that index + 144, each with stride operand 9. | |
| // NOTE(review): floor(lid.x / 32) is presumably the subgroup index given subgroup_size = 32 - confirm. | |
| %123 = spirv.IMul %109, %cst288_i32 : i32 | |
| %124 = spirv.SDiv %25, %cst32_i32 : i32 | |
| %125 = spirv.ISub %cst-1_i32, %124 : i32 | |
| %126 = spirv.Select %23, %125, %124 : i1, i32 | |
| %127 = spirv.IMul %126, %cst2_i32 : i32 | |
| %128 = spirv.IAdd %123, %127 : i32 | |
| %129 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %128] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %130 = spirv.NV.CooperativeMatrixLoad %129, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %131 = spirv.IAdd %128, %cst144_i32 : i32 | |
| %132 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %131] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %133 = spirv.NV.CooperativeMatrixLoad %132, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| // Accumulate: %135 = %116*%133 + (%113*%130 + %107); %137 = %122*%133 + (%119*%130 + %108). | |
| %134 = spirv.NV.CooperativeMatrixMulAdd %113, %130, %107 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %135 = spirv.NV.CooperativeMatrixMulAdd %116, %133, %134 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %136 = spirv.NV.CooperativeMatrixMulAdd %119, %130, %108 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %137 = spirv.NV.CooperativeMatrixMulAdd %122, %133, %136 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| // Next counter value %138 = %106 + 32; %144 = floor(%138 / 8) (-33 - %106 equals -1 - %138). | |
| %138 = spirv.IAdd %106, %cst32_i32 : i32 | |
| %139 = spirv.SLessThan %138, %cst0_i32 : i32 | |
| %140 = spirv.ISub %cst-33_i32, %106 : i32 | |
| %141 = spirv.Select %139, %140, %138 : i1, i32 | |
| %142 = spirv.SDiv %141, %cst8_i32 : i32 | |
| %143 = spirv.ISub %cst-1_i32, %142 : i32 | |
| %144 = spirv.Select %139, %143, %142 : i1, i32 | |
| // Fetch the next binding 0 element: the prologue index advanced by floor(%138 / 8). | |
| %145 = spirv.IAdd %22, %144 : i32 | |
| %146 = spirv.IAdd %145, %29 : i32 | |
| %147 = spirv.IAdd %146, %cst11632_i32 : i32 | |
| %148 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %147] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %149 = spirv.Load "StorageBuffer" %148 : vector<4xf32> | |
| // %152 = floor(%138 / 32); %161 = %152 modulo 2 folded to a non-negative value. %161 becomes | |
| // the half selector passed to the next iteration. | |
| %150 = spirv.SDiv %141, %cst32_i32 : i32 | |
| %151 = spirv.ISub %cst-1_i32, %150 : i32 | |
| %152 = spirv.Select %139, %151, %150 : i1, i32 | |
| %153 = spirv.GL.SAbs %152 : i32 | |
| %154 = spirv.GL.SAbs %cst2_i32 : i32 | |
| %155 = spirv.UMod %153, %154 : i32 | |
| %156 = spirv.IEqual %152, %153 : i32 | |
| %157 = spirv.SNegate %155 : i32 | |
| %158 = spirv.Select %156, %155, %157 : i1, i32 | |
| %159 = spirv.SLessThan %158, %cst0_i32 : i32 | |
| %160 = spirv.IAdd %158, %cst2_i32 : i32 | |
| %161 = spirv.Select %159, %160, %158 : i1, i32 | |
| // @__workgroup_mem__4 destination: %152*160 - 320*floor(%152 / 2) + %37 + floor(lid.x / 4), | |
| // i.e. the prologue destination shifted by 160 * (%152 mod 2). | |
| %162 = spirv.IMul %152, %cst160_i32 : i32 | |
| %163 = spirv.IAdd %162, %37 : i32 | |
| %164 = spirv.SLessThan %152, %cst0_i32 : i32 | |
| %165 = spirv.ISub %cst-1_i32, %152 : i32 | |
| %166 = spirv.Select %164, %165, %152 : i1, i32 | |
| %167 = spirv.SDiv %166, %cst2_i32 : i32 | |
| %168 = spirv.ISub %cst-1_i32, %167 : i32 | |
| %169 = spirv.Select %164, %168, %167 : i1, i32 | |
| %170 = spirv.IMul %169, %cst-320_i32 : i32 | |
| %171 = spirv.IAdd %163, %170 : i32 | |
| %172 = spirv.IAdd %171, %28 : i32 | |
| %173 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %172] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %173, %149 : vector<4xf32> | |
| // Fetch the next binding 1 elements: the prologue source index advanced by %138 * 40. | |
| %174 = spirv.IMul %138, %cst40_i32 : i32 | |
| %175 = spirv.IAdd %174, %2 : i32 | |
| %176 = spirv.IAdd %175, %40 : i32 | |
| %177 = spirv.IAdd %176, %42 : i32 | |
| %178 = spirv.IAdd %177, %44 : i32 | |
| %179 = spirv.IAdd %178, %14 : i32 | |
| %180 = spirv.IAdd %179, %50 : i32 | |
| %181 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %180] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %182 = spirv.Load "StorageBuffer" %181 : vector<4xf32> | |
| // @__workgroup_mem__5 destination: %152*288 - 576*floor(%152 / 2) + %57 + floor(lid.x / 8), | |
| // i.e. the prologue destination shifted by 288 * (%152 mod 2). | |
| %183 = spirv.IMul %152, %cst288_i32 : i32 | |
| %184 = spirv.IAdd %183, %57 : i32 | |
| %185 = spirv.IMul %169, %cst-576_i32 : i32 | |
| %186 = spirv.IAdd %184, %185 : i32 | |
| %187 = spirv.IAdd %186, %49 : i32 | |
| %188 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %187] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %188, %182 : vector<4xf32> | |
| %189 = spirv.IAdd %180, %cst640_i32 : i32 | |
| %190 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %189] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %191 = spirv.Load "StorageBuffer" %190 : vector<4xf32> | |
| %192 = spirv.IAdd %187, %cst144_i32 : i32 | |
| %193 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %192] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %193, %191 : vector<4xf32> | |
| // Barrier after staging; then spill the loop-carried values so they can be read after the loop. | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| spirv.Store "Function" %65, %135 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %66, %137 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %67, %161 : i32 | |
| spirv.Branch ^bb1(%138, %135, %137, %161 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
| // ---- Epilogue: one more multiply-add pass on the tiles staged by the last loop iteration ---- | |
| // %68 = final half selector, %70 / %69 = accumulators reloaded from %65 / %66. | |
| %68 = spirv.Load "Function" %67 : i32 | |
| %69 = spirv.Load "Function" %66 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %70 = spirv.Load "Function" %65 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| // Same four @__workgroup_mem__4 loads as in the loop body, based at %68 * 160. | |
| %71 = spirv.IMul %68, %cst160_i32 : i32 | |
| %72 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %71] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %73 = spirv.NV.CooperativeMatrixLoad %72, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %74 = spirv.IAdd %71, %cst2_i32 : i32 | |
| %75 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %74] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %76 = spirv.NV.CooperativeMatrixLoad %75, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %77 = spirv.IAdd %71, %cst80_i32 : i32 | |
| %78 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %77] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %79 = spirv.NV.CooperativeMatrixLoad %78, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %80 = spirv.IAdd %71, %cst82_i32 : i32 | |
| %81 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %80] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %82 = spirv.NV.CooperativeMatrixLoad %81, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| // Same two @__workgroup_mem__5 loads, based at %68 * 288 + 2 * floor(lid.x / 32) (%87 is reused | |
| // below for the output index). | |
| %83 = spirv.IMul %68, %cst288_i32 : i32 | |
| %84 = spirv.SDiv %25, %cst32_i32 : i32 | |
| %85 = spirv.ISub %cst-1_i32, %84 : i32 | |
| %86 = spirv.Select %23, %85, %84 : i1, i32 | |
| %87 = spirv.IMul %86, %cst2_i32 : i32 | |
| %88 = spirv.IAdd %83, %87 : i32 | |
| %89 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %88] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %90 = spirv.NV.CooperativeMatrixLoad %89, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %91 = spirv.IAdd %88, %cst144_i32 : i32 | |
| %92 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %91] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %93 = spirv.NV.CooperativeMatrixLoad %92, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| // Final accumulation: %95 = %76*%93 + (%73*%90 + %70); %97 = %82*%93 + (%79*%90 + %69). | |
| %94 = spirv.NV.CooperativeMatrixMulAdd %73, %90, %70 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %95 = spirv.NV.CooperativeMatrixMulAdd %76, %93, %94 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %96 = spirv.NV.CooperativeMatrixMulAdd %79, %90, %69 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %97 = spirv.NV.CooperativeMatrixMulAdd %82, %93, %96 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| // Output index into binding 2: %102 = 1280*(WorkgroupId.y + lid.y) + 8*WorkgroupId.x + 2*floor(lid.x / 32). | |
| // %95 is stored at %102 and %97 at %102 + 640, both with stride operand 40. | |
| %98 = spirv.IMul %16, %cst1280_i32 : i32 | |
| %99 = spirv.IMul %4, %cst1280_i32 : i32 | |
| %100 = spirv.IAdd %98, %99 : i32 | |
| %101 = spirv.IAdd %100, %44 : i32 | |
| %102 = spirv.IAdd %101, %87 : i32 | |
| %103 = spirv.IAdd %102, %cst640_i32 : i32 | |
| %104 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %103] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %104, %97, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %105 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %102] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %105, %95, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Return | |
| } | |
| // GLCompute entry point; LocalSize 128x1x1 matches the workgroup_size attribute on the export. | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_21_matmul_32x320x1280, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__ | |
| spirv.ExecutionMode @forward_dispatch_21_matmul_32x320x1280 "LocalSize", 128, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
| hal.executable private @forward_dispatch_22 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
| hal.executable.export public @forward_dispatch_22_generic_2x320x9216x2880 ordinal(0) layout(#pipeline_layout8) attributes {subgroup_size = 32 : index, translation_info = #translation4, workgroup_size = [64 : index, 2 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
| %c144 = arith.constant 144 : index | |
| %c5 = arith.constant 5 : index | |
| %c2 = arith.constant 2 : index | |
| hal.return %c144, %c5, %c2 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_NV_cooperative_matrix]> { | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant> | |
| spirv.GlobalVariable @__workgroup_mem__8 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup> | |
| spirv.GlobalVariable @__workgroup_mem__7 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup> | |
| spirv.GlobalVariable @__workgroup_mem__6 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup> | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__resource_var_0_0__1 bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1__0 bind(0, 1) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_22_generic_2x320x9216x2880() "None" attributes {spirv.entry_point_abi = #spirv.entry_point_abi<subgroup_size = 32>} { | |
| %cst9_i32 = spirv.Constant 9 : i32 | |
| %false = spirv.Constant false | |
| %cst5_i32 = spirv.Constant 5 : i32 | |
| %cst3_i32 = spirv.Constant 3 : i32 | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst-33_i32 = spirv.Constant -33 : i32 | |
| %cst55296_i32 = spirv.Constant 55296 : i32 | |
| %cst432_i32 = spirv.Constant 432 : i32 | |
| %cst48_i32 = spirv.Constant 48 : i32 | |
| %cst36864_i32 = spirv.Constant 36864 : i32 | |
| %cst368640_i32 = spirv.Constant 368640 : i32 | |
| %cst73728_i32 = spirv.Constant 73728 : i32 | |
| %cst64_i32 = spirv.Constant 64 : i32 | |
| %cst576_i32 = spirv.Constant 576 : i32 | |
| %cst-576_i32 = spirv.Constant -576 : i32 | |
| %cst1152_i32 = spirv.Constant 1152 : i32 | |
| %cst-640_i32 = spirv.Constant -640 : i32 | |
| %cst146_i32 = spirv.Constant 146 : i32 | |
| %cst288_i32 = spirv.Constant 288 : i32 | |
| %cst82_i32 = spirv.Constant 82 : i32 | |
| %cst144_i32 = spirv.Constant 144 : i32 | |
| %cst72_i32 = spirv.Constant 72 : i32 | |
| %cst1144_i32 = spirv.Constant 1144 : i32 | |
| %cst3317760_i32 = spirv.Constant 3317760 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst18432_i32 = spirv.Constant 18432 : i32 | |
| %cst9216_i32 = spirv.Constant 9216 : i32 | |
| %cst160_i32 = spirv.Constant 160 : i32 | |
| %cst80_i32 = spirv.Constant 80 : i32 | |
| %cst356_i32 = spirv.Constant 356 : i32 | |
| %cst4_i32 = spirv.Constant 4 : i32 | |
| %cst11520_i32 = spirv.Constant 11520 : i32 | |
| %cst5760_i32 = spirv.Constant 5760 : i32 | |
| %cst23040_i32 = spirv.Constant 23040 : i32 | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst16_i32 = spirv.Constant 16 : i32 | |
| %cst_vec_4xf32 = spirv.Constant dense<0.000000e+00> : vector<4xf32> | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst2848_i32 = spirv.Constant 2848 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst_f16 = spirv.Constant 0.000000e+00 : f16 | |
| %0 = spirv.CompositeConstruct %cst_f16 : (f16) -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %1 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %2 = spirv.CompositeExtract %1[0 : i32] : vector<3xi32> | |
| %3 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %4 = spirv.CompositeExtract %3[1 : i32] : vector<3xi32> | |
| %5 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %6 = spirv.CompositeExtract %5[2 : i32] : vector<3xi32> | |
| %__workgroup_mem__6_addr = spirv.mlir.addressof @__workgroup_mem__6 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup> | |
| %__workgroup_mem__7_addr = spirv.mlir.addressof @__workgroup_mem__7 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup> | |
| %__workgroup_mem__8_addr = spirv.mlir.addressof @__workgroup_mem__8 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup> | |
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant> | |
| %7 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %8 = spirv.Load "PushConstant" %7 : i32 | |
| %9 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %10 = spirv.Load "PushConstant" %9 : i32 | |
| %11 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %12 = spirv.Load "PushConstant" %11 : i32 | |
| %13 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst3_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %14 = spirv.Load "PushConstant" %13 : i32 | |
| %15 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst4_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %16 = spirv.Load "PushConstant" %15 : i32 | |
| %__resource_var_0_0__1_addr = spirv.mlir.addressof @__resource_var_0_0__1 : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| %17 = spirv.SLessThan %10, %cst0_i32 : i32 | |
| %18 = spirv.ISub %cst-1_i32, %10 : i32 | |
| %19 = spirv.Select %17, %18, %10 : i1, i32 | |
| %20 = spirv.SDiv %19, %cst16_i32 : i32 | |
| %21 = spirv.ISub %cst-1_i32, %20 : i32 | |
| %22 = spirv.Select %17, %21, %20 : i1, i32 | |
| %__resource_var_0_1__0_addr = spirv.mlir.addressof @__resource_var_0_1__0 : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %23 = spirv.SLessThan %8, %cst0_i32 : i32 | |
| %24 = spirv.ISub %cst-1_i32, %8 : i32 | |
| %25 = spirv.Select %23, %24, %8 : i1, i32 | |
| %26 = spirv.SDiv %25, %cst16_i32 : i32 | |
| %27 = spirv.ISub %cst-1_i32, %26 : i32 | |
| %28 = spirv.Select %23, %27, %26 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %29 = spirv.SLessThan %12, %cst0_i32 : i32 | |
| %30 = spirv.ISub %cst-1_i32, %12 : i32 | |
| %31 = spirv.Select %29, %30, %12 : i1, i32 | |
| %32 = spirv.SDiv %31, %cst2_i32 : i32 | |
| %33 = spirv.ISub %cst-1_i32, %32 : i32 | |
| %34 = spirv.Select %29, %33, %32 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| %35 = spirv.SLessThan %14, %cst0_i32 : i32 | |
| %36 = spirv.ISub %cst-1_i32, %14 : i32 | |
| %37 = spirv.Select %35, %36, %14 : i1, i32 | |
| %38 = spirv.SDiv %37, %cst2_i32 : i32 | |
| %39 = spirv.ISub %cst-1_i32, %38 : i32 | |
| %40 = spirv.Select %35, %39, %38 : i1, i32 | |
| %41 = spirv.SLessThan %16, %cst0_i32 : i32 | |
| %42 = spirv.ISub %cst-1_i32, %16 : i32 | |
| %43 = spirv.Select %41, %42, %16 : i1, i32 | |
| %44 = spirv.SDiv %43, %cst16_i32 : i32 | |
| %45 = spirv.ISub %cst-1_i32, %44 : i32 | |
| %46 = spirv.Select %41, %45, %44 : i1, i32 | |
| %__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %47 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %48 = spirv.CompositeExtract %47[2 : i32] : vector<3xi32> | |
| %49 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %50 = spirv.CompositeExtract %49[1 : i32] : vector<3xi32> | |
| %51 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %52 = spirv.CompositeExtract %51[0 : i32] : vector<3xi32> | |
| %53 = spirv.IMul %50, %cst23040_i32 : i32 | |
| %54 = spirv.IAdd %53, %2 : i32 | |
| %55 = spirv.IMul %4, %cst5760_i32 : i32 | |
| %56 = spirv.IAdd %54, %55 : i32 | |
| %57 = spirv.IMul %6, %cst11520_i32 : i32 | |
| %58 = spirv.IAdd %56, %57 : i32 | |
| %59 = spirv.IAdd %58, %22 : i32 | |
| %60 = spirv.SLessThan %2, %cst0_i32 : i32 | |
| %61 = spirv.ISub %cst-1_i32, %2 : i32 | |
| %62 = spirv.Select %60, %61, %2 : i1, i32 | |
| %63 = spirv.SDiv %62, %cst4_i32 : i32 | |
| %64 = spirv.ISub %cst-1_i32, %63 : i32 | |
| %65 = spirv.Select %60, %64, %63 : i1, i32 | |
| %66 = spirv.IMul %65, %cst356_i32 : i32 | |
| %67 = spirv.IAdd %59, %66 : i32 | |
| %68 = spirv.AccessChain %__resource_var_0_1__0_addr[%cst0_i32, %67] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %69 = spirv.Load "StorageBuffer" %68 : vector<4xf32> | |
| %70 = spirv.IMul %4, %cst80_i32 : i32 | |
| %71 = spirv.IAdd %2, %70 : i32 | |
| %72 = spirv.IMul %6, %cst160_i32 : i32 | |
| %73 = spirv.IAdd %71, %72 : i32 | |
| %74 = spirv.IAdd %73, %65 : i32 | |
| %75 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %74] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %75, %69 : vector<4xf32> | |
| %76 = spirv.IAdd %67, %cst11520_i32 : i32 | |
| %77 = spirv.AccessChain %__resource_var_0_1__0_addr[%cst0_i32, %76] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %78 = spirv.Load "StorageBuffer" %77 : vector<4xf32> | |
| %79 = spirv.IAdd %74, %cst160_i32 : i32 | |
| %80 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %79] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %80, %78 : vector<4xf32> | |
| %81 = spirv.IMul %4, %cst9216_i32 : i32 | |
| %82 = spirv.IAdd %2, %81 : i32 | |
| %83 = spirv.IMul %6, %cst18432_i32 : i32 | |
| %84 = spirv.IAdd %82, %83 : i32 | |
| %85 = spirv.IMul %52, %cst8_i32 : i32 | |
| %86 = spirv.IAdd %84, %85 : i32 | |
| %87 = spirv.IMul %48, %cst3317760_i32 : i32 | |
| %88 = spirv.IAdd %86, %87 : i32 | |
| %89 = spirv.IAdd %88, %28 : i32 | |
| %90 = spirv.SDiv %62, %cst8_i32 : i32 | |
| %91 = spirv.ISub %cst-1_i32, %90 : i32 | |
| %92 = spirv.Select %60, %91, %90 : i1, i32 | |
| %93 = spirv.IMul %92, %cst1144_i32 : i32 | |
| %94 = spirv.IAdd %89, %93 : i32 | |
| %95 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %94] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %96 = spirv.Load "StorageBuffer" %95 : vector<4xf32> | |
| %97 = spirv.IMul %4, %cst72_i32 : i32 | |
| %98 = spirv.IAdd %2, %97 : i32 | |
| %99 = spirv.IMul %6, %cst144_i32 : i32 | |
| %100 = spirv.IAdd %98, %99 : i32 | |
| %101 = spirv.IAdd %100, %92 : i32 | |
| %102 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %101] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %102, %96 : vector<4xf32> | |
| %103 = spirv.IAdd %94, %cst18432_i32 : i32 | |
| %104 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %103] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %105 = spirv.Load "StorageBuffer" %104 : vector<4xf32> | |
| %106 = spirv.IAdd %101, %cst144_i32 : i32 | |
| %107 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %106] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %107, %105 : vector<4xf32> | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| %108 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %109 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %110 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %111 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %112 = spirv.Variable : !spirv.ptr<i32, Function> | |
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%cst0_i32, %0, %0, %0, %0, %cst0_i32 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32) | |
| ^bb1(%305: i32, %306: !spirv.coopmatrix<16x16xf16, Subgroup>, %307: !spirv.coopmatrix<16x16xf16, Subgroup>, %308: !spirv.coopmatrix<16x16xf16, Subgroup>, %309: !spirv.coopmatrix<16x16xf16, Subgroup>, %310: i32): // 2 preds: ^bb0, ^bb2 | |
| %311 = spirv.SLessThan %305, %cst2848_i32 : i32 | |
| spirv.BranchConditional %311, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
| %312 = spirv.IMul %310, %cst320_i32 : i32 | |
| %313 = spirv.IMul %4, %cst160_i32 : i32 | |
| %314 = spirv.IAdd %312, %313 : i32 | |
| %315 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %314] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %316 = spirv.NV.CooperativeMatrixLoad %315, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %317 = spirv.IAdd %314, %cst2_i32 : i32 | |
| %318 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %317] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %319 = spirv.NV.CooperativeMatrixLoad %318, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %320 = spirv.IAdd %314, %cst80_i32 : i32 | |
| %321 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %320] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %322 = spirv.NV.CooperativeMatrixLoad %321, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %323 = spirv.IAdd %314, %cst82_i32 : i32 | |
| %324 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %323] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %325 = spirv.NV.CooperativeMatrixLoad %324, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %326 = spirv.IMul %310, %cst288_i32 : i32 | |
| %327 = spirv.IMul %6, %cst288_i32 : i32 | |
| %328 = spirv.IAdd %326, %327 : i32 | |
| %329 = spirv.SDiv %62, %cst32_i32 : i32 | |
| %330 = spirv.ISub %cst-1_i32, %329 : i32 | |
| %331 = spirv.Select %60, %330, %329 : i1, i32 | |
| %332 = spirv.IMul %331, %cst4_i32 : i32 | |
| %333 = spirv.IAdd %328, %332 : i32 | |
| %334 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %333] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %335 = spirv.NV.CooperativeMatrixLoad %334, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %336 = spirv.IAdd %333, %cst2_i32 : i32 | |
| %337 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %336] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %338 = spirv.NV.CooperativeMatrixLoad %337, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %339 = spirv.IAdd %333, %cst144_i32 : i32 | |
| %340 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %339] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %341 = spirv.NV.CooperativeMatrixLoad %340, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %342 = spirv.IAdd %333, %cst146_i32 : i32 | |
| %343 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %342] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %344 = spirv.NV.CooperativeMatrixLoad %343, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %345 = spirv.NV.CooperativeMatrixMulAdd %316, %335, %306 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %346 = spirv.NV.CooperativeMatrixMulAdd %319, %341, %345 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %347 = spirv.NV.CooperativeMatrixMulAdd %316, %338, %307 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %348 = spirv.NV.CooperativeMatrixMulAdd %319, %344, %347 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %349 = spirv.NV.CooperativeMatrixMulAdd %322, %335, %308 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %350 = spirv.NV.CooperativeMatrixMulAdd %325, %341, %349 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %351 = spirv.NV.CooperativeMatrixMulAdd %322, %338, %309 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %352 = spirv.NV.CooperativeMatrixMulAdd %325, %344, %351 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %353 = spirv.IAdd %305, %cst32_i32 : i32 | |
| %354 = spirv.SLessThan %353, %cst0_i32 : i32 | |
| %355 = spirv.ISub %cst-33_i32, %305 : i32 | |
| %356 = spirv.Select %354, %355, %353 : i1, i32 | |
| %357 = spirv.SDiv %356, %cst8_i32 : i32 | |
| %358 = spirv.ISub %cst-1_i32, %357 : i32 | |
| %359 = spirv.Select %354, %358, %357 : i1, i32 | |
| %360 = spirv.IAdd %59, %359 : i32 | |
| %361 = spirv.IAdd %360, %66 : i32 | |
| %362 = spirv.AccessChain %__resource_var_0_1__0_addr[%cst0_i32, %361] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %363 = spirv.Load "StorageBuffer" %362 : vector<4xf32> | |
| %364 = spirv.SDiv %356, %cst32_i32 : i32 | |
| %365 = spirv.ISub %cst-1_i32, %364 : i32 | |
| %366 = spirv.Select %354, %365, %364 : i1, i32 | |
| %367 = spirv.GL.SAbs %366 : i32 | |
| %368 = spirv.GL.SAbs %cst2_i32 : i32 | |
| %369 = spirv.UMod %367, %368 : i32 | |
| %370 = spirv.IEqual %366, %367 : i32 | |
| %371 = spirv.SNegate %369 : i32 | |
| %372 = spirv.Select %370, %369, %371 : i1, i32 | |
| %373 = spirv.SLessThan %372, %cst0_i32 : i32 | |
| %374 = spirv.IAdd %372, %cst2_i32 : i32 | |
| %375 = spirv.Select %373, %374, %372 : i1, i32 | |
| %376 = spirv.IMul %366, %cst320_i32 : i32 | |
| %377 = spirv.IAdd %376, %73 : i32 | |
| %378 = spirv.SLessThan %366, %cst0_i32 : i32 | |
| %379 = spirv.ISub %cst-1_i32, %366 : i32 | |
| %380 = spirv.Select %378, %379, %366 : i1, i32 | |
| %381 = spirv.SDiv %380, %cst2_i32 : i32 | |
| %382 = spirv.ISub %cst-1_i32, %381 : i32 | |
| %383 = spirv.Select %378, %382, %381 : i1, i32 | |
| %384 = spirv.IMul %383, %cst-640_i32 : i32 | |
| %385 = spirv.IAdd %377, %384 : i32 | |
| %386 = spirv.IAdd %385, %65 : i32 | |
| %387 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %386] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %387, %363 : vector<4xf32> | |
| %388 = spirv.IAdd %361, %cst11520_i32 : i32 | |
| %389 = spirv.AccessChain %__resource_var_0_1__0_addr[%cst0_i32, %388] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %390 = spirv.Load "StorageBuffer" %389 : vector<4xf32> | |
| %391 = spirv.IAdd %386, %cst160_i32 : i32 | |
| %392 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %391] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %392, %390 : vector<4xf32> | |
| %393 = spirv.IMul %353, %cst1152_i32 : i32 | |
| %394 = spirv.IAdd %393, %2 : i32 | |
| %395 = spirv.IAdd %394, %81 : i32 | |
| %396 = spirv.IAdd %395, %83 : i32 | |
| %397 = spirv.IAdd %396, %85 : i32 | |
| %398 = spirv.IAdd %397, %87 : i32 | |
| %399 = spirv.IAdd %398, %28 : i32 | |
| %400 = spirv.IAdd %399, %93 : i32 | |
| %401 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %400] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %402 = spirv.Load "StorageBuffer" %401 : vector<4xf32> | |
| %403 = spirv.IMul %366, %cst288_i32 : i32 | |
| %404 = spirv.IAdd %403, %100 : i32 | |
| %405 = spirv.IMul %383, %cst-576_i32 : i32 | |
| %406 = spirv.IAdd %404, %405 : i32 | |
| %407 = spirv.IAdd %406, %92 : i32 | |
| %408 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %407] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %408, %402 : vector<4xf32> | |
| %409 = spirv.IAdd %400, %cst18432_i32 : i32 | |
| %410 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %409] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %411 = spirv.Load "StorageBuffer" %410 : vector<4xf32> | |
| %412 = spirv.IAdd %407, %cst144_i32 : i32 | |
| %413 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %412] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %413, %411 : vector<4xf32> | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| spirv.Store "Function" %108, %346 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %109, %348 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %110, %350 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %111, %352 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %112, %375 : i32 | |
| spirv.Branch ^bb1(%353, %346, %348, %350, %352, %375 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
| %113 = spirv.Load "Function" %112 : i32 | |
| %114 = spirv.Load "Function" %111 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %115 = spirv.Load "Function" %110 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %116 = spirv.Load "Function" %109 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %117 = spirv.Load "Function" %108 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %118 = spirv.IMul %4, %cst160_i32 : i32 | |
| %119 = spirv.IMul %113, %cst320_i32 : i32 | |
| %120 = spirv.IAdd %118, %119 : i32 | |
| %121 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %120] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %122 = spirv.NV.CooperativeMatrixLoad %121, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %123 = spirv.IAdd %120, %cst2_i32 : i32 | |
| %124 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %123] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %125 = spirv.NV.CooperativeMatrixLoad %124, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %126 = spirv.IAdd %120, %cst80_i32 : i32 | |
| %127 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %126] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %128 = spirv.NV.CooperativeMatrixLoad %127, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %129 = spirv.IAdd %120, %cst82_i32 : i32 | |
| %130 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %129] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %131 = spirv.NV.CooperativeMatrixLoad %130, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %132 = spirv.IMul %113, %cst288_i32 : i32 | |
| %133 = spirv.IMul %6, %cst288_i32 : i32 | |
| %134 = spirv.IAdd %132, %133 : i32 | |
| %135 = spirv.SDiv %62, %cst32_i32 : i32 | |
| %136 = spirv.ISub %cst-1_i32, %135 : i32 | |
| %137 = spirv.Select %60, %136, %135 : i1, i32 | |
| %138 = spirv.IMul %137, %cst4_i32 : i32 | |
| %139 = spirv.IAdd %134, %138 : i32 | |
| %140 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %139] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %141 = spirv.NV.CooperativeMatrixLoad %140, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %142 = spirv.IAdd %139, %cst2_i32 : i32 | |
| %143 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %142] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %144 = spirv.NV.CooperativeMatrixLoad %143, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %145 = spirv.IAdd %139, %cst144_i32 : i32 | |
| %146 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %145] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %147 = spirv.NV.CooperativeMatrixLoad %146, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %148 = spirv.IAdd %139, %cst146_i32 : i32 | |
| %149 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %148] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %150 = spirv.NV.CooperativeMatrixLoad %149, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %151 = spirv.NV.CooperativeMatrixMulAdd %122, %141, %117 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %152 = spirv.NV.CooperativeMatrixMulAdd %125, %147, %151 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %153 = spirv.NV.CooperativeMatrixMulAdd %122, %144, %116 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %154 = spirv.NV.CooperativeMatrixMulAdd %125, %150, %153 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %155 = spirv.NV.CooperativeMatrixMulAdd %128, %141, %115 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %156 = spirv.NV.CooperativeMatrixMulAdd %131, %147, %155 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %157 = spirv.NV.CooperativeMatrixMulAdd %128, %144, %114 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %158 = spirv.NV.CooperativeMatrixMulAdd %131, %150, %157 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %159 = spirv.IMul %4, %cst288_i32 : i32 | |
| %160 = spirv.IMul %6, %cst576_i32 : i32 | |
| %161 = spirv.IAdd %159, %160 : i32 | |
| %162 = spirv.IAdd %161, %138 : i32 | |
| %163 = spirv.IAdd %162, %cst146_i32 : i32 | |
| %164 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %163] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %164, %158, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %165 = spirv.IAdd %162, %cst144_i32 : i32 | |
| %166 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %165] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %166, %156, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %167 = spirv.IAdd %162, %cst2_i32 : i32 | |
| %168 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %167] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %168, %154, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %169 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %162] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %169, %152, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| %170 = spirv.IMul %50, %cst64_i32 : i32 | |
| %171 = spirv.IMul %4, %cst8_i32 : i32 | |
| %172 = spirv.IAdd %170, %171 : i32 | |
| %173 = spirv.IMul %6, %cst16_i32 : i32 | |
| %174 = spirv.IAdd %172, %173 : i32 | |
| %175 = spirv.IAdd %174, %34 : i32 | |
| %176 = spirv.IAdd %175, %92 : i32 | |
| %177 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %176] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %178 = spirv.Load "StorageBuffer" %177 : f16 | |
| %179 = spirv.IAdd %174, %40 : i32 | |
| %180 = spirv.IAdd %179, %92 : i32 | |
| %181 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %180] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %182 = spirv.Load "StorageBuffer" %181 : f16 | |
| %183 = spirv.IMul %48, %cst320_i32 : i32 | |
| %184 = spirv.IAdd %174, %183 : i32 | |
| %185 = spirv.IAdd %184, %92 : i32 | |
| %186 = spirv.AccessChain %__resource_var_0_0__1_addr[%cst0_i32, %185] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %187 = spirv.Load "StorageBuffer" %186 : f16 | |
| %188 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %101] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %189 = spirv.Load "Workgroup" %188 : vector<4xf32> | |
| %190 = spirv.CompositeConstruct %182, %182, %182, %182 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %191 = spirv.CompositeConstruct %187, %187, %187, %187 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %192 = spirv.FAdd %190, %191 : vector<4xf16> | |
| %193 = spirv.VectorShuffle [0 : i32, 1 : i32] %189 : vector<4xf32>, %189 : vector<4xf32> -> vector<2xf32> | |
| %194 = spirv.Bitcast %193 : vector<2xf32> to vector<4xf16> | |
| %195 = spirv.CompositeConstruct %178, %178, %178, %178 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %196 = spirv.FAdd %194, %195 : vector<4xf16> | |
| %197 = spirv.VectorShuffle [2 : i32, 3 : i32] %189 : vector<4xf32>, %189 : vector<4xf32> -> vector<2xf32> | |
| %198 = spirv.Bitcast %197 : vector<2xf32> to vector<4xf16> | |
| %199 = spirv.FAdd %198, %195 : vector<4xf16> | |
| %200 = spirv.FAdd %196, %192 : vector<4xf16> | |
| %201 = spirv.FAdd %199, %192 : vector<4xf16> | |
| %202 = spirv.Bitcast %201 : vector<4xf16> to vector<2xf32> | |
| %203 = spirv.Bitcast %200 : vector<4xf16> to vector<2xf32> | |
| %204 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %203 : vector<2xf32> -> vector<4xf32> | |
| %205 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %204 : vector<4xf32>, %202 : vector<2xf32> -> vector<4xf32> | |
| %206 = spirv.IMul %50, %cst73728_i32 : i32 | |
| %207 = spirv.IAdd %206, %2 : i32 | |
| %208 = spirv.IAdd %207, %81 : i32 | |
| %209 = spirv.IAdd %208, %83 : i32 | |
| %210 = spirv.IAdd %209, %85 : i32 | |
| %211 = spirv.IMul %48, %cst368640_i32 : i32 | |
| %212 = spirv.IAdd %210, %211 : i32 | |
| %213 = spirv.IAdd %212, %46 : i32 | |
| %214 = spirv.IAdd %213, %93 : i32 | |
| %215 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %214] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %215, %205 : vector<4xf32> | |
| %216 = spirv.IAdd %176, %cst16_i32 : i32 | |
| %217 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %216] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %218 = spirv.Load "StorageBuffer" %217 : f16 | |
| %219 = spirv.IAdd %180, %cst16_i32 : i32 | |
| %220 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %219] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %221 = spirv.Load "StorageBuffer" %220 : f16 | |
| %222 = spirv.IAdd %185, %cst16_i32 : i32 | |
| %223 = spirv.AccessChain %__resource_var_0_0__1_addr[%cst0_i32, %222] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %224 = spirv.Load "StorageBuffer" %223 : f16 | |
| %225 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %106] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %226 = spirv.Load "Workgroup" %225 : vector<4xf32> | |
| %227 = spirv.CompositeConstruct %221, %221, %221, %221 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %228 = spirv.CompositeConstruct %224, %224, %224, %224 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %229 = spirv.FAdd %227, %228 : vector<4xf16> | |
| %230 = spirv.VectorShuffle [0 : i32, 1 : i32] %226 : vector<4xf32>, %226 : vector<4xf32> -> vector<2xf32> | |
| %231 = spirv.Bitcast %230 : vector<2xf32> to vector<4xf16> | |
| %232 = spirv.CompositeConstruct %218, %218, %218, %218 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %233 = spirv.FAdd %231, %232 : vector<4xf16> | |
| %234 = spirv.VectorShuffle [2 : i32, 3 : i32] %226 : vector<4xf32>, %226 : vector<4xf32> -> vector<2xf32> | |
| %235 = spirv.Bitcast %234 : vector<2xf32> to vector<4xf16> | |
| %236 = spirv.FAdd %235, %232 : vector<4xf16> | |
| %237 = spirv.FAdd %233, %229 : vector<4xf16> | |
| %238 = spirv.FAdd %236, %229 : vector<4xf16> | |
| %239 = spirv.Bitcast %238 : vector<4xf16> to vector<2xf32> | |
| %240 = spirv.Bitcast %237 : vector<4xf16> to vector<2xf32> | |
| %241 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %240 : vector<2xf32> -> vector<4xf32> | |
| %242 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %241 : vector<4xf32>, %239 : vector<2xf32> -> vector<4xf32> | |
| %243 = spirv.IAdd %214, %cst18432_i32 : i32 | |
| %244 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %243] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %244, %242 : vector<4xf32> | |
| %245 = spirv.IAdd %176, %cst32_i32 : i32 | |
| %246 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %245] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %247 = spirv.Load "StorageBuffer" %246 : f16 | |
| %248 = spirv.IAdd %180, %cst32_i32 : i32 | |
| %249 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %248] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %250 = spirv.Load "StorageBuffer" %249 : f16 | |
| %251 = spirv.IAdd %185, %cst32_i32 : i32 | |
| %252 = spirv.AccessChain %__resource_var_0_0__1_addr[%cst0_i32, %251] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %253 = spirv.Load "StorageBuffer" %252 : f16 | |
| %254 = spirv.IAdd %101, %cst288_i32 : i32 | |
| %255 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %254] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %256 = spirv.Load "Workgroup" %255 : vector<4xf32> | |
| %257 = spirv.CompositeConstruct %250, %250, %250, %250 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %258 = spirv.CompositeConstruct %253, %253, %253, %253 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %259 = spirv.FAdd %257, %258 : vector<4xf16> | |
| %260 = spirv.VectorShuffle [0 : i32, 1 : i32] %256 : vector<4xf32>, %256 : vector<4xf32> -> vector<2xf32> | |
| %261 = spirv.Bitcast %260 : vector<2xf32> to vector<4xf16> | |
| %262 = spirv.CompositeConstruct %247, %247, %247, %247 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %263 = spirv.FAdd %261, %262 : vector<4xf16> | |
| %264 = spirv.VectorShuffle [2 : i32, 3 : i32] %256 : vector<4xf32>, %256 : vector<4xf32> -> vector<2xf32> | |
| %265 = spirv.Bitcast %264 : vector<2xf32> to vector<4xf16> | |
| %266 = spirv.FAdd %265, %262 : vector<4xf16> | |
| %267 = spirv.FAdd %263, %259 : vector<4xf16> | |
| %268 = spirv.FAdd %266, %259 : vector<4xf16> | |
| %269 = spirv.Bitcast %268 : vector<4xf16> to vector<2xf32> | |
| %270 = spirv.Bitcast %267 : vector<4xf16> to vector<2xf32> | |
| %271 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %270 : vector<2xf32> -> vector<4xf32> | |
| %272 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %271 : vector<4xf32>, %269 : vector<2xf32> -> vector<4xf32> | |
| %273 = spirv.IAdd %214, %cst36864_i32 : i32 | |
| %274 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %273] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %274, %272 : vector<4xf32> | |
| %275 = spirv.IAdd %176, %cst48_i32 : i32 | |
| %276 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %275] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %277 = spirv.Load "StorageBuffer" %276 : f16 | |
| %278 = spirv.IAdd %180, %cst48_i32 : i32 | |
| %279 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %278] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %280 = spirv.Load "StorageBuffer" %279 : f16 | |
| %281 = spirv.IAdd %185, %cst48_i32 : i32 | |
| %282 = spirv.AccessChain %__resource_var_0_0__1_addr[%cst0_i32, %281] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %283 = spirv.Load "StorageBuffer" %282 : f16 | |
| %284 = spirv.IAdd %101, %cst432_i32 : i32 | |
| %285 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %284] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %286 = spirv.Load "Workgroup" %285 : vector<4xf32> | |
| %287 = spirv.CompositeConstruct %280, %280, %280, %280 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %288 = spirv.CompositeConstruct %283, %283, %283, %283 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %289 = spirv.FAdd %287, %288 : vector<4xf16> | |
| %290 = spirv.VectorShuffle [0 : i32, 1 : i32] %286 : vector<4xf32>, %286 : vector<4xf32> -> vector<2xf32> | |
| %291 = spirv.Bitcast %290 : vector<2xf32> to vector<4xf16> | |
| %292 = spirv.CompositeConstruct %277, %277, %277, %277 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %293 = spirv.FAdd %291, %292 : vector<4xf16> | |
| %294 = spirv.VectorShuffle [2 : i32, 3 : i32] %286 : vector<4xf32>, %286 : vector<4xf32> -> vector<2xf32> | |
| %295 = spirv.Bitcast %294 : vector<2xf32> to vector<4xf16> | |
| %296 = spirv.FAdd %295, %292 : vector<4xf16> | |
| %297 = spirv.FAdd %293, %289 : vector<4xf16> | |
| %298 = spirv.FAdd %296, %289 : vector<4xf16> | |
| %299 = spirv.Bitcast %298 : vector<4xf16> to vector<2xf32> | |
| %300 = spirv.Bitcast %297 : vector<4xf16> to vector<2xf32> | |
| %301 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %300 : vector<2xf32> -> vector<4xf32> | |
| %302 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %301 : vector<4xf32>, %299 : vector<2xf32> -> vector<4xf32> | |
| %303 = spirv.IAdd %214, %cst55296_i32 : i32 | |
| %304 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %303] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %304, %302 : vector<4xf32> | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_22_generic_2x320x9216x2880, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__ | |
| spirv.ExecutionMode @forward_dispatch_22_generic_2x320x9216x2880 "LocalSize", 64, 2, 1 | |
| } | |
| } | |
| } | |
| } | |
| hal.executable private @forward_dispatch_30 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
// Export record for entry point @forward_dispatch_30_generic_2x320x9216x2880 (ordinal 0).
// Its attributes declare a 64 x 2 x 1 workgroup size and a subgroup size of 32.
// The region ignores %arg0..%arg3 and always returns the constants (144, 5, 2).
// NOTE(review): presumably these are the workgroup counts along x/y/z for the dispatch — confirm against the IREE HAL dialect docs.
| hal.executable.export public @forward_dispatch_30_generic_2x320x9216x2880 ordinal(0) layout(#pipeline_layout8) attributes {subgroup_size = 32 : index, translation_info = #translation4, workgroup_size = [64 : index, 2 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
| %c144 = arith.constant 144 : index | |
| %c5 = arith.constant 5 : index | |
| %c2 = arith.constant 2 : index | |
// The returned triple is made only of the constants above; none of the block arguments feed into it.
| hal.return %c144, %c5, %c2 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16, CooperativeMatrixNV], [SPV_KHR_storage_buffer_storage_class, SPV_KHR_16bit_storage, SPV_NV_cooperative_matrix]> { | |
// Module-level interface variables for the dispatch_30 SPIR-V module.
// Built-in input: the WorkgroupId as a 3 x i32 vector.
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
// Push-constant block: a struct holding five i32 values (stride 4) at offset 0.
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant> | |
// Three Workgroup-storage arrays of vector<4xf32>: 576, 640 and 576 elements.
// NOTE(review): at 16 bytes per element that is 9216 + 10240 + 9216 = 28672 bytes, under the
// max_compute_shared_memory_size = 49152 declared in the target env — confirm the element layout.
| spirv.GlobalVariable @__workgroup_mem__7 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup> | |
| spirv.GlobalVariable @__workgroup_mem__6 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup> | |
| spirv.GlobalVariable @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup> | |
// Built-in input: the LocalInvocationId as a 3 x i32 vector.
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
// Storage buffers. @__resource_var_0_0__0 and @__resource_var_0_0_ are both bound at (set 0, binding 0)
// and marked {aliased}: the first types the buffer as vector<4xf32> (stride 16), the second as f16 (stride 2).
| spirv.GlobalVariable @__resource_var_0_0__0 bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
// (set 0, binding 1): runtime array of vector<4xf32>.
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
// (set 0, binding 2): runtime array of vector<4xf32>.
| spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_30_generic_2x320x9216x2880() "None" attributes {spirv.entry_point_abi = #spirv.entry_point_abi<subgroup_size = 32>} { | |
| %cst9_i32 = spirv.Constant 9 : i32 | |
| %false = spirv.Constant false | |
| %cst5_i32 = spirv.Constant 5 : i32 | |
| %cst3_i32 = spirv.Constant 3 : i32 | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst-33_i32 = spirv.Constant -33 : i32 | |
| %cst432_i32 = spirv.Constant 432 : i32 | |
| %cst48_i32 = spirv.Constant 48 : i32 | |
| %cst55296_i32 = spirv.Constant 55296 : i32 | |
| %cst36864_i32 = spirv.Constant 36864 : i32 | |
| %cst64_i32 = spirv.Constant 64 : i32 | |
| %cst368640_i32 = spirv.Constant 368640 : i32 | |
| %cst73728_i32 = spirv.Constant 73728 : i32 | |
| %cst576_i32 = spirv.Constant 576 : i32 | |
| %cst-576_i32 = spirv.Constant -576 : i32 | |
| %cst1152_i32 = spirv.Constant 1152 : i32 | |
| %cst-640_i32 = spirv.Constant -640 : i32 | |
| %cst146_i32 = spirv.Constant 146 : i32 | |
| %cst288_i32 = spirv.Constant 288 : i32 | |
| %cst82_i32 = spirv.Constant 82 : i32 | |
| %cst144_i32 = spirv.Constant 144 : i32 | |
| %cst72_i32 = spirv.Constant 72 : i32 | |
| %cst1144_i32 = spirv.Constant 1144 : i32 | |
| %cst3317760_i32 = spirv.Constant 3317760 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst18432_i32 = spirv.Constant 18432 : i32 | |
| %cst9216_i32 = spirv.Constant 9216 : i32 | |
| %cst160_i32 = spirv.Constant 160 : i32 | |
| %cst80_i32 = spirv.Constant 80 : i32 | |
| %cst356_i32 = spirv.Constant 356 : i32 | |
| %cst4_i32 = spirv.Constant 4 : i32 | |
| %cst11520_i32 = spirv.Constant 11520 : i32 | |
| %cst5760_i32 = spirv.Constant 5760 : i32 | |
| %cst23040_i32 = spirv.Constant 23040 : i32 | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst16_i32 = spirv.Constant 16 : i32 | |
| %cst_vec_4xf32 = spirv.Constant dense<0.000000e+00> : vector<4xf32> | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst2848_i32 = spirv.Constant 2848 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst_f16 = spirv.Constant 0.000000e+00 : f16 | |
| %0 = spirv.CompositeConstruct %cst_f16 : (f16) -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %1 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %2 = spirv.CompositeExtract %1[0 : i32] : vector<3xi32> | |
| %3 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %4 = spirv.CompositeExtract %3[1 : i32] : vector<3xi32> | |
| %5 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %6 = spirv.CompositeExtract %5[2 : i32] : vector<3xi32> | |
| %__workgroup_mem__5_addr = spirv.mlir.addressof @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup> | |
| %__workgroup_mem__6_addr = spirv.mlir.addressof @__workgroup_mem__6 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup> | |
| %__workgroup_mem__7_addr = spirv.mlir.addressof @__workgroup_mem__7 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup> | |
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant> | |
| %7 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %8 = spirv.Load "PushConstant" %7 : i32 | |
| %9 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %10 = spirv.Load "PushConstant" %9 : i32 | |
| %11 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %12 = spirv.Load "PushConstant" %11 : i32 | |
| %13 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst3_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %14 = spirv.Load "PushConstant" %13 : i32 | |
| %15 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst4_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %16 = spirv.Load "PushConstant" %15 : i32 | |
| %17 = spirv.SLessThan %8, %cst0_i32 : i32 | |
| %18 = spirv.ISub %cst-1_i32, %8 : i32 | |
| %19 = spirv.Select %17, %18, %8 : i1, i32 | |
| %20 = spirv.SDiv %19, %cst16_i32 : i32 | |
| %21 = spirv.ISub %cst-1_i32, %20 : i32 | |
| %22 = spirv.Select %17, %21, %20 : i1, i32 | |
| %__resource_var_0_0__0_addr = spirv.mlir.addressof @__resource_var_0_0__0 : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %23 = spirv.SLessThan %12, %cst0_i32 : i32 | |
| %24 = spirv.ISub %cst-1_i32, %12 : i32 | |
| %25 = spirv.Select %23, %24, %12 : i1, i32 | |
| %26 = spirv.SDiv %25, %cst16_i32 : i32 | |
| %27 = spirv.ISub %cst-1_i32, %26 : i32 | |
| %28 = spirv.Select %23, %27, %26 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %29 = spirv.SLessThan %14, %cst0_i32 : i32 | |
| %30 = spirv.ISub %cst-1_i32, %14 : i32 | |
| %31 = spirv.Select %29, %30, %14 : i1, i32 | |
| %32 = spirv.SDiv %31, %cst16_i32 : i32 | |
| %33 = spirv.ISub %cst-1_i32, %32 : i32 | |
| %34 = spirv.Select %29, %33, %32 : i1, i32 | |
| %35 = spirv.SLessThan %10, %cst0_i32 : i32 | |
| %36 = spirv.ISub %cst-1_i32, %10 : i32 | |
| %37 = spirv.Select %35, %36, %10 : i1, i32 | |
| %38 = spirv.SDiv %37, %cst2_i32 : i32 | |
| %39 = spirv.ISub %cst-1_i32, %38 : i32 | |
| %40 = spirv.Select %35, %39, %38 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| %41 = spirv.SLessThan %16, %cst0_i32 : i32 | |
| %42 = spirv.ISub %cst-1_i32, %16 : i32 | |
| %43 = spirv.Select %41, %42, %16 : i1, i32 | |
| %44 = spirv.SDiv %43, %cst16_i32 : i32 | |
| %45 = spirv.ISub %cst-1_i32, %44 : i32 | |
| %46 = spirv.Select %41, %45, %44 : i1, i32 | |
| %__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %47 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %48 = spirv.CompositeExtract %47[2 : i32] : vector<3xi32> | |
| %49 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %50 = spirv.CompositeExtract %49[1 : i32] : vector<3xi32> | |
| %51 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %52 = spirv.CompositeExtract %51[0 : i32] : vector<3xi32> | |
| %53 = spirv.IMul %50, %cst23040_i32 : i32 | |
| %54 = spirv.IAdd %53, %2 : i32 | |
| %55 = spirv.IMul %4, %cst5760_i32 : i32 | |
| %56 = spirv.IAdd %54, %55 : i32 | |
| %57 = spirv.IMul %6, %cst11520_i32 : i32 | |
| %58 = spirv.IAdd %56, %57 : i32 | |
| %59 = spirv.IAdd %58, %22 : i32 | |
| %60 = spirv.SLessThan %2, %cst0_i32 : i32 | |
| %61 = spirv.ISub %cst-1_i32, %2 : i32 | |
| %62 = spirv.Select %60, %61, %2 : i1, i32 | |
| %63 = spirv.SDiv %62, %cst4_i32 : i32 | |
| %64 = spirv.ISub %cst-1_i32, %63 : i32 | |
| %65 = spirv.Select %60, %64, %63 : i1, i32 | |
| %66 = spirv.IMul %65, %cst356_i32 : i32 | |
| %67 = spirv.IAdd %59, %66 : i32 | |
| %68 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %67] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %69 = spirv.Load "StorageBuffer" %68 : vector<4xf32> | |
| %70 = spirv.IMul %4, %cst80_i32 : i32 | |
| %71 = spirv.IAdd %2, %70 : i32 | |
| %72 = spirv.IMul %6, %cst160_i32 : i32 | |
| %73 = spirv.IAdd %71, %72 : i32 | |
| %74 = spirv.IAdd %73, %65 : i32 | |
| %75 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %74] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %75, %69 : vector<4xf32> | |
| %76 = spirv.IAdd %67, %cst11520_i32 : i32 | |
| %77 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %76] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %78 = spirv.Load "StorageBuffer" %77 : vector<4xf32> | |
| %79 = spirv.IAdd %74, %cst160_i32 : i32 | |
| %80 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %79] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %80, %78 : vector<4xf32> | |
| %81 = spirv.IMul %4, %cst9216_i32 : i32 | |
| %82 = spirv.IAdd %2, %81 : i32 | |
| %83 = spirv.IMul %6, %cst18432_i32 : i32 | |
| %84 = spirv.IAdd %82, %83 : i32 | |
| %85 = spirv.IMul %52, %cst8_i32 : i32 | |
| %86 = spirv.IAdd %84, %85 : i32 | |
| %87 = spirv.IMul %48, %cst3317760_i32 : i32 | |
| %88 = spirv.IAdd %86, %87 : i32 | |
| %89 = spirv.IAdd %88, %28 : i32 | |
| %90 = spirv.SDiv %62, %cst8_i32 : i32 | |
| %91 = spirv.ISub %cst-1_i32, %90 : i32 | |
| %92 = spirv.Select %60, %91, %90 : i1, i32 | |
| %93 = spirv.IMul %92, %cst1144_i32 : i32 | |
| %94 = spirv.IAdd %89, %93 : i32 | |
| %95 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %94] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %96 = spirv.Load "StorageBuffer" %95 : vector<4xf32> | |
| %97 = spirv.IMul %4, %cst72_i32 : i32 | |
| %98 = spirv.IAdd %2, %97 : i32 | |
| %99 = spirv.IMul %6, %cst144_i32 : i32 | |
| %100 = spirv.IAdd %98, %99 : i32 | |
| %101 = spirv.IAdd %100, %92 : i32 | |
| %102 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %101] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %102, %96 : vector<4xf32> | |
| %103 = spirv.IAdd %94, %cst18432_i32 : i32 | |
| %104 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %103] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %105 = spirv.Load "StorageBuffer" %104 : vector<4xf32> | |
| %106 = spirv.IAdd %101, %cst144_i32 : i32 | |
| %107 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %106] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %107, %105 : vector<4xf32> | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| %108 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %109 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %110 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %111 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %112 = spirv.Variable : !spirv.ptr<i32, Function> | |
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%cst0_i32, %0, %0, %0, %0, %cst0_i32 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32) | |
| ^bb1(%295: i32, %296: !spirv.coopmatrix<16x16xf16, Subgroup>, %297: !spirv.coopmatrix<16x16xf16, Subgroup>, %298: !spirv.coopmatrix<16x16xf16, Subgroup>, %299: !spirv.coopmatrix<16x16xf16, Subgroup>, %300: i32): // 2 preds: ^bb0, ^bb2 | |
| %301 = spirv.SLessThan %295, %cst2848_i32 : i32 | |
| spirv.BranchConditional %301, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
| %302 = spirv.IMul %300, %cst320_i32 : i32 | |
| %303 = spirv.IMul %4, %cst160_i32 : i32 | |
| %304 = spirv.IAdd %302, %303 : i32 | |
| %305 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %304] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %306 = spirv.NV.CooperativeMatrixLoad %305, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %307 = spirv.IAdd %304, %cst2_i32 : i32 | |
| %308 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %307] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %309 = spirv.NV.CooperativeMatrixLoad %308, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %310 = spirv.IAdd %304, %cst80_i32 : i32 | |
| %311 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %310] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %312 = spirv.NV.CooperativeMatrixLoad %311, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %313 = spirv.IAdd %304, %cst82_i32 : i32 | |
| %314 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %313] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %315 = spirv.NV.CooperativeMatrixLoad %314, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %316 = spirv.IMul %300, %cst288_i32 : i32 | |
| %317 = spirv.IMul %6, %cst288_i32 : i32 | |
| %318 = spirv.IAdd %316, %317 : i32 | |
| %319 = spirv.SDiv %62, %cst32_i32 : i32 | |
| %320 = spirv.ISub %cst-1_i32, %319 : i32 | |
| %321 = spirv.Select %60, %320, %319 : i1, i32 | |
| %322 = spirv.IMul %321, %cst4_i32 : i32 | |
| %323 = spirv.IAdd %318, %322 : i32 | |
| %324 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %323] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %325 = spirv.NV.CooperativeMatrixLoad %324, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %326 = spirv.IAdd %323, %cst2_i32 : i32 | |
| %327 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %326] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %328 = spirv.NV.CooperativeMatrixLoad %327, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %329 = spirv.IAdd %323, %cst144_i32 : i32 | |
| %330 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %329] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %331 = spirv.NV.CooperativeMatrixLoad %330, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %332 = spirv.IAdd %323, %cst146_i32 : i32 | |
| %333 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %332] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %334 = spirv.NV.CooperativeMatrixLoad %333, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %335 = spirv.NV.CooperativeMatrixMulAdd %306, %325, %296 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %336 = spirv.NV.CooperativeMatrixMulAdd %309, %331, %335 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %337 = spirv.NV.CooperativeMatrixMulAdd %306, %328, %297 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %338 = spirv.NV.CooperativeMatrixMulAdd %309, %334, %337 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %339 = spirv.NV.CooperativeMatrixMulAdd %312, %325, %298 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %340 = spirv.NV.CooperativeMatrixMulAdd %315, %331, %339 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %341 = spirv.NV.CooperativeMatrixMulAdd %312, %328, %299 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %342 = spirv.NV.CooperativeMatrixMulAdd %315, %334, %341 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %343 = spirv.IAdd %295, %cst32_i32 : i32 | |
| %344 = spirv.SLessThan %343, %cst0_i32 : i32 | |
| %345 = spirv.ISub %cst-33_i32, %295 : i32 | |
| %346 = spirv.Select %344, %345, %343 : i1, i32 | |
| %347 = spirv.SDiv %346, %cst8_i32 : i32 | |
| %348 = spirv.ISub %cst-1_i32, %347 : i32 | |
| %349 = spirv.Select %344, %348, %347 : i1, i32 | |
| %350 = spirv.IAdd %59, %349 : i32 | |
| %351 = spirv.IAdd %350, %66 : i32 | |
| %352 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %351] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %353 = spirv.Load "StorageBuffer" %352 : vector<4xf32> | |
| %354 = spirv.SDiv %346, %cst32_i32 : i32 | |
| %355 = spirv.ISub %cst-1_i32, %354 : i32 | |
| %356 = spirv.Select %344, %355, %354 : i1, i32 | |
| %357 = spirv.GL.SAbs %356 : i32 | |
| %358 = spirv.GL.SAbs %cst2_i32 : i32 | |
| %359 = spirv.UMod %357, %358 : i32 | |
| %360 = spirv.IEqual %356, %357 : i32 | |
| %361 = spirv.SNegate %359 : i32 | |
| %362 = spirv.Select %360, %359, %361 : i1, i32 | |
| %363 = spirv.SLessThan %362, %cst0_i32 : i32 | |
| %364 = spirv.IAdd %362, %cst2_i32 : i32 | |
| %365 = spirv.Select %363, %364, %362 : i1, i32 | |
| %366 = spirv.IMul %356, %cst320_i32 : i32 | |
| %367 = spirv.IAdd %366, %73 : i32 | |
| %368 = spirv.SLessThan %356, %cst0_i32 : i32 | |
| %369 = spirv.ISub %cst-1_i32, %356 : i32 | |
| %370 = spirv.Select %368, %369, %356 : i1, i32 | |
| %371 = spirv.SDiv %370, %cst2_i32 : i32 | |
| %372 = spirv.ISub %cst-1_i32, %371 : i32 | |
| %373 = spirv.Select %368, %372, %371 : i1, i32 | |
| %374 = spirv.IMul %373, %cst-640_i32 : i32 | |
| %375 = spirv.IAdd %367, %374 : i32 | |
| %376 = spirv.IAdd %375, %65 : i32 | |
| %377 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %376] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %377, %353 : vector<4xf32> | |
| %378 = spirv.IAdd %351, %cst11520_i32 : i32 | |
| %379 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %378] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %380 = spirv.Load "StorageBuffer" %379 : vector<4xf32> | |
| %381 = spirv.IAdd %376, %cst160_i32 : i32 | |
| %382 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %381] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %382, %380 : vector<4xf32> | |
| %383 = spirv.IMul %343, %cst1152_i32 : i32 | |
| %384 = spirv.IAdd %383, %2 : i32 | |
| %385 = spirv.IAdd %384, %81 : i32 | |
| %386 = spirv.IAdd %385, %83 : i32 | |
| %387 = spirv.IAdd %386, %85 : i32 | |
| %388 = spirv.IAdd %387, %87 : i32 | |
| %389 = spirv.IAdd %388, %28 : i32 | |
| %390 = spirv.IAdd %389, %93 : i32 | |
| %391 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %390] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %392 = spirv.Load "StorageBuffer" %391 : vector<4xf32> | |
| %393 = spirv.IMul %356, %cst288_i32 : i32 | |
| %394 = spirv.IAdd %393, %100 : i32 | |
| %395 = spirv.IMul %373, %cst-576_i32 : i32 | |
| %396 = spirv.IAdd %394, %395 : i32 | |
| %397 = spirv.IAdd %396, %92 : i32 | |
| %398 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %397] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %398, %392 : vector<4xf32> | |
| %399 = spirv.IAdd %390, %cst18432_i32 : i32 | |
| %400 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %399] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %401 = spirv.Load "StorageBuffer" %400 : vector<4xf32> | |
| %402 = spirv.IAdd %397, %cst144_i32 : i32 | |
| %403 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %402] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %403, %401 : vector<4xf32> | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| spirv.Store "Function" %108, %336 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %109, %338 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %110, %340 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %111, %342 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %112, %365 : i32 | |
| spirv.Branch ^bb1(%343, %336, %338, %340, %342, %365 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
| %113 = spirv.Load "Function" %112 : i32 | |
| %114 = spirv.Load "Function" %111 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %115 = spirv.Load "Function" %110 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %116 = spirv.Load "Function" %109 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %117 = spirv.Load "Function" %108 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %118 = spirv.IMul %4, %cst160_i32 : i32 | |
| %119 = spirv.IMul %113, %cst320_i32 : i32 | |
| %120 = spirv.IAdd %118, %119 : i32 | |
| %121 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %120] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %122 = spirv.NV.CooperativeMatrixLoad %121, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %123 = spirv.IAdd %120, %cst2_i32 : i32 | |
| %124 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %123] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %125 = spirv.NV.CooperativeMatrixLoad %124, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %126 = spirv.IAdd %120, %cst80_i32 : i32 | |
| %127 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %126] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %128 = spirv.NV.CooperativeMatrixLoad %127, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %129 = spirv.IAdd %120, %cst82_i32 : i32 | |
| %130 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %129] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %131 = spirv.NV.CooperativeMatrixLoad %130, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %132 = spirv.IMul %113, %cst288_i32 : i32 | |
| %133 = spirv.IMul %6, %cst288_i32 : i32 | |
| %134 = spirv.IAdd %132, %133 : i32 | |
| %135 = spirv.SDiv %62, %cst32_i32 : i32 | |
| %136 = spirv.ISub %cst-1_i32, %135 : i32 | |
| %137 = spirv.Select %60, %136, %135 : i1, i32 | |
| %138 = spirv.IMul %137, %cst4_i32 : i32 | |
| %139 = spirv.IAdd %134, %138 : i32 | |
| %140 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %139] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %141 = spirv.NV.CooperativeMatrixLoad %140, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %142 = spirv.IAdd %139, %cst2_i32 : i32 | |
| %143 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %142] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %144 = spirv.NV.CooperativeMatrixLoad %143, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %145 = spirv.IAdd %139, %cst144_i32 : i32 | |
| %146 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %145] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %147 = spirv.NV.CooperativeMatrixLoad %146, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %148 = spirv.IAdd %139, %cst146_i32 : i32 | |
| %149 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %148] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %150 = spirv.NV.CooperativeMatrixLoad %149, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %151 = spirv.NV.CooperativeMatrixMulAdd %122, %141, %117 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %152 = spirv.NV.CooperativeMatrixMulAdd %125, %147, %151 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %153 = spirv.NV.CooperativeMatrixMulAdd %122, %144, %116 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %154 = spirv.NV.CooperativeMatrixMulAdd %125, %150, %153 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %155 = spirv.NV.CooperativeMatrixMulAdd %128, %141, %115 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %156 = spirv.NV.CooperativeMatrixMulAdd %131, %147, %155 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %157 = spirv.NV.CooperativeMatrixMulAdd %128, %144, %114 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %158 = spirv.NV.CooperativeMatrixMulAdd %131, %150, %157 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %159 = spirv.IMul %4, %cst288_i32 : i32 | |
| %160 = spirv.IMul %6, %cst576_i32 : i32 | |
| %161 = spirv.IAdd %159, %160 : i32 | |
| %162 = spirv.IAdd %161, %138 : i32 | |
| %163 = spirv.IAdd %162, %cst146_i32 : i32 | |
| %164 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %163] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %164, %158, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %165 = spirv.IAdd %162, %cst144_i32 : i32 | |
| %166 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %165] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %166, %156, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %167 = spirv.IAdd %162, %cst2_i32 : i32 | |
| %168 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %167] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %168, %154, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %169 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %162] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %169, %152, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| %170 = spirv.IMul %50, %cst73728_i32 : i32 | |
| %171 = spirv.IAdd %170, %2 : i32 | |
| %172 = spirv.IAdd %171, %81 : i32 | |
| %173 = spirv.IAdd %172, %83 : i32 | |
| %174 = spirv.IAdd %173, %85 : i32 | |
| %175 = spirv.IMul %48, %cst368640_i32 : i32 | |
| %176 = spirv.IAdd %174, %175 : i32 | |
| %177 = spirv.IAdd %176, %34 : i32 | |
| %178 = spirv.IAdd %177, %93 : i32 | |
| %179 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %178] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %180 = spirv.Load "StorageBuffer" %179 : vector<4xf32> | |
| %181 = spirv.IMul %50, %cst64_i32 : i32 | |
| %182 = spirv.IMul %4, %cst8_i32 : i32 | |
| %183 = spirv.IAdd %181, %182 : i32 | |
| %184 = spirv.IMul %6, %cst16_i32 : i32 | |
| %185 = spirv.IAdd %183, %184 : i32 | |
| %186 = spirv.IAdd %185, %40 : i32 | |
| %187 = spirv.IAdd %186, %92 : i32 | |
| %188 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %187] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %189 = spirv.Load "StorageBuffer" %188 : f16 | |
| %190 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %101] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %191 = spirv.Load "Workgroup" %190 : vector<4xf32> | |
| %192 = spirv.VectorShuffle [0 : i32, 1 : i32] %191 : vector<4xf32>, %191 : vector<4xf32> -> vector<2xf32> | |
| %193 = spirv.Bitcast %192 : vector<2xf32> to vector<4xf16> | |
| %194 = spirv.CompositeConstruct %189, %189, %189, %189 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %195 = spirv.FAdd %193, %194 : vector<4xf16> | |
| %196 = spirv.VectorShuffle [2 : i32, 3 : i32] %191 : vector<4xf32>, %191 : vector<4xf32> -> vector<2xf32> | |
| %197 = spirv.Bitcast %196 : vector<2xf32> to vector<4xf16> | |
| %198 = spirv.FAdd %197, %194 : vector<4xf16> | |
| %199 = spirv.VectorShuffle [0 : i32, 1 : i32] %180 : vector<4xf32>, %180 : vector<4xf32> -> vector<2xf32> | |
| %200 = spirv.Bitcast %199 : vector<2xf32> to vector<4xf16> | |
| %201 = spirv.FAdd %200, %195 : vector<4xf16> | |
| %202 = spirv.VectorShuffle [2 : i32, 3 : i32] %180 : vector<4xf32>, %180 : vector<4xf32> -> vector<2xf32> | |
| %203 = spirv.Bitcast %202 : vector<2xf32> to vector<4xf16> | |
| %204 = spirv.FAdd %203, %198 : vector<4xf16> | |
| %205 = spirv.Bitcast %204 : vector<4xf16> to vector<2xf32> | |
| %206 = spirv.Bitcast %201 : vector<4xf16> to vector<2xf32> | |
| %207 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %206 : vector<2xf32> -> vector<4xf32> | |
| %208 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %207 : vector<4xf32>, %205 : vector<2xf32> -> vector<4xf32> | |
| %209 = spirv.IAdd %176, %46 : i32 | |
| %210 = spirv.IAdd %209, %93 : i32 | |
| %211 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %210] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %211, %208 : vector<4xf32> | |
| %212 = spirv.IAdd %178, %cst18432_i32 : i32 | |
| %213 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %212] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %214 = spirv.Load "StorageBuffer" %213 : vector<4xf32> | |
| %215 = spirv.IAdd %187, %cst16_i32 : i32 | |
| %216 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %215] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %217 = spirv.Load "StorageBuffer" %216 : f16 | |
| %218 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %106] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %219 = spirv.Load "Workgroup" %218 : vector<4xf32> | |
| %220 = spirv.VectorShuffle [0 : i32, 1 : i32] %219 : vector<4xf32>, %219 : vector<4xf32> -> vector<2xf32> | |
| %221 = spirv.Bitcast %220 : vector<2xf32> to vector<4xf16> | |
| %222 = spirv.CompositeConstruct %217, %217, %217, %217 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %223 = spirv.FAdd %221, %222 : vector<4xf16> | |
| %224 = spirv.VectorShuffle [2 : i32, 3 : i32] %219 : vector<4xf32>, %219 : vector<4xf32> -> vector<2xf32> | |
| %225 = spirv.Bitcast %224 : vector<2xf32> to vector<4xf16> | |
| %226 = spirv.FAdd %225, %222 : vector<4xf16> | |
| %227 = spirv.VectorShuffle [0 : i32, 1 : i32] %214 : vector<4xf32>, %214 : vector<4xf32> -> vector<2xf32> | |
| %228 = spirv.Bitcast %227 : vector<2xf32> to vector<4xf16> | |
| %229 = spirv.FAdd %228, %223 : vector<4xf16> | |
| %230 = spirv.VectorShuffle [2 : i32, 3 : i32] %214 : vector<4xf32>, %214 : vector<4xf32> -> vector<2xf32> | |
| %231 = spirv.Bitcast %230 : vector<2xf32> to vector<4xf16> | |
| %232 = spirv.FAdd %231, %226 : vector<4xf16> | |
| %233 = spirv.Bitcast %232 : vector<4xf16> to vector<2xf32> | |
| %234 = spirv.Bitcast %229 : vector<4xf16> to vector<2xf32> | |
| %235 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %234 : vector<2xf32> -> vector<4xf32> | |
| %236 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %235 : vector<4xf32>, %233 : vector<2xf32> -> vector<4xf32> | |
| %237 = spirv.IAdd %210, %cst18432_i32 : i32 | |
| %238 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %237] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %238, %236 : vector<4xf32> | |
| %239 = spirv.IAdd %178, %cst36864_i32 : i32 | |
| %240 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %239] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %241 = spirv.Load "StorageBuffer" %240 : vector<4xf32> | |
| %242 = spirv.IAdd %187, %cst32_i32 : i32 | |
| %243 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %242] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %244 = spirv.Load "StorageBuffer" %243 : f16 | |
| %245 = spirv.IAdd %101, %cst288_i32 : i32 | |
| %246 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %245] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %247 = spirv.Load "Workgroup" %246 : vector<4xf32> | |
| %248 = spirv.VectorShuffle [0 : i32, 1 : i32] %247 : vector<4xf32>, %247 : vector<4xf32> -> vector<2xf32> | |
| %249 = spirv.Bitcast %248 : vector<2xf32> to vector<4xf16> | |
| %250 = spirv.CompositeConstruct %244, %244, %244, %244 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %251 = spirv.FAdd %249, %250 : vector<4xf16> | |
| %252 = spirv.VectorShuffle [2 : i32, 3 : i32] %247 : vector<4xf32>, %247 : vector<4xf32> -> vector<2xf32> | |
| %253 = spirv.Bitcast %252 : vector<2xf32> to vector<4xf16> | |
| %254 = spirv.FAdd %253, %250 : vector<4xf16> | |
| %255 = spirv.VectorShuffle [0 : i32, 1 : i32] %241 : vector<4xf32>, %241 : vector<4xf32> -> vector<2xf32> | |
| %256 = spirv.Bitcast %255 : vector<2xf32> to vector<4xf16> | |
| %257 = spirv.FAdd %256, %251 : vector<4xf16> | |
| %258 = spirv.VectorShuffle [2 : i32, 3 : i32] %241 : vector<4xf32>, %241 : vector<4xf32> -> vector<2xf32> | |
| %259 = spirv.Bitcast %258 : vector<2xf32> to vector<4xf16> | |
| %260 = spirv.FAdd %259, %254 : vector<4xf16> | |
| %261 = spirv.Bitcast %260 : vector<4xf16> to vector<2xf32> | |
| %262 = spirv.Bitcast %257 : vector<4xf16> to vector<2xf32> | |
| %263 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %262 : vector<2xf32> -> vector<4xf32> | |
| %264 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %263 : vector<4xf32>, %261 : vector<2xf32> -> vector<4xf32> | |
| %265 = spirv.IAdd %210, %cst36864_i32 : i32 | |
| %266 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %265] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %266, %264 : vector<4xf32> | |
| %267 = spirv.IAdd %178, %cst55296_i32 : i32 | |
| %268 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %267] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %269 = spirv.Load "StorageBuffer" %268 : vector<4xf32> | |
| %270 = spirv.IAdd %187, %cst48_i32 : i32 | |
| %271 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %270] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %272 = spirv.Load "StorageBuffer" %271 : f16 | |
| %273 = spirv.IAdd %101, %cst432_i32 : i32 | |
| %274 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %273] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %275 = spirv.Load "Workgroup" %274 : vector<4xf32> | |
| %276 = spirv.VectorShuffle [0 : i32, 1 : i32] %275 : vector<4xf32>, %275 : vector<4xf32> -> vector<2xf32> | |
| %277 = spirv.Bitcast %276 : vector<2xf32> to vector<4xf16> | |
| %278 = spirv.CompositeConstruct %272, %272, %272, %272 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %279 = spirv.FAdd %277, %278 : vector<4xf16> | |
| %280 = spirv.VectorShuffle [2 : i32, 3 : i32] %275 : vector<4xf32>, %275 : vector<4xf32> -> vector<2xf32> | |
| %281 = spirv.Bitcast %280 : vector<2xf32> to vector<4xf16> | |
| %282 = spirv.FAdd %281, %278 : vector<4xf16> | |
| %283 = spirv.VectorShuffle [0 : i32, 1 : i32] %269 : vector<4xf32>, %269 : vector<4xf32> -> vector<2xf32> | |
| %284 = spirv.Bitcast %283 : vector<2xf32> to vector<4xf16> | |
| %285 = spirv.FAdd %284, %279 : vector<4xf16> | |
| %286 = spirv.VectorShuffle [2 : i32, 3 : i32] %269 : vector<4xf32>, %269 : vector<4xf32> -> vector<2xf32> | |
| %287 = spirv.Bitcast %286 : vector<2xf32> to vector<4xf16> | |
| %288 = spirv.FAdd %287, %282 : vector<4xf16> | |
| %289 = spirv.Bitcast %288 : vector<4xf16> to vector<2xf32> | |
| %290 = spirv.Bitcast %285 : vector<4xf16> to vector<2xf32> | |
| %291 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %290 : vector<2xf32> -> vector<4xf32> | |
| %292 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %291 : vector<4xf32>, %289 : vector<2xf32> -> vector<4xf32> | |
| %293 = spirv.IAdd %210, %cst55296_i32 : i32 | |
| %294 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %293] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %294, %292 : vector<4xf32> | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_30_generic_2x320x9216x2880, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__ | |
| spirv.ExecutionMode @forward_dispatch_30_generic_2x320x9216x2880 "LocalSize", 64, 2, 1 | |
| } | |
| } | |
| } | |
| } | |
// forward_dispatch_34: a HAL executable with a single Vulkan/SPIR-V variant containing one
// compute entry point, @forward_dispatch_34_generic_64x10x9216.
// Per invocation the kernel loads one vector<4xf16>, subtracts a per-z scalar, multiplies by
// inverse-sqrt(second per-z scalar / 92160 + 1e-6) and stores the vector<4xf16> result.
// NOTE(review): this looks like the "apply" half of a mean/variance normalization -- confirm against the producer dispatch.
| hal.executable private @forward_dispatch_34 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
// Export record: workgroup_size is 32x1x1 and the region returns the constants (72, 10, 64).
// presumably these are the workgroup counts along x/y/z -- TODO confirm against the HAL export contract.
| hal.executable.export public @forward_dispatch_34_generic_64x10x9216 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation1, workgroup_size = [32 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
| %c72 = arith.constant 72 : index | |
| %c10 = arith.constant 10 : index | |
| %c64 = arith.constant 64 : index | |
| hal.return %c72, %c10, %c64 : index, index, index | |
| } | |
// The outer builtin.module carries the full target environment; the inner spirv.module below
// only requires Shader, StorageBuffer16BitAccess and Float16.
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
// Two i32 push constants; the function floor-divides each by 8 and adds it to a vector index.
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
// Binding (0, 0) is declared twice with {aliased}: once as a runtime array of vector<4xf16>
// (stride 8) and once as a runtime array of f32 (stride 4). Binding (0, 1) is the store target.
| spirv.GlobalVariable @__resource_var_0_0__0 bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
// Kernel body. Takes no arguments; all inputs come from the globals declared above.
| spirv.func @forward_dispatch_34_generic_64x10x9216() "None" { | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst2304_i32 = spirv.Constant 2304 : i32 | |
| %cst23040_i32 = spirv.Constant 23040 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst64_i32 = spirv.Constant 64 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
// 92160 is the divisor applied to the second f32 scalar; 1e-6 is the additive epsilon.
// presumably 92160 = 10 * 9216 (the reduced extent in the kernel name) -- TODO confirm.
| %cst_f32 = spirv.Constant 9.216000e+04 : f32 | |
| %cst_f32_0 = spirv.Constant 9.99999997E-7 : f32 | |
// Load push constants 0 and 1 into %1 and %3.
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| %0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %1 = spirv.Load "PushConstant" %0 : i32 | |
| %2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %3 = spirv.Load "PushConstant" %2 : i32 | |
// %9 = floor(%1 / 8): SDiv is applied to a non-negative operand (%1, or -1 - %1 when %1 < 0)
// and the quotient is mapped back with -1 - q for the negative case.
// presumably %1 is a byte offset and 8 is the vector<4xf16> stride -- TODO confirm with the host-side dispatch.
| %4 = spirv.SLessThan %1, %cst0_i32 : i32 | |
| %5 = spirv.ISub %cst-1_i32, %1 : i32 | |
| %6 = spirv.Select %4, %5, %1 : i1, i32 | |
| %7 = spirv.SDiv %6, %cst8_i32 : i32 | |
| %8 = spirv.ISub %cst-1_i32, %7 : i32 | |
| %9 = spirv.Select %4, %8, %7 : i1, i32 | |
| %__resource_var_0_0__0_addr = spirv.mlir.addressof @__resource_var_0_0__0 : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer> | |
// %15 = floor(%3 / 8), same sequence as above; used as the offset for the store to binding (0, 1).
| %10 = spirv.SLessThan %3, %cst0_i32 : i32 | |
| %11 = spirv.ISub %cst-1_i32, %3 : i32 | |
| %12 = spirv.Select %10, %11, %3 : i1, i32 | |
| %13 = spirv.SDiv %12, %cst8_i32 : i32 | |
| %14 = spirv.ISub %cst-1_i32, %13 : i32 | |
| %15 = spirv.Select %10, %14, %13 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
// WorkgroupId is loaded three times, once per extracted component: %17 = z, %19 = y, %21 = x.
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %16 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %17 = spirv.CompositeExtract %16[2 : i32] : vector<3xi32> | |
| %18 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %19 = spirv.CompositeExtract %18[1 : i32] : vector<3xi32> | |
| %20 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %21 = spirv.CompositeExtract %20[0 : i32] : vector<3xi32> | |
// %23 = the epsilon constant narrowed to f16 and splatted to 4 lanes.
// NOTE(review): 1e-6 is below the smallest normal f16 value (~6.1e-5); confirm the epsilon
// keeps a useful magnitude after the f32 -> f16 conversion on the target.
| %22 = spirv.FConvert %cst_f32_0 : f32 to f16 | |
| %23 = spirv.CompositeConstruct %22, %22, %22, %22 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %24 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %25 = spirv.CompositeExtract %24[0 : i32] : vector<3xi32> | |
// %31 = wg.x * 32 + local.x + wg.z * 23040 + wg.y * 2304: the vector<4xf16> element index
// shared by the load and the store (before the per-binding push-constant offsets are added).
| %26 = spirv.IMul %21, %cst32_i32 : i32 | |
| %27 = spirv.IAdd %26, %25 : i32 | |
| %28 = spirv.IMul %17, %cst23040_i32 : i32 | |
| %29 = spirv.IAdd %27, %28 : i32 | |
| %30 = spirv.IMul %19, %cst2304_i32 : i32 | |
| %31 = spirv.IAdd %29, %30 : i32 | |
// %34 = input vector, read from the vector<4xf16> view of binding (0, 0) at %31 + %9.
| %32 = spirv.IAdd %31, %9 : i32 | |
| %33 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %32] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| %34 = spirv.Load "StorageBuffer" %33 : vector<4xf16> | |
// Two f32 scalars are read from the f32 view of the same binding at indices wg.z and wg.z + 64.
// NOTE(review): unlike the f16 view, these indices carry no push-constant offset -- confirm that
// the per-z scalars really live at the start of the binding.
| %35 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %17] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
| %36 = spirv.Load "StorageBuffer" %35 : f32 | |
| %37 = spirv.IAdd %17, %cst64_i32 : i32 | |
| %38 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %37] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
| %39 = spirv.Load "StorageBuffer" %38 : f32 | |
// %45 = inversesqrt(splat(f16(%39 / 92160)) + splat(eps)); the division is done in f32, the rest in f16.
| %40 = spirv.FDiv %39, %cst_f32 : f32 | |
| %41 = spirv.FConvert %40 : f32 to f16 | |
| %42 = spirv.CompositeConstruct %41, %41, %41, %41 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %43 = spirv.FAdd %42, %23 : vector<4xf16> | |
| %44 = spirv.FConvert %36 : f32 to f16 | |
| %45 = spirv.GL.InverseSqrt %43 : vector<4xf16> | |
// %48 = (%34 - splat(f16(%36))) * %45, i.e. the value written out.
| %46 = spirv.CompositeConstruct %44, %44, %44, %44 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %47 = spirv.FSub %34, %46 : vector<4xf16> | |
| %48 = spirv.FMul %47, %45 : vector<4xf16> | |
// Store the result to binding (0, 1) at %31 + %15.
| %49 = spirv.IAdd %31, %15 : i32 | |
| %50 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %49] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %50, %48 : vector<4xf16> | |
| spirv.Return | |
| } | |
// Entry point registration; LocalSize 32x1x1 matches the workgroup_size on the export above.
| spirv.EntryPoint "GLCompute" @forward_dispatch_34_generic_64x10x9216, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_34_generic_64x10x9216 "LocalSize", 32, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
| hal.executable private @forward_dispatch_35 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
| hal.executable.export public @forward_dispatch_35_generic_2x320x96x96 ordinal(0) layout(#pipeline_layout6) attributes {translation_info = #translation1, workgroup_size = [8 : index, 4 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| %c3 = arith.constant 3 : index | |
| %c24 = arith.constant 24 : index | |
| %c640 = arith.constant 640 : index | |
| hal.return %c3, %c24, %c640 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| // Kernel body for dispatch 35. Each invocation loads one vector<4xf16> from binding 0, | |
| // multiplies every lane by one f16 scalar and adds a second f16 scalar (both loaded from | |
| // binding 1), then writes the four result lanes to binding 2 with four separate f16 stores | |
| // placed 320 elements apart. | |
| // NOTE(review): the index constants (737280 = 320 * 2304, 2949120 = 320 * 9216) suggest a | |
| // 2x320x96x96 channel-major input being rewritten in channel-minor order; confirm against | |
| // the op this dispatch was generated from. | |
| spirv.func @forward_dispatch_35_generic_2x320x96x96() "None" { | |
| // Integer constants used by the index arithmetic below. | |
| %cst3_i32 = spirv.Constant 3 : i32 | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst960_i32 = spirv.Constant 960 : i32 | |
| %cst640_i32 = spirv.Constant 640 : i32 | |
| %cst2949120_i32 = spirv.Constant 2949120 : i32 | |
| %cst1280_i32 = spirv.Constant 1280 : i32 | |
| %cst10240_i32 = spirv.Constant 10240 : i32 | |
| %cst122880_i32 = spirv.Constant 122880 : i32 | |
| %cst30720_i32 = spirv.Constant 30720 : i32 | |
| %cst2304_i32 = spirv.Constant 2304 : i32 | |
| %cst737280_i32 = spirv.Constant 737280 : i32 | |
| %cst96_i32 = spirv.Constant 96 : i32 | |
| %cst24_i32 = spirv.Constant 24 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
| // Read the four i32 push constants (array elements 0..3) into %1, %3, %5, %7. | |
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant> | |
| %0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %1 = spirv.Load "PushConstant" %0 : i32 | |
| %2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %3 = spirv.Load "PushConstant" %2 : i32 | |
| %4 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %5 = spirv.Load "PushConstant" %4 : i32 | |
| %6 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst3_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %7 = spirv.Load "PushConstant" %6 : i32 | |
| // %13 = floor(%1 / 8), expanded as select(x < 0, -1 - ((-1 - x) / 8), x / 8) so that | |
| // negative values round toward negative infinity instead of toward zero. | |
| // NOTE(review): the divisor equals the 8-byte element stride of binding 0, so %1 is | |
| // presumably a byte offset being converted to an element offset; confirm. | |
| %8 = spirv.SLessThan %1, %cst0_i32 : i32 | |
| %9 = spirv.ISub %cst-1_i32, %1 : i32 | |
| %10 = spirv.Select %8, %9, %1 : i1, i32 | |
| %11 = spirv.SDiv %10, %cst8_i32 : i32 | |
| %12 = spirv.ISub %cst-1_i32, %11 : i32 | |
| %13 = spirv.Select %8, %12, %11 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| // %19 = floor(%3 / 2), same expansion as above; added to the first binding-1 index. | |
| %14 = spirv.SLessThan %3, %cst0_i32 : i32 | |
| %15 = spirv.ISub %cst-1_i32, %3 : i32 | |
| %16 = spirv.Select %14, %15, %3 : i1, i32 | |
| %17 = spirv.SDiv %16, %cst2_i32 : i32 | |
| %18 = spirv.ISub %cst-1_i32, %17 : i32 | |
| %19 = spirv.Select %14, %18, %17 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| // %25 = floor(%5 / 2); added to the second binding-1 index. | |
| %20 = spirv.SLessThan %5, %cst0_i32 : i32 | |
| %21 = spirv.ISub %cst-1_i32, %5 : i32 | |
| %22 = spirv.Select %20, %21, %5 : i1, i32 | |
| %23 = spirv.SDiv %22, %cst2_i32 : i32 | |
| %24 = spirv.ISub %cst-1_i32, %23 : i32 | |
| %25 = spirv.Select %20, %24, %23 : i1, i32 | |
| // %31 = floor(%7 / 2); added to the binding-2 (output) index. | |
| %26 = spirv.SLessThan %7, %cst0_i32 : i32 | |
| %27 = spirv.ISub %cst-1_i32, %7 : i32 | |
| %28 = spirv.Select %26, %27, %7 : i1, i32 | |
| %29 = spirv.SDiv %28, %cst2_i32 : i32 | |
| %30 = spirv.ISub %cst-1_i32, %29 : i32 | |
| %31 = spirv.Select %26, %30, %29 : i1, i32 | |
| %__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| // Workgroup id components: %33 = id[2], %36 = id[1], %38 = id[0]. | |
| // id[2] is split with unsigned ops into %34 = id[2] / 320 and %39 = id[2] % 320. | |
| // The builtin is loaded three times (%32, %35, %37) from the same address. | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %32 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %33 = spirv.CompositeExtract %32[2 : i32] : vector<3xi32> | |
| %34 = spirv.UDiv %33, %cst320_i32 : i32 | |
| %35 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %36 = spirv.CompositeExtract %35[1 : i32] : vector<3xi32> | |
| %37 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %38 = spirv.CompositeExtract %37[0 : i32] : vector<3xi32> | |
| %39 = spirv.UMod %33, %cst320_i32 : i32 | |
| // Local invocation id components: %41 = local[1], %43 = local[0]. | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %40 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %41 = spirv.CompositeExtract %40[1 : i32] : vector<3xi32> | |
| %42 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %43 = spirv.CompositeExtract %42[0 : i32] : vector<3xi32> | |
| // Input element index into binding 0: | |
| // %54 = local[1]*24 + id[1]*96 + id[0]*8 + local[0] + (id[2]/320)*737280 + (id[2]%320)*2304 + %13 | |
| %44 = spirv.IMul %41, %cst24_i32 : i32 | |
| %45 = spirv.IMul %36, %cst96_i32 : i32 | |
| %46 = spirv.IAdd %44, %45 : i32 | |
| %47 = spirv.IMul %38, %cst8_i32 : i32 | |
| %48 = spirv.IAdd %46, %47 : i32 | |
| %49 = spirv.IAdd %48, %43 : i32 | |
| %50 = spirv.IMul %34, %cst737280_i32 : i32 | |
| %51 = spirv.IAdd %49, %50 : i32 | |
| %52 = spirv.IMul %39, %cst2304_i32 : i32 | |
| %53 = spirv.IAdd %51, %52 : i32 | |
| %54 = spirv.IAdd %53, %13 : i32 | |
| %55 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %54] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| %56 = spirv.Load "StorageBuffer" %55 : vector<4xf16> | |
| // Two scalars from binding 1, both indexed by id[2] % 320 plus a push-constant offset: | |
| // %59 = binding1[(id[2] % 320) + %19] and %62 = binding1[(id[2] % 320) + %25]. | |
| %57 = spirv.IAdd %39, %19 : i32 | |
| %58 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %57] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %59 = spirv.Load "StorageBuffer" %58 : f16 | |
| %60 = spirv.IAdd %39, %25 : i32 | |
| %61 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %60] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %62 = spirv.Load "StorageBuffer" %61 : f16 | |
| // Broadcast each scalar to four lanes and compute %66 = %56 * splat(%59) + splat(%62). | |
| %63 = spirv.CompositeConstruct %59, %59, %59, %59 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %64 = spirv.FMul %56, %63 : vector<4xf16> | |
| %65 = spirv.CompositeConstruct %62, %62, %62, %62 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %66 = spirv.FAdd %64, %65 : vector<4xf16> | |
| // Split the result into its four lanes so that each can be stored on its own. | |
| %67 = spirv.CompositeExtract %66[0 : i32] : vector<4xf16> | |
| %68 = spirv.CompositeExtract %66[1 : i32] : vector<4xf16> | |
| %69 = spirv.CompositeExtract %66[2 : i32] : vector<4xf16> | |
| %70 = spirv.CompositeExtract %66[3 : i32] : vector<4xf16> | |
| // Output element index into binding 2 for lane 0: | |
| // %81 = local[1]*30720 + id[1]*122880 + id[0]*10240 + local[0]*1280 + (id[2]/320)*2949120 + (id[2]%320) + %31 | |
| %71 = spirv.IMul %41, %cst30720_i32 : i32 | |
| %72 = spirv.IMul %36, %cst122880_i32 : i32 | |
| %73 = spirv.IAdd %71, %72 : i32 | |
| %74 = spirv.IMul %38, %cst10240_i32 : i32 | |
| %75 = spirv.IAdd %73, %74 : i32 | |
| %76 = spirv.IMul %43, %cst1280_i32 : i32 | |
| %77 = spirv.IAdd %75, %76 : i32 | |
| %78 = spirv.IMul %34, %cst2949120_i32 : i32 | |
| %79 = spirv.IAdd %77, %78 : i32 | |
| %80 = spirv.IAdd %79, %39 : i32 | |
| %81 = spirv.IAdd %80, %31 : i32 | |
| // Lanes 0..3 are written to %81 + 0, + 320, + 640 and + 960 respectively. | |
| %82 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %81] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %82, %67 : f16 | |
| %83 = spirv.IAdd %81, %cst320_i32 : i32 | |
| %84 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %83] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %84, %68 : f16 | |
| %85 = spirv.IAdd %81, %cst640_i32 : i32 | |
| %86 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %85] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %86, %69 : f16 | |
| %87 = spirv.IAdd %81, %cst960_i32 : i32 | |
| %88 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %87] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %88, %70 : f16 | |
| spirv.Return | |
| } | |
| // Registers the function as a GLCompute entry point whose interface consists of the | |
| // WorkgroupId and LocalInvocationId builtin variables. | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_35_generic_2x320x96x96, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| // LocalSize 8, 4, 1: 32 invocations per workgroup. | |
| spirv.ExecutionMode @forward_dispatch_35_generic_2x320x96x96 "LocalSize", 8, 4, 1 | |
| } | |
| } | |
| } | |
| } | |
| hal.executable private @forward_dispatch_36 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
| // Export record for the dispatch 36 matmul entry point (ordinal 0). The attributes fix a | |
| // subgroup size of 32 and a workgroup size of 64 x 2 x 1; the region returns the three | |
| // constant index values 5, 288 and 1. | |
| // NOTE(review): in IREE HAL this region is expected to yield the workgroup count per | |
| // dimension; 5 and 288 would then correspond to 320 / 64 and 18432 / 64, i.e. 64x64 | |
| // output tiles. Confirm against #translation4, which is defined outside this excerpt. | |
| hal.executable.export public @forward_dispatch_36_matmul_18432x320x320 ordinal(0) layout(#pipeline_layout6) attributes {subgroup_size = 32 : index, translation_info = #translation4, workgroup_size = [64 : index, 2 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index): | |
| %c5 = arith.constant 5 : index | |
| %c288 = arith.constant 288 : index | |
| %c1 = arith.constant 1 : index | |
| hal.return %c5, %c288, %c1 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, Float16, CooperativeMatrixNV], [SPV_KHR_storage_buffer_storage_class, SPV_NV_cooperative_matrix]> { | |
| // Module-level variables referenced by @forward_dispatch_36_matmul_18432x320x320 below. | |
| // WorkgroupId builtin, read as a 3 x i32 vector. | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| // Push-constant block: a struct wrapping an array of four i32 values. | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant> | |
| // Workgroup-storage staging array of 576 x vector<4xf32>; the kernel fills it from | |
| // binding 1 and reads it back through cooperative-matrix loads as 16x16 f16 tiles. | |
| spirv.GlobalVariable @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup> | |
| // Workgroup-storage staging array of 640 x vector<4xf32>; the kernel fills it from | |
| // binding 0 and reads it back through cooperative-matrix loads as 16x16 f16 tiles. | |
| spirv.GlobalVariable @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup> | |
| // LocalInvocationId builtin, read as a 3 x i32 vector. | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| // Storage buffers at bind(0, 0), bind(0, 1) and bind(0, 2), each declared as a runtime | |
| // array of vector<4xf32> with a 16-byte stride. | |
| // NOTE(review): the data is consumed as f16 cooperative matrices, so the vector<4xf32> | |
| // element type is presumably only a 128-bit access width; confirm. | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_36_matmul_18432x320x320() "None" attributes {spirv.entry_point_abi = #spirv.entry_point_abi<subgroup_size = 32>} { | |
| %cst9_i32 = spirv.Constant 9 : i32 | |
| %false = spirv.Constant false | |
| %cst5_i32 = spirv.Constant 5 : i32 | |
| %cst3_i32 = spirv.Constant 3 : i32 | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst-33_i32 = spirv.Constant -33 : i32 | |
| %cst642_i32 = spirv.Constant 642 : i32 | |
| %cst-576_i32 = spirv.Constant -576 : i32 | |
| %cst-640_i32 = spirv.Constant -640 : i32 | |
| %cst146_i32 = spirv.Constant 146 : i32 | |
| %cst82_i32 = spirv.Constant 82 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst144_i32 = spirv.Constant 144 : i32 | |
| %cst72_i32 = spirv.Constant 72 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
| %cst160_i32 = spirv.Constant 160 : i32 | |
| %cst80_i32 = spirv.Constant 80 : i32 | |
| %cst36_i32 = spirv.Constant 36 : i32 | |
| %cst4_i32 = spirv.Constant 4 : i32 | |
| %cst1280_i32 = spirv.Constant 1280 : i32 | |
| %cst640_i32 = spirv.Constant 640 : i32 | |
| %cst2560_i32 = spirv.Constant 2560 : i32 | |
| %cst40_i32 = spirv.Constant 40 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst16_i32 = spirv.Constant 16 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst288_i32 = spirv.Constant 288 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst_f16 = spirv.Constant 0.000000e+00 : f16 | |
| %0 = spirv.CompositeConstruct %cst_f16 : (f16) -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %1 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %2 = spirv.CompositeExtract %1[0 : i32] : vector<3xi32> | |
| %3 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %4 = spirv.CompositeExtract %3[1 : i32] : vector<3xi32> | |
| %5 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %6 = spirv.CompositeExtract %5[2 : i32] : vector<3xi32> | |
| %__workgroup_mem__4_addr = spirv.mlir.addressof @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup> | |
| %__workgroup_mem__5_addr = spirv.mlir.addressof @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup> | |
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant> | |
| %7 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %8 = spirv.Load "PushConstant" %7 : i32 | |
| %9 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %10 = spirv.Load "PushConstant" %9 : i32 | |
| %11 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %12 = spirv.Load "PushConstant" %11 : i32 | |
| %13 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst3_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %14 = spirv.Load "PushConstant" %13 : i32 | |
| %15 = spirv.SLessThan %8, %cst0_i32 : i32 | |
| %16 = spirv.ISub %cst-1_i32, %8 : i32 | |
| %17 = spirv.Select %15, %16, %8 : i1, i32 | |
| %18 = spirv.SDiv %17, %cst16_i32 : i32 | |
| %19 = spirv.ISub %cst-1_i32, %18 : i32 | |
| %20 = spirv.Select %15, %19, %18 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %21 = spirv.SLessThan %10, %cst0_i32 : i32 | |
| %22 = spirv.ISub %cst-1_i32, %10 : i32 | |
| %23 = spirv.Select %21, %22, %10 : i1, i32 | |
| %24 = spirv.SDiv %23, %cst16_i32 : i32 | |
| %25 = spirv.ISub %cst-1_i32, %24 : i32 | |
| %26 = spirv.Select %21, %25, %24 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %27 = spirv.SLessThan %12, %cst0_i32 : i32 | |
| %28 = spirv.ISub %cst-1_i32, %12 : i32 | |
| %29 = spirv.Select %27, %28, %12 : i1, i32 | |
| %30 = spirv.SDiv %29, %cst16_i32 : i32 | |
| %31 = spirv.ISub %cst-1_i32, %30 : i32 | |
| %32 = spirv.Select %27, %31, %30 : i1, i32 | |
| %33 = spirv.SLessThan %14, %cst0_i32 : i32 | |
| %34 = spirv.ISub %cst-1_i32, %14 : i32 | |
| %35 = spirv.Select %33, %34, %14 : i1, i32 | |
| %36 = spirv.SDiv %35, %cst16_i32 : i32 | |
| %37 = spirv.ISub %cst-1_i32, %36 : i32 | |
| %38 = spirv.Select %33, %37, %36 : i1, i32 | |
| %__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %39 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %40 = spirv.CompositeExtract %39[1 : i32] : vector<3xi32> | |
| %41 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %42 = spirv.CompositeExtract %41[0 : i32] : vector<3xi32> | |
| %43 = spirv.IMul %40, %cst2560_i32 : i32 | |
| %44 = spirv.IAdd %43, %2 : i32 | |
| %45 = spirv.IMul %4, %cst640_i32 : i32 | |
| %46 = spirv.IAdd %44, %45 : i32 | |
| %47 = spirv.IMul %6, %cst1280_i32 : i32 | |
| %48 = spirv.IAdd %46, %47 : i32 | |
| %49 = spirv.IAdd %48, %20 : i32 | |
| %50 = spirv.SLessThan %2, %cst0_i32 : i32 | |
| %51 = spirv.ISub %cst-1_i32, %2 : i32 | |
| %52 = spirv.Select %50, %51, %2 : i1, i32 | |
| %53 = spirv.SDiv %52, %cst4_i32 : i32 | |
| %54 = spirv.ISub %cst-1_i32, %53 : i32 | |
| %55 = spirv.Select %50, %54, %53 : i1, i32 | |
| %56 = spirv.IMul %55, %cst36_i32 : i32 | |
| %57 = spirv.IAdd %49, %56 : i32 | |
| %58 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %57] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %59 = spirv.Load "StorageBuffer" %58 : vector<4xf32> | |
| %60 = spirv.IMul %4, %cst80_i32 : i32 | |
| %61 = spirv.IAdd %2, %60 : i32 | |
| %62 = spirv.IMul %6, %cst160_i32 : i32 | |
| %63 = spirv.IAdd %61, %62 : i32 | |
| %64 = spirv.IAdd %63, %55 : i32 | |
| %65 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %64] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %65, %59 : vector<4xf32> | |
| %66 = spirv.IAdd %57, %cst1280_i32 : i32 | |
| %67 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %66] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %68 = spirv.Load "StorageBuffer" %67 : vector<4xf32> | |
| %69 = spirv.IAdd %64, %cst160_i32 : i32 | |
| %70 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %69] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %70, %68 : vector<4xf32> | |
| %71 = spirv.IMul %4, %cst320_i32 : i32 | |
| %72 = spirv.IAdd %2, %71 : i32 | |
| %73 = spirv.IMul %6, %cst640_i32 : i32 | |
| %74 = spirv.IAdd %72, %73 : i32 | |
| %75 = spirv.IMul %42, %cst8_i32 : i32 | |
| %76 = spirv.IAdd %74, %75 : i32 | |
| %77 = spirv.IAdd %76, %26 : i32 | |
| %78 = spirv.SDiv %52, %cst8_i32 : i32 | |
| %79 = spirv.ISub %cst-1_i32, %78 : i32 | |
| %80 = spirv.Select %50, %79, %78 : i1, i32 | |
| %81 = spirv.IMul %80, %cst32_i32 : i32 | |
| %82 = spirv.IAdd %77, %81 : i32 | |
| %83 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %82] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %84 = spirv.Load "StorageBuffer" %83 : vector<4xf32> | |
| %85 = spirv.IMul %4, %cst72_i32 : i32 | |
| %86 = spirv.IAdd %2, %85 : i32 | |
| %87 = spirv.IMul %6, %cst144_i32 : i32 | |
| %88 = spirv.IAdd %86, %87 : i32 | |
| %89 = spirv.IAdd %88, %80 : i32 | |
| %90 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %89] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %90, %84 : vector<4xf32> | |
| %91 = spirv.IAdd %82, %cst640_i32 : i32 | |
| %92 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %91] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %93 = spirv.Load "StorageBuffer" %92 : vector<4xf32> | |
| %94 = spirv.IAdd %89, %cst144_i32 : i32 | |
| %95 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %94] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %95, %93 : vector<4xf32> | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| %96 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %97 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %98 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %99 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %100 = spirv.Variable : !spirv.ptr<i32, Function> | |
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%cst0_i32, %0, %0, %0, %0, %cst0_i32 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32) | |
| ^bb1(%168: i32, %169: !spirv.coopmatrix<16x16xf16, Subgroup>, %170: !spirv.coopmatrix<16x16xf16, Subgroup>, %171: !spirv.coopmatrix<16x16xf16, Subgroup>, %172: !spirv.coopmatrix<16x16xf16, Subgroup>, %173: i32): // 2 preds: ^bb0, ^bb2 | |
| %174 = spirv.SLessThan %168, %cst288_i32 : i32 | |
| spirv.BranchConditional %174, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
| %175 = spirv.IMul %173, %cst320_i32 : i32 | |
| %176 = spirv.IMul %4, %cst160_i32 : i32 | |
| %177 = spirv.IAdd %175, %176 : i32 | |
| %178 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %177] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %179 = spirv.NV.CooperativeMatrixLoad %178, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %180 = spirv.IAdd %177, %cst2_i32 : i32 | |
| %181 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %180] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %182 = spirv.NV.CooperativeMatrixLoad %181, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %183 = spirv.IAdd %177, %cst80_i32 : i32 | |
| %184 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %183] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %185 = spirv.NV.CooperativeMatrixLoad %184, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %186 = spirv.IAdd %177, %cst82_i32 : i32 | |
| %187 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %186] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %188 = spirv.NV.CooperativeMatrixLoad %187, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %189 = spirv.IMul %173, %cst288_i32 : i32 | |
| %190 = spirv.SDiv %52, %cst32_i32 : i32 | |
| %191 = spirv.ISub %cst-1_i32, %190 : i32 | |
| %192 = spirv.Select %50, %191, %190 : i1, i32 | |
| %193 = spirv.IMul %192, %cst4_i32 : i32 | |
| %194 = spirv.IAdd %189, %193 : i32 | |
| %195 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %194] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %196 = spirv.NV.CooperativeMatrixLoad %195, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %197 = spirv.IAdd %194, %cst2_i32 : i32 | |
| %198 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %197] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %199 = spirv.NV.CooperativeMatrixLoad %198, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %200 = spirv.IAdd %194, %cst144_i32 : i32 | |
| %201 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %200] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %202 = spirv.NV.CooperativeMatrixLoad %201, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %203 = spirv.IAdd %194, %cst146_i32 : i32 | |
| %204 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %203] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %205 = spirv.NV.CooperativeMatrixLoad %204, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %206 = spirv.NV.CooperativeMatrixMulAdd %179, %196, %169 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %207 = spirv.NV.CooperativeMatrixMulAdd %182, %202, %206 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %208 = spirv.NV.CooperativeMatrixMulAdd %179, %199, %170 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %209 = spirv.NV.CooperativeMatrixMulAdd %182, %205, %208 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %210 = spirv.NV.CooperativeMatrixMulAdd %185, %196, %171 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %211 = spirv.NV.CooperativeMatrixMulAdd %188, %202, %210 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %212 = spirv.NV.CooperativeMatrixMulAdd %185, %199, %172 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %213 = spirv.NV.CooperativeMatrixMulAdd %188, %205, %212 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %214 = spirv.IAdd %168, %cst32_i32 : i32 | |
| %215 = spirv.SLessThan %214, %cst0_i32 : i32 | |
| %216 = spirv.ISub %cst-33_i32, %168 : i32 | |
| %217 = spirv.Select %215, %216, %214 : i1, i32 | |
| %218 = spirv.SDiv %217, %cst8_i32 : i32 | |
| %219 = spirv.ISub %cst-1_i32, %218 : i32 | |
| %220 = spirv.Select %215, %219, %218 : i1, i32 | |
| %221 = spirv.IAdd %49, %220 : i32 | |
| %222 = spirv.IAdd %221, %56 : i32 | |
| %223 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %222] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %224 = spirv.Load "StorageBuffer" %223 : vector<4xf32> | |
| %225 = spirv.SDiv %217, %cst32_i32 : i32 | |
| %226 = spirv.ISub %cst-1_i32, %225 : i32 | |
| %227 = spirv.Select %215, %226, %225 : i1, i32 | |
| %228 = spirv.GL.SAbs %227 : i32 | |
| %229 = spirv.GL.SAbs %cst2_i32 : i32 | |
| %230 = spirv.UMod %228, %229 : i32 | |
| %231 = spirv.IEqual %227, %228 : i32 | |
| %232 = spirv.SNegate %230 : i32 | |
| %233 = spirv.Select %231, %230, %232 : i1, i32 | |
| %234 = spirv.SLessThan %233, %cst0_i32 : i32 | |
| %235 = spirv.IAdd %233, %cst2_i32 : i32 | |
| %236 = spirv.Select %234, %235, %233 : i1, i32 | |
| %237 = spirv.IMul %227, %cst320_i32 : i32 | |
| %238 = spirv.IAdd %237, %63 : i32 | |
| %239 = spirv.SLessThan %227, %cst0_i32 : i32 | |
| %240 = spirv.ISub %cst-1_i32, %227 : i32 | |
| %241 = spirv.Select %239, %240, %227 : i1, i32 | |
| %242 = spirv.SDiv %241, %cst2_i32 : i32 | |
| %243 = spirv.ISub %cst-1_i32, %242 : i32 | |
| %244 = spirv.Select %239, %243, %242 : i1, i32 | |
| %245 = spirv.IMul %244, %cst-640_i32 : i32 | |
| %246 = spirv.IAdd %238, %245 : i32 | |
| %247 = spirv.IAdd %246, %55 : i32 | |
| %248 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %247] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %248, %224 : vector<4xf32> | |
| %249 = spirv.IAdd %222, %cst1280_i32 : i32 | |
| %250 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %249] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %251 = spirv.Load "StorageBuffer" %250 : vector<4xf32> | |
| %252 = spirv.IAdd %247, %cst160_i32 : i32 | |
| %253 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %252] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %253, %251 : vector<4xf32> | |
| %254 = spirv.IMul %214, %cst40_i32 : i32 | |
| %255 = spirv.IAdd %254, %2 : i32 | |
| %256 = spirv.IAdd %255, %71 : i32 | |
| %257 = spirv.IAdd %256, %73 : i32 | |
| %258 = spirv.IAdd %257, %75 : i32 | |
| %259 = spirv.IAdd %258, %26 : i32 | |
| %260 = spirv.IAdd %259, %81 : i32 | |
| %261 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %260] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %262 = spirv.Load "StorageBuffer" %261 : vector<4xf32> | |
| %263 = spirv.IMul %227, %cst288_i32 : i32 | |
| %264 = spirv.IAdd %263, %88 : i32 | |
| %265 = spirv.IMul %244, %cst-576_i32 : i32 | |
| %266 = spirv.IAdd %264, %265 : i32 | |
| %267 = spirv.IAdd %266, %80 : i32 | |
| %268 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %267] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %268, %262 : vector<4xf32> | |
| %269 = spirv.IAdd %260, %cst640_i32 : i32 | |
| %270 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %269] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %271 = spirv.Load "StorageBuffer" %270 : vector<4xf32> | |
| %272 = spirv.IAdd %267, %cst144_i32 : i32 | |
| %273 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %272] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %273, %271 : vector<4xf32> | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| spirv.Store "Function" %96, %207 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %97, %209 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %98, %211 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %99, %213 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %100, %236 : i32 | |
| spirv.Branch ^bb1(%214, %207, %209, %211, %213, %236 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
| %101 = spirv.Load "Function" %100 : i32 | |
| %102 = spirv.Load "Function" %99 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %103 = spirv.Load "Function" %98 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %104 = spirv.Load "Function" %97 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %105 = spirv.Load "Function" %96 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %106 = spirv.IMul %4, %cst160_i32 : i32 | |
| %107 = spirv.IMul %101, %cst320_i32 : i32 | |
| %108 = spirv.IAdd %106, %107 : i32 | |
| %109 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %108] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %110 = spirv.NV.CooperativeMatrixLoad %109, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %111 = spirv.IAdd %108, %cst2_i32 : i32 | |
| %112 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %111] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %113 = spirv.NV.CooperativeMatrixLoad %112, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %114 = spirv.IAdd %108, %cst80_i32 : i32 | |
| %115 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %114] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %116 = spirv.NV.CooperativeMatrixLoad %115, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %117 = spirv.IAdd %108, %cst82_i32 : i32 | |
| %118 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %117] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %119 = spirv.NV.CooperativeMatrixLoad %118, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %120 = spirv.IMul %101, %cst288_i32 : i32 | |
| %121 = spirv.SDiv %52, %cst32_i32 : i32 | |
| %122 = spirv.ISub %cst-1_i32, %121 : i32 | |
| %123 = spirv.Select %50, %122, %121 : i1, i32 | |
| %124 = spirv.IMul %123, %cst4_i32 : i32 | |
| %125 = spirv.IAdd %120, %124 : i32 | |
| %126 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %125] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %127 = spirv.NV.CooperativeMatrixLoad %126, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %128 = spirv.IAdd %125, %cst2_i32 : i32 | |
| %129 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %128] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %130 = spirv.NV.CooperativeMatrixLoad %129, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %131 = spirv.IAdd %125, %cst144_i32 : i32 | |
| %132 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %131] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %133 = spirv.NV.CooperativeMatrixLoad %132, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %134 = spirv.IAdd %125, %cst146_i32 : i32 | |
| %135 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %134] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %136 = spirv.NV.CooperativeMatrixLoad %135, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %137 = spirv.NV.CooperativeMatrixMulAdd %110, %127, %105 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %138 = spirv.NV.CooperativeMatrixMulAdd %113, %133, %137 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %139 = spirv.NV.CooperativeMatrixMulAdd %110, %130, %104 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %140 = spirv.NV.CooperativeMatrixMulAdd %113, %136, %139 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %141 = spirv.NV.CooperativeMatrixMulAdd %116, %127, %103 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %142 = spirv.NV.CooperativeMatrixMulAdd %119, %133, %141 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %143 = spirv.NV.CooperativeMatrixMulAdd %116, %130, %102 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %144 = spirv.NV.CooperativeMatrixMulAdd %119, %136, %143 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %145 = spirv.IAdd %75, %32 : i32 | |
| %146 = spirv.IAdd %145, %124 : i32 | |
| %147 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %146] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %148 = spirv.NV.CooperativeMatrixLoad %147, %cst0_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %149 = spirv.IAdd %146, %cst2_i32 : i32 | |
| %150 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %149] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %151 = spirv.NV.CooperativeMatrixLoad %150, %cst0_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %152 = spirv.FAdd %138, %148 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %153 = spirv.FAdd %140, %151 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %154 = spirv.FAdd %142, %148 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %155 = spirv.FAdd %144, %151 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %156 = spirv.IMul %4, %cst1280_i32 : i32 | |
| %157 = spirv.IAdd %43, %156 : i32 | |
| %158 = spirv.IAdd %157, %75 : i32 | |
| %159 = spirv.IAdd %158, %38 : i32 | |
| %160 = spirv.IAdd %159, %124 : i32 | |
| %161 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %160] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %161, %152, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %162 = spirv.IAdd %160, %cst2_i32 : i32 | |
| %163 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %162] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %163, %153, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %164 = spirv.IAdd %160, %cst640_i32 : i32 | |
| %165 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %164] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %165, %154, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %166 = spirv.IAdd %160, %cst642_i32 : i32 | |
| %167 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %166] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %167, %155, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_36_matmul_18432x320x320, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__ | |
| spirv.ExecutionMode @forward_dispatch_36_matmul_18432x320x320 "LocalSize", 64, 2, 1 | |
| } | |
| } | |
| } | |
| } | |
| // Executable forward_dispatch_38: one exported entry point with 160 invocations per workgroup. | |
| // Each workgroup sums 160 vector<2xf32> loads (320 f32 values) from binding 0, divides the total | |
| // by 320.0, and local invocation 0 stores the single f32 result into binding 1. | |
| hal.executable private @forward_dispatch_38 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
| // Export region: returns the index triple (18432, 1, 1). | |
| // NOTE(review): presumably the workgroup count in x, y, z - confirm against the HAL dialect docs. | |
| hal.executable.export public @forward_dispatch_38_generic_18432x320 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation3, workgroup_size = [160 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index): | |
| %c18432 = arith.constant 18432 : index | |
| %c1 = arith.constant 1 : index | |
| hal.return %c18432, %c1, %c1 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.3, [Shader, GroupNonUniformShuffle], [SPV_KHR_storage_buffer_storage_class]> { | |
| // Workgroup-storage scratch array of 5 f32 values: written at index local_id.x / 32 and re-read | |
| // after the control barrier in the function below. | |
| spirv.GlobalVariable @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<5 x f32>)>, Workgroup> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| // Two i32 push constants; both are only used as offsets after a floor division (see %11 and %17). | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| // Binding (0, 0) is read as vector<2xf32> elements (stride 8); binding (0, 1) is written as f32 (stride 4). | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf32>, stride=8> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_38_generic_18432x320() "None" { | |
| %cst160_i32 = spirv.Constant 160 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst4_i32 = spirv.Constant 4 : i32 | |
| %cst16_i32 = spirv.Constant 16 : i32 | |
| %cst5_i32 = spirv.Constant 5 : i32 | |
| %cst_f32 = spirv.Constant 0.000000e+00 : f32 | |
| %cst_f32_0 = spirv.Constant 3.200000e+02 : f32 | |
| // %1 = local_id.x | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %0 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %1 = spirv.CompositeExtract %0[0 : i32] : vector<3xi32> | |
| // %3 and %5 are push constants 0 and 1. | |
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| %2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %3 = spirv.Load "PushConstant" %2 : i32 | |
| %4 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %5 = spirv.Load "PushConstant" %4 : i32 | |
| // %11 = floor(%3 / 8). SDiv is only ever applied to a non-negative value: %3 itself, or -1 - %3 | |
| // when %3 < 0, in which case the quotient q is mapped back with -1 - q. | |
| // NOTE(review): %3 looks like a byte offset turned into an index of 8-byte elements - confirm. | |
| %6 = spirv.SLessThan %3, %cst0_i32 : i32 | |
| %7 = spirv.ISub %cst-1_i32, %3 : i32 | |
| %8 = spirv.Select %6, %7, %3 : i1, i32 | |
| %9 = spirv.SDiv %8, %cst8_i32 : i32 | |
| %10 = spirv.ISub %cst-1_i32, %9 : i32 | |
| %11 = spirv.Select %6, %10, %9 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf32>, stride=8> [0])>, StorageBuffer> | |
| // %17 = floor(%5 / 4), same select/SDiv/select pattern as above. | |
| %12 = spirv.SLessThan %5, %cst0_i32 : i32 | |
| %13 = spirv.ISub %cst-1_i32, %5 : i32 | |
| %14 = spirv.Select %12, %13, %5 : i1, i32 | |
| %15 = spirv.SDiv %14, %cst4_i32 : i32 | |
| %16 = spirv.ISub %cst-1_i32, %15 : i32 | |
| %17 = spirv.Select %12, %16, %15 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer> | |
| // %19 = workgroup_id.x; %22 = workgroup_id.x * 160 + local_id.x + %11 is the index of the | |
| // vector<2xf32> element this invocation loads from binding 0. | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %18 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %19 = spirv.CompositeExtract %18[0 : i32] : vector<3xi32> | |
| %20 = spirv.IMul %19, %cst160_i32 : i32 | |
| %21 = spirv.IAdd %20, %1 : i32 | |
| %22 = spirv.IAdd %21, %11 : i32 | |
| %23 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %22] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf32>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| %24 = spirv.Load "StorageBuffer" %23 : vector<2xf32> | |
| // Per-invocation partial sum of the two loaded components. | |
| %25 = spirv.CompositeExtract %24[0 : i32] : vector<2xf32> | |
| %26 = spirv.CompositeExtract %24[1 : i32] : vector<2xf32> | |
| %27 = spirv.FAdd %25, %26 : f32 | |
| // Subgroup-scope shuffle-xor steps with masks 1, 2, 4, 8, 16, each followed by an add. | |
| // With 32 participating lanes (target env declares min/max subgroup size 32) %37 ends up | |
| // holding the sum of %27 over the whole subgroup in every lane. | |
| %28 = spirv.GroupNonUniformShuffleXor <Subgroup> %27, %cst1_i32 : f32, i32 | |
| %29 = spirv.FAdd %27, %28 : f32 | |
| %30 = spirv.GroupNonUniformShuffleXor <Subgroup> %29, %cst2_i32 : f32, i32 | |
| %31 = spirv.FAdd %29, %30 : f32 | |
| %32 = spirv.GroupNonUniformShuffleXor <Subgroup> %31, %cst4_i32 : f32, i32 | |
| %33 = spirv.FAdd %31, %32 : f32 | |
| %34 = spirv.GroupNonUniformShuffleXor <Subgroup> %33, %cst8_i32 : f32, i32 | |
| %35 = spirv.FAdd %33, %34 : f32 | |
| %36 = spirv.GroupNonUniformShuffleXor <Subgroup> %35, %cst16_i32 : f32, i32 | |
| %37 = spirv.FAdd %35, %36 : f32 | |
| // %38 = local_id.x / 32 and %39 = local_id.x % 32. Invocations with %39 == 0 publish their %37 | |
| // into scratch slot %38 (values 0..4 for 160 invocations). | |
| // NOTE(review): this uses %38/%39 as subgroup id / lane id, which assumes invocations | |
| // 32k..32k+31 share a subgroup - confirm for the target driver. | |
| %__workgroup_mem__5_addr = spirv.mlir.addressof @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<5 x f32>)>, Workgroup> | |
| %38 = spirv.UDiv %1, %cst32_i32 : i32 | |
| %39 = spirv.UMod %1, %cst32_i32 : i32 | |
| %40 = spirv.IEqual %39, %cst0_i32 : i32 | |
| spirv.mlir.selection { | |
| spirv.BranchConditional %40, ^bb1, ^bb2 | |
| ^bb1: // pred: ^bb0 | |
| %56 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %38] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x f32>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %56, %37 : f32 | |
| spirv.Branch ^bb2 | |
| ^bb2: // 2 preds: ^bb0, ^bb1 | |
| spirv.mlir.merge | |
| } | |
| // Workgroup-scope barrier with AcquireRelease and WorkgroupMemory semantics, placed between the | |
| // scratch stores above and the scratch loads below. | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| // Every invocation reads slot min(%39, 4), which keeps the index inside the 5-element array, | |
| // and the loaded value is replaced by 0.0 when %39 >= 5, so only %39 in 0..4 contributes a partial. | |
| %41 = spirv.GL.UMin %39, %cst4_i32 : i32 | |
| %42 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %41] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x f32>)>, Workgroup>, i32, i32 | |
| %43 = spirv.Load "Workgroup" %42 : f32 | |
| %44 = spirv.SGreaterThanEqual %39, %cst5_i32 : i32 | |
| %45 = spirv.Select %44, %cst_f32, %43 : i1, f32 | |
| // Second shuffle-xor/add sequence with masks 1, 2, 4 (covers 8 lanes, enough for the 5 partials), | |
| // then %52 takes the value held by subgroup invocation 0. | |
| %46 = spirv.GroupNonUniformShuffleXor <Subgroup> %45, %cst1_i32 : f32, i32 | |
| %47 = spirv.FAdd %45, %46 : f32 | |
| %48 = spirv.GroupNonUniformShuffleXor <Subgroup> %47, %cst2_i32 : f32, i32 | |
| %49 = spirv.FAdd %47, %48 : f32 | |
| %50 = spirv.GroupNonUniformShuffleXor <Subgroup> %49, %cst4_i32 : f32, i32 | |
| %51 = spirv.FAdd %49, %50 : f32 | |
| %52 = spirv.GroupNonUniformShuffle <Subgroup> %51, %cst0_i32 : f32, i32 | |
| // Adds the constant 0.0 (presumably the reduction's initial value), then divides by 320.0. | |
| %53 = spirv.FAdd %52, %cst_f32 : f32 | |
| %54 = spirv.FDiv %53, %cst_f32_0 : f32 | |
| // Only local invocation 0 stores the result, at index workgroup_id.x + %17 of binding 1. | |
| %55 = spirv.IEqual %1, %cst0_i32 : i32 | |
| spirv.mlir.selection { | |
| spirv.BranchConditional %55, ^bb1, ^bb2 | |
| ^bb1: // pred: ^bb0 | |
| %56 = spirv.IAdd %19, %17 : i32 | |
| %57 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %56] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %57, %54 : f32 | |
| spirv.Branch ^bb2 | |
| ^bb2: // 2 preds: ^bb0, ^bb1 | |
| spirv.mlir.merge | |
| } | |
| spirv.Return | |
| } | |
| // GLCompute entry point; LocalSize 160x1x1 matches workgroup_size on the export op above. | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_38_generic_18432x320, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__ | |
| spirv.ExecutionMode @forward_dispatch_38_generic_18432x320 "LocalSize", 160, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
| // Executable forward_dispatch_39: one exported entry point with 160 invocations per workgroup. | |
| // Per workgroup it reduces sum((x - m)^2) over 160 vector<2xf32> loads (320 f32 values), where m is | |
| // one f32 loaded per workgroup, then each invocation stores | |
| // (v - f16(m)) * inversesqrt(f16(sum / 320) + f16(9.99999974E-6)) * s + b as a vector<2xf16>. | |
| // NOTE(review): this has the shape of a layer-normalization step with m as a precomputed mean, | |
| // s a scale and b a bias - the naming is inferred, confirm against the producing model. | |
| hal.executable private @forward_dispatch_39 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
| // Export region: returns the index triple (18432, 1, 1). | |
| // NOTE(review): presumably the workgroup count in x, y, z - confirm against the HAL dialect docs. | |
| hal.executable.export public @forward_dispatch_39_generic_18432x320 ordinal(0) layout(#pipeline_layout9) attributes {translation_info = #translation3, workgroup_size = [160 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index): | |
| %c18432 = arith.constant 18432 : index | |
| %c1 = arith.constant 1 : index | |
| hal.return %c18432, %c1, %c1 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.3, [Shader, StorageBuffer16BitAccess, GroupNonUniformShuffle, Float16], [SPV_KHR_storage_buffer_storage_class, SPV_KHR_16bit_storage]> { | |
| // Workgroup-storage scratch array of 5 f32 values: written at index local_id.x / 32 and re-read | |
| // after the control barrier in the function below. | |
| spirv.GlobalVariable @__workgroup_mem__8 : !spirv.ptr<!spirv.struct<(!spirv.array<5 x f32>)>, Workgroup> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| // Six i32 push constants; each is only used as an offset after a floor division (see %19..%49). | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<6 x i32, stride=4> [0])>, PushConstant> | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| // Binding (0, 0) is declared three times, each marked aliased, with different element types: | |
| // vector<2xf32> (stride 8), f32 (stride 4) and vector<2xf16> (stride 4). | |
| spirv.GlobalVariable @__resource_var_0_0__1 bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf32>, stride=8> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_0__0 bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf16>, stride=4> [0])>, StorageBuffer> | |
| // Binding (0, 1) is read twice at different offsets (%97 and %100); binding (0, 2) receives the output. | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf16>, stride=4> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf16>, stride=4> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_39_generic_18432x320() "None" { | |
| %cst3_i32 = spirv.Constant 3 : i32 | |
| %cst160_i32 = spirv.Constant 160 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst4_i32 = spirv.Constant 4 : i32 | |
| %cst16_i32 = spirv.Constant 16 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst5_i32 = spirv.Constant 5 : i32 | |
| %cst_f32 = spirv.Constant 0.000000e+00 : f32 | |
| %cst_vec_2xf32 = spirv.Constant dense<3.200000e+02> : vector<2xf32> | |
| %cst_f32_0 = spirv.Constant 9.99999974E-6 : f32 | |
| // %1 = local_id.x | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %0 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %1 = spirv.CompositeExtract %0[0 : i32] : vector<3xi32> | |
| // %3, %5, %7, %9, %11, %13 are push constants 0 through 5. | |
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<6 x i32, stride=4> [0])>, PushConstant> | |
| %2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<6 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %3 = spirv.Load "PushConstant" %2 : i32 | |
| %4 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<6 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %5 = spirv.Load "PushConstant" %4 : i32 | |
| %6 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<6 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %7 = spirv.Load "PushConstant" %6 : i32 | |
| %8 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst3_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<6 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %9 = spirv.Load "PushConstant" %8 : i32 | |
| %10 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst4_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<6 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %11 = spirv.Load "PushConstant" %10 : i32 | |
| %12 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst5_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<6 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %13 = spirv.Load "PushConstant" %12 : i32 | |
| // %19 = floor(%3 / 8). SDiv is only ever applied to a non-negative value: %3 itself, or -1 - %3 | |
| // when %3 < 0, in which case the quotient q is mapped back with -1 - q. | |
| // NOTE(review): the divisors match the element strides of the views they index (8 and 4 bytes), | |
| // so the push constants look like byte offsets - confirm. | |
| %14 = spirv.SLessThan %3, %cst0_i32 : i32 | |
| %15 = spirv.ISub %cst-1_i32, %3 : i32 | |
| %16 = spirv.Select %14, %15, %3 : i1, i32 | |
| %17 = spirv.SDiv %16, %cst8_i32 : i32 | |
| %18 = spirv.ISub %cst-1_i32, %17 : i32 | |
| %19 = spirv.Select %14, %18, %17 : i1, i32 | |
| %__resource_var_0_0__1_addr = spirv.mlir.addressof @__resource_var_0_0__1 : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf32>, stride=8> [0])>, StorageBuffer> | |
| // %25 = floor(%5 / 4) | |
| %20 = spirv.SLessThan %5, %cst0_i32 : i32 | |
| %21 = spirv.ISub %cst-1_i32, %5 : i32 | |
| %22 = spirv.Select %20, %21, %5 : i1, i32 | |
| %23 = spirv.SDiv %22, %cst4_i32 : i32 | |
| %24 = spirv.ISub %cst-1_i32, %23 : i32 | |
| %25 = spirv.Select %20, %24, %23 : i1, i32 | |
| %__resource_var_0_0__0_addr = spirv.mlir.addressof @__resource_var_0_0__0 : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer> | |
| // %31 = floor(%7 / 4) | |
| %26 = spirv.SLessThan %7, %cst0_i32 : i32 | |
| %27 = spirv.ISub %cst-1_i32, %7 : i32 | |
| %28 = spirv.Select %26, %27, %7 : i1, i32 | |
| %29 = spirv.SDiv %28, %cst4_i32 : i32 | |
| %30 = spirv.ISub %cst-1_i32, %29 : i32 | |
| %31 = spirv.Select %26, %30, %29 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf16>, stride=4> [0])>, StorageBuffer> | |
| // %37 = floor(%9 / 4) | |
| %32 = spirv.SLessThan %9, %cst0_i32 : i32 | |
| %33 = spirv.ISub %cst-1_i32, %9 : i32 | |
| %34 = spirv.Select %32, %33, %9 : i1, i32 | |
| %35 = spirv.SDiv %34, %cst4_i32 : i32 | |
| %36 = spirv.ISub %cst-1_i32, %35 : i32 | |
| %37 = spirv.Select %32, %36, %35 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf16>, stride=4> [0])>, StorageBuffer> | |
| // %43 = floor(%11 / 4) | |
| %38 = spirv.SLessThan %11, %cst0_i32 : i32 | |
| %39 = spirv.ISub %cst-1_i32, %11 : i32 | |
| %40 = spirv.Select %38, %39, %11 : i1, i32 | |
| %41 = spirv.SDiv %40, %cst4_i32 : i32 | |
| %42 = spirv.ISub %cst-1_i32, %41 : i32 | |
| %43 = spirv.Select %38, %42, %41 : i1, i32 | |
| // %49 = floor(%13 / 4) | |
| %44 = spirv.SLessThan %13, %cst0_i32 : i32 | |
| %45 = spirv.ISub %cst-1_i32, %13 : i32 | |
| %46 = spirv.Select %44, %45, %13 : i1, i32 | |
| %47 = spirv.SDiv %46, %cst4_i32 : i32 | |
| %48 = spirv.ISub %cst-1_i32, %47 : i32 | |
| %49 = spirv.Select %44, %48, %47 : i1, i32 | |
| %__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf16>, stride=4> [0])>, StorageBuffer> | |
| // %51 = workgroup_id.x; %53 = workgroup_id.x * 160 + local_id.x is this invocation's element index | |
| // before any buffer offset is added. %56 is the vector<2xf32> at %53 + %19. | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %50 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %51 = spirv.CompositeExtract %50[0 : i32] : vector<3xi32> | |
| %52 = spirv.IMul %51, %cst160_i32 : i32 | |
| %53 = spirv.IAdd %52, %1 : i32 | |
| %54 = spirv.IAdd %53, %19 : i32 | |
| %55 = spirv.AccessChain %__resource_var_0_0__1_addr[%cst0_i32, %54] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf32>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| %56 = spirv.Load "StorageBuffer" %55 : vector<2xf32> | |
| // %59 is one f32 per workgroup, loaded at index workgroup_id.x + %25 through the f32 view. | |
| %57 = spirv.IAdd %51, %25 : i32 | |
| %58 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %57] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
| %59 = spirv.Load "StorageBuffer" %58 : f32 | |
| // %65 = sum over both components of (%56 - splat(%59))^2, the per-invocation partial. | |
| %60 = spirv.CompositeConstruct %59, %59 : (f32, f32) -> vector<2xf32> | |
| %61 = spirv.FSub %56, %60 : vector<2xf32> | |
| %62 = spirv.FMul %61, %61 : vector<2xf32> | |
| %63 = spirv.CompositeExtract %62[0 : i32] : vector<2xf32> | |
| %64 = spirv.CompositeExtract %62[1 : i32] : vector<2xf32> | |
| %65 = spirv.FAdd %63, %64 : f32 | |
| // Subgroup-scope shuffle-xor steps with masks 1, 2, 4, 8, 16, each followed by an add. | |
| // With 32 participating lanes (target env declares min/max subgroup size 32) %75 ends up | |
| // holding the sum of %65 over the whole subgroup in every lane. | |
| %66 = spirv.GroupNonUniformShuffleXor <Subgroup> %65, %cst1_i32 : f32, i32 | |
| %67 = spirv.FAdd %65, %66 : f32 | |
| %68 = spirv.GroupNonUniformShuffleXor <Subgroup> %67, %cst2_i32 : f32, i32 | |
| %69 = spirv.FAdd %67, %68 : f32 | |
| %70 = spirv.GroupNonUniformShuffleXor <Subgroup> %69, %cst4_i32 : f32, i32 | |
| %71 = spirv.FAdd %69, %70 : f32 | |
| %72 = spirv.GroupNonUniformShuffleXor <Subgroup> %71, %cst8_i32 : f32, i32 | |
| %73 = spirv.FAdd %71, %72 : f32 | |
| %74 = spirv.GroupNonUniformShuffleXor <Subgroup> %73, %cst16_i32 : f32, i32 | |
| %75 = spirv.FAdd %73, %74 : f32 | |
| // %76 = local_id.x / 32 and %77 = local_id.x % 32. Invocations with %77 == 0 publish their %75 | |
| // into scratch slot %76 (values 0..4 for 160 invocations). | |
| // NOTE(review): this uses %76/%77 as subgroup id / lane id, which assumes invocations | |
| // 32k..32k+31 share a subgroup - confirm for the target driver. | |
| %__workgroup_mem__8_addr = spirv.mlir.addressof @__workgroup_mem__8 : !spirv.ptr<!spirv.struct<(!spirv.array<5 x f32>)>, Workgroup> | |
| %76 = spirv.UDiv %1, %cst32_i32 : i32 | |
| %77 = spirv.UMod %1, %cst32_i32 : i32 | |
| %78 = spirv.IEqual %77, %cst0_i32 : i32 | |
| spirv.mlir.selection { | |
| spirv.BranchConditional %78, ^bb1, ^bb2 | |
| ^bb1: // pred: ^bb0 | |
| %116 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %76] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x f32>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %116, %75 : f32 | |
| spirv.Branch ^bb2 | |
| ^bb2: // 2 preds: ^bb0, ^bb1 | |
| spirv.mlir.merge | |
| } | |
| // Workgroup-scope barrier with AcquireRelease and WorkgroupMemory semantics, placed between the | |
| // scratch stores above and the scratch loads below. | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| // Every invocation reads slot min(%77, 4), which keeps the index inside the 5-element array, | |
| // and the loaded value is replaced by 0.0 when %77 >= 5, so only %77 in 0..4 contributes a partial. | |
| %79 = spirv.GL.UMin %77, %cst4_i32 : i32 | |
| %80 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %79] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x f32>)>, Workgroup>, i32, i32 | |
| %81 = spirv.Load "Workgroup" %80 : f32 | |
| %82 = spirv.SGreaterThanEqual %77, %cst5_i32 : i32 | |
| %83 = spirv.Select %82, %cst_f32, %81 : i1, f32 | |
| // Second shuffle-xor/add sequence with masks 1, 2, 4 (covers 8 lanes, enough for the 5 partials), | |
| // then %90 takes the value held by subgroup invocation 0; %91 adds the constant 0.0 to it. | |
| %84 = spirv.GroupNonUniformShuffleXor <Subgroup> %83, %cst1_i32 : f32, i32 | |
| %85 = spirv.FAdd %83, %84 : f32 | |
| %86 = spirv.GroupNonUniformShuffleXor <Subgroup> %85, %cst2_i32 : f32, i32 | |
| %87 = spirv.FAdd %85, %86 : f32 | |
| %88 = spirv.GroupNonUniformShuffleXor <Subgroup> %87, %cst4_i32 : f32, i32 | |
| %89 = spirv.FAdd %87, %88 : f32 | |
| %90 = spirv.GroupNonUniformShuffle <Subgroup> %89, %cst0_i32 : f32, i32 | |
| %91 = spirv.FAdd %90, %cst_f32 : f32 | |
| // %94: vector<2xf16> from the f16 view of binding 0 at index %53 + %31. | |
| %92 = spirv.IAdd %53, %31 : i32 | |
| %93 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %92] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf16>, stride=4> [0])>, StorageBuffer>, i32, i32 | |
| %94 = spirv.Load "StorageBuffer" %93 : vector<2xf16> | |
| // %97 and %100: two vector<2xf16> loads from binding 1, indexed by local_id.x only | |
| // (plus offsets %37 and %43), so every workgroup reads the same 160 elements of each. | |
| %95 = spirv.IAdd %1, %37 : i32 | |
| %96 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %95] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf16>, stride=4> [0])>, StorageBuffer>, i32, i32 | |
| %97 = spirv.Load "StorageBuffer" %96 : vector<2xf16> | |
| %98 = spirv.IAdd %1, %43 : i32 | |
| %99 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %98] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf16>, stride=4> [0])>, StorageBuffer>, i32, i32 | |
| %100 = spirv.Load "StorageBuffer" %99 : vector<2xf16> | |
| // %106 = f16(splat(%91) / 320.0) + splat(f16(9.99999974E-6)); the division is done in f32, | |
| // the addition of the small constant in f16. | |
| %101 = spirv.CompositeConstruct %91, %91 : (f32, f32) -> vector<2xf32> | |
| %102 = spirv.FConvert %cst_f32_0 : f32 to f16 | |
| %103 = spirv.CompositeConstruct %102, %102 : (f16, f16) -> vector<2xf16> | |
| %104 = spirv.FDiv %101, %cst_vec_2xf32 : vector<2xf32> | |
| %105 = spirv.FConvert %104 : vector<2xf32> to vector<2xf16> | |
| %106 = spirv.FAdd %105, %103 : vector<2xf16> | |
| // %113 = (%94 - splat(f16(%59))) * inversesqrt(%106) * %97 + %100, all in f16. | |
| %107 = spirv.FConvert %59 : f32 to f16 | |
| %108 = spirv.CompositeConstruct %107, %107 : (f16, f16) -> vector<2xf16> | |
| %109 = spirv.GL.InverseSqrt %106 : vector<2xf16> | |
| %110 = spirv.FSub %94, %108 : vector<2xf16> | |
| %111 = spirv.FMul %110, %109 : vector<2xf16> | |
| %112 = spirv.FMul %111, %97 : vector<2xf16> | |
| %113 = spirv.FAdd %112, %100 : vector<2xf16> | |
| // Every invocation stores its vector<2xf16> result to binding 2 at index %53 + %49. | |
| %114 = spirv.IAdd %53, %49 : i32 | |
| %115 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %114] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf16>, stride=4> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %115, %113 : vector<2xf16> | |
| spirv.Return | |
| } | |
| // GLCompute entry point; LocalSize 160x1x1 matches workgroup_size on the export op above. | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_39_generic_18432x320, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__ | |
| spirv.ExecutionMode @forward_dispatch_39_generic_18432x320 "LocalSize", 160, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
| hal.executable private @forward_dispatch_40 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
| hal.executable.export public @forward_dispatch_40_matmul_18432x320x320 ordinal(0) layout(#pipeline_layout3) attributes {subgroup_size = 32 : index, translation_info = #translation4, workgroup_size = [64 : index, 2 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
| %c5 = arith.constant 5 : index | |
| %c288 = arith.constant 288 : index | |
| %c1 = arith.constant 1 : index | |
| hal.return %c5, %c288, %c1 : index, index, index | |
| } | |
// Inner module for dispatch 40. It holds one SPIR-V compute entry point,
// @forward_dispatch_40_matmul_18432x320x320, written with NV cooperative-matrix ops.
// The spirv.target_env attribute lists the capabilities, extensions and resource limits
// (Vulkan, NVIDIA discrete GPU, subgroup size 32) the module was compiled against.
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, Float16, CooperativeMatrixNV], [SPV_KHR_storage_buffer_storage_class, SPV_NV_cooperative_matrix]> { | |
// Module-level variables: the WorkgroupId / LocalInvocationId built-ins, a 3 x i32
// push-constant block, two Workgroup-storage staging arrays (640 and 576 x vector<4xf32>),
// and three storage buffers bound at (0, 0), (0, 1) and (0, 2), each declared as a
// runtime array of vector<4xf32> with a 16-byte stride.
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant> | |
| spirv.GlobalVariable @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup> | |
| spirv.GlobalVariable @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup> | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
// Entry point. LocalSize is 64 x 2 x 1 (see spirv.ExecutionMode at the end of the module)
// and the entry-point ABI requests subgroup_size = 32.
| spirv.func @forward_dispatch_40_matmul_18432x320x320() "None" attributes {spirv.entry_point_abi = #spirv.entry_point_abi<subgroup_size = 32>} { | |
// Integer constants used by the index arithmetic below, plus the f16 zero that seeds
// the accumulators.
| %cst9_i32 = spirv.Constant 9 : i32 | |
| %false = spirv.Constant false | |
| %cst5_i32 = spirv.Constant 5 : i32 | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst-33_i32 = spirv.Constant -33 : i32 | |
| %cst642_i32 = spirv.Constant 642 : i32 | |
| %cst-576_i32 = spirv.Constant -576 : i32 | |
| %cst40_i32 = spirv.Constant 40 : i32 | |
| %cst-640_i32 = spirv.Constant -640 : i32 | |
| %cst146_i32 = spirv.Constant 146 : i32 | |
| %cst82_i32 = spirv.Constant 82 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst144_i32 = spirv.Constant 144 : i32 | |
| %cst72_i32 = spirv.Constant 72 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
| %cst160_i32 = spirv.Constant 160 : i32 | |
| %cst80_i32 = spirv.Constant 80 : i32 | |
| %cst36_i32 = spirv.Constant 36 : i32 | |
| %cst4_i32 = spirv.Constant 4 : i32 | |
| %cst1280_i32 = spirv.Constant 1280 : i32 | |
| %cst640_i32 = spirv.Constant 640 : i32 | |
| %cst2560_i32 = spirv.Constant 2560 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst16_i32 = spirv.Constant 16 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst288_i32 = spirv.Constant 288 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst_f16 = spirv.Constant 0.000000e+00 : f16 | |
// %0: 16x16 f16 cooperative matrix built from the 0.0 constant; it is the initial value
// of all four loop-carried accumulators.
| %0 = spirv.CompositeConstruct %cst_f16 : (f16) -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
// %2, %4, %6 = LocalInvocationId x, y, z.
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %1 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %2 = spirv.CompositeExtract %1[0 : i32] : vector<3xi32> | |
| %3 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %4 = spirv.CompositeExtract %3[1 : i32] : vector<3xi32> | |
| %5 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %6 = spirv.CompositeExtract %5[2 : i32] : vector<3xi32> | |
| %__workgroup_mem__4_addr = spirv.mlir.addressof @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup> | |
| %__workgroup_mem__5_addr = spirv.mlir.addressof @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup> | |
// Load the three i32 push constants into %8, %10 and %12.
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant> | |
| %7 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %8 = spirv.Load "PushConstant" %7 : i32 | |
| %9 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %10 = spirv.Load "PushConstant" %9 : i32 | |
| %11 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %12 = spirv.Load "PushConstant" %11 : i32 | |
// %18 = floor(%8 / 16). The SLessThan / ISub / Select / SDiv / ISub / Select sequence is a
// floor division that also rounds toward negative infinity for negative inputs; the same
// pattern recurs throughout this function. %18 is later added to the binding-0 index.
// NOTE(review): presumably %8 is a byte offset being converted to 16-byte vector<4xf32>
// elements - confirm against the host-side push-constant layout.
| %13 = spirv.SLessThan %8, %cst0_i32 : i32 | |
| %14 = spirv.ISub %cst-1_i32, %8 : i32 | |
| %15 = spirv.Select %13, %14, %8 : i1, i32 | |
| %16 = spirv.SDiv %15, %cst16_i32 : i32 | |
| %17 = spirv.ISub %cst-1_i32, %16 : i32 | |
| %18 = spirv.Select %13, %17, %16 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
// %24 = floor(%10 / 16); later added to the binding-1 index.
| %19 = spirv.SLessThan %10, %cst0_i32 : i32 | |
| %20 = spirv.ISub %cst-1_i32, %10 : i32 | |
| %21 = spirv.Select %19, %20, %10 : i1, i32 | |
| %22 = spirv.SDiv %21, %cst16_i32 : i32 | |
| %23 = spirv.ISub %cst-1_i32, %22 : i32 | |
| %24 = spirv.Select %19, %23, %22 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
// %30 = floor(%12 / 16); later added to the binding-2 (output) index.
| %25 = spirv.SLessThan %12, %cst0_i32 : i32 | |
| %26 = spirv.ISub %cst-1_i32, %12 : i32 | |
| %27 = spirv.Select %25, %26, %12 : i1, i32 | |
| %28 = spirv.SDiv %27, %cst16_i32 : i32 | |
| %29 = spirv.ISub %cst-1_i32, %28 : i32 | |
| %30 = spirv.Select %25, %29, %28 : i1, i32 | |
| %__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
// %32 = WorkgroupId.y, %34 = WorkgroupId.x.
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %31 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %32 = spirv.CompositeExtract %31[1 : i32] : vector<3xi32> | |
| %33 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %34 = spirv.CompositeExtract %33[0 : i32] : vector<3xi32> | |
// Prologue, binding 0 -> @__workgroup_mem__4.
//   %41 = wg.y*2560 + lid.x + lid.y*640 + lid.z*1280 + %18
//   %47 = floor(lid.x / 4), %49 = %41 + %47*36   (source index)
//   %56 = lid.x + lid.y*80 + lid.z*160 + %47     (destination index)
// Each invocation copies two vector<4xf32>: source %49 and %49+1280 to destination %56
// and %56+160.
| %35 = spirv.IMul %32, %cst2560_i32 : i32 | |
| %36 = spirv.IAdd %35, %2 : i32 | |
| %37 = spirv.IMul %4, %cst640_i32 : i32 | |
| %38 = spirv.IAdd %36, %37 : i32 | |
| %39 = spirv.IMul %6, %cst1280_i32 : i32 | |
| %40 = spirv.IAdd %38, %39 : i32 | |
| %41 = spirv.IAdd %40, %18 : i32 | |
| %42 = spirv.SLessThan %2, %cst0_i32 : i32 | |
| %43 = spirv.ISub %cst-1_i32, %2 : i32 | |
| %44 = spirv.Select %42, %43, %2 : i1, i32 | |
| %45 = spirv.SDiv %44, %cst4_i32 : i32 | |
| %46 = spirv.ISub %cst-1_i32, %45 : i32 | |
| %47 = spirv.Select %42, %46, %45 : i1, i32 | |
| %48 = spirv.IMul %47, %cst36_i32 : i32 | |
| %49 = spirv.IAdd %41, %48 : i32 | |
| %50 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %49] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %51 = spirv.Load "StorageBuffer" %50 : vector<4xf32> | |
| %52 = spirv.IMul %4, %cst80_i32 : i32 | |
| %53 = spirv.IAdd %2, %52 : i32 | |
| %54 = spirv.IMul %6, %cst160_i32 : i32 | |
| %55 = spirv.IAdd %53, %54 : i32 | |
| %56 = spirv.IAdd %55, %47 : i32 | |
| %57 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %56] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %57, %51 : vector<4xf32> | |
| %58 = spirv.IAdd %49, %cst1280_i32 : i32 | |
| %59 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %58] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %60 = spirv.Load "StorageBuffer" %59 : vector<4xf32> | |
| %61 = spirv.IAdd %56, %cst160_i32 : i32 | |
| %62 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %61] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %62, %60 : vector<4xf32> | |
// Prologue, binding 1 -> @__workgroup_mem__5.
//   %72 = floor(lid.x / 8)
//   %74 = lid.x + lid.y*320 + lid.z*640 + wg.x*8 + %24 + %72*32   (source index)
//   %81 = lid.x + lid.y*72 + lid.z*144 + %72                      (destination index)
// Two vector<4xf32> per invocation: source %74 and %74+640 to destination %81 and %81+144.
| %63 = spirv.IMul %4, %cst320_i32 : i32 | |
| %64 = spirv.IAdd %2, %63 : i32 | |
| %65 = spirv.IMul %6, %cst640_i32 : i32 | |
| %66 = spirv.IAdd %64, %65 : i32 | |
| %67 = spirv.IMul %34, %cst8_i32 : i32 | |
| %68 = spirv.IAdd %66, %67 : i32 | |
| %69 = spirv.IAdd %68, %24 : i32 | |
| %70 = spirv.SDiv %44, %cst8_i32 : i32 | |
| %71 = spirv.ISub %cst-1_i32, %70 : i32 | |
| %72 = spirv.Select %42, %71, %70 : i1, i32 | |
| %73 = spirv.IMul %72, %cst32_i32 : i32 | |
| %74 = spirv.IAdd %69, %73 : i32 | |
| %75 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %74] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %76 = spirv.Load "StorageBuffer" %75 : vector<4xf32> | |
| %77 = spirv.IMul %4, %cst72_i32 : i32 | |
| %78 = spirv.IAdd %2, %77 : i32 | |
| %79 = spirv.IMul %6, %cst144_i32 : i32 | |
| %80 = spirv.IAdd %78, %79 : i32 | |
| %81 = spirv.IAdd %80, %72 : i32 | |
| %82 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %81] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %82, %76 : vector<4xf32> | |
| %83 = spirv.IAdd %74, %cst640_i32 : i32 | |
| %84 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %83] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %85 = spirv.Load "StorageBuffer" %84 : vector<4xf32> | |
| %86 = spirv.IAdd %81, %cst144_i32 : i32 | |
| %87 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %86] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %87, %85 : vector<4xf32> | |
// Workgroup-scope barrier between the staging stores above and the cooperative-matrix
// loads inside the loop.
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
// Function-storage slots that carry the loop's values (four accumulators and the i32
// half selector) out of the spirv.mlir.loop region; they are written at the end of every
// iteration and read back right after the loop.
| %88 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %89 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %90 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %91 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %92 = spirv.Variable : !spirv.ptr<i32, Function> | |
// Main loop: %149 = 0, 32, 64, ... while %149 < 288 (9 iterations).
// %150..%153 are the four f16 accumulators. %154 selects which half of each workgroup
// array is read in this iteration (starts at 0).
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%cst0_i32, %0, %0, %0, %0, %cst0_i32 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32) | |
| ^bb1(%149: i32, %150: !spirv.coopmatrix<16x16xf16, Subgroup>, %151: !spirv.coopmatrix<16x16xf16, Subgroup>, %152: !spirv.coopmatrix<16x16xf16, Subgroup>, %153: !spirv.coopmatrix<16x16xf16, Subgroup>, %154: i32): // 2 preds: ^bb0, ^bb2 | |
| %155 = spirv.SLessThan %149, %cst288_i32 : i32 | |
| spirv.BranchConditional %155, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
// Four 16x16 f16 tiles from @__workgroup_mem__4 at base %158 = %154*320 + lid.y*160,
// offsets +0, +2, +80, +82 (stride operand 5, column-major flag false).
| %156 = spirv.IMul %154, %cst320_i32 : i32 | |
| %157 = spirv.IMul %4, %cst160_i32 : i32 | |
| %158 = spirv.IAdd %156, %157 : i32 | |
| %159 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %158] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %160 = spirv.NV.CooperativeMatrixLoad %159, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %161 = spirv.IAdd %158, %cst2_i32 : i32 | |
| %162 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %161] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %163 = spirv.NV.CooperativeMatrixLoad %162, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %164 = spirv.IAdd %158, %cst80_i32 : i32 | |
| %165 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %164] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %166 = spirv.NV.CooperativeMatrixLoad %165, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %167 = spirv.IAdd %158, %cst82_i32 : i32 | |
| %168 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %167] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %169 = spirv.NV.CooperativeMatrixLoad %168, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
// Four tiles from @__workgroup_mem__5 at base %175 = %154*288 + floor(lid.x / 32)*4,
// offsets +0, +2, +144, +146 (stride operand 9, column-major flag false).
| %170 = spirv.IMul %154, %cst288_i32 : i32 | |
| %171 = spirv.SDiv %44, %cst32_i32 : i32 | |
| %172 = spirv.ISub %cst-1_i32, %171 : i32 | |
| %173 = spirv.Select %42, %172, %171 : i1, i32 | |
| %174 = spirv.IMul %173, %cst4_i32 : i32 | |
| %175 = spirv.IAdd %170, %174 : i32 | |
| %176 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %175] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %177 = spirv.NV.CooperativeMatrixLoad %176, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %178 = spirv.IAdd %175, %cst2_i32 : i32 | |
| %179 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %178] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %180 = spirv.NV.CooperativeMatrixLoad %179, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %181 = spirv.IAdd %175, %cst144_i32 : i32 | |
| %182 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %181] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %183 = spirv.NV.CooperativeMatrixLoad %182, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %184 = spirv.IAdd %175, %cst146_i32 : i32 | |
| %185 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %184] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %186 = spirv.NV.CooperativeMatrixLoad %185, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
// Eight MulAdds update the four accumulators, two chained MulAdds per accumulator.
// For example %187 takes %160, %177 and accumulator %150, and %188 takes %163, %183
// and %187. All accumulation is done in f16.
| %187 = spirv.NV.CooperativeMatrixMulAdd %160, %177, %150 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %188 = spirv.NV.CooperativeMatrixMulAdd %163, %183, %187 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %189 = spirv.NV.CooperativeMatrixMulAdd %160, %180, %151 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %190 = spirv.NV.CooperativeMatrixMulAdd %163, %186, %189 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %191 = spirv.NV.CooperativeMatrixMulAdd %166, %177, %152 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %192 = spirv.NV.CooperativeMatrixMulAdd %169, %183, %191 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %193 = spirv.NV.CooperativeMatrixMulAdd %166, %180, %153 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %194 = spirv.NV.CooperativeMatrixMulAdd %169, %186, %193 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
// Fetch data for the next step. %195 = %149 + 32 is the next counter value;
// %201 = floor(%195 / 8) is added to the binding-0 index (%203 = %41 + %201 + %48).
| %195 = spirv.IAdd %149, %cst32_i32 : i32 | |
| %196 = spirv.SLessThan %195, %cst0_i32 : i32 | |
| %197 = spirv.ISub %cst-33_i32, %149 : i32 | |
| %198 = spirv.Select %196, %197, %195 : i1, i32 | |
| %199 = spirv.SDiv %198, %cst8_i32 : i32 | |
| %200 = spirv.ISub %cst-1_i32, %199 : i32 | |
| %201 = spirv.Select %196, %200, %199 : i1, i32 | |
| %202 = spirv.IAdd %41, %201 : i32 | |
| %203 = spirv.IAdd %202, %48 : i32 | |
| %204 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %203] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %205 = spirv.Load "StorageBuffer" %204 : vector<4xf32> | |
// %208 = floor(%195 / 32). %217 = %208 mod 2, kept non-negative by the
// SAbs / UMod / SNegate / Select sequence; it becomes the next iteration's %154.
| %206 = spirv.SDiv %198, %cst32_i32 : i32 | |
| %207 = spirv.ISub %cst-1_i32, %206 : i32 | |
| %208 = spirv.Select %196, %207, %206 : i1, i32 | |
| %209 = spirv.GL.SAbs %208 : i32 | |
| %210 = spirv.GL.SAbs %cst2_i32 : i32 | |
| %211 = spirv.UMod %209, %210 : i32 | |
| %212 = spirv.IEqual %208, %209 : i32 | |
| %213 = spirv.SNegate %211 : i32 | |
| %214 = spirv.Select %212, %211, %213 : i1, i32 | |
| %215 = spirv.SLessThan %214, %cst0_i32 : i32 | |
| %216 = spirv.IAdd %214, %cst2_i32 : i32 | |
| %217 = spirv.Select %215, %216, %214 : i1, i32 | |
// Destination in @__workgroup_mem__4: %208*320 + floor(%208 / 2)*(-640) reduces to
// (%208 mod 2)*320, i.e. the half selected by %217 - the half NOT read by the tile loads
// at the top of this iteration. Same per-invocation offsets as the prologue (%55 + %47).
| %218 = spirv.IMul %208, %cst320_i32 : i32 | |
| %219 = spirv.IAdd %218, %55 : i32 | |
| %220 = spirv.SLessThan %208, %cst0_i32 : i32 | |
| %221 = spirv.ISub %cst-1_i32, %208 : i32 | |
| %222 = spirv.Select %220, %221, %208 : i1, i32 | |
| %223 = spirv.SDiv %222, %cst2_i32 : i32 | |
| %224 = spirv.ISub %cst-1_i32, %223 : i32 | |
| %225 = spirv.Select %220, %224, %223 : i1, i32 | |
| %226 = spirv.IMul %225, %cst-640_i32 : i32 | |
| %227 = spirv.IAdd %219, %226 : i32 | |
| %228 = spirv.IAdd %227, %47 : i32 | |
| %229 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %228] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %229, %205 : vector<4xf32> | |
| %230 = spirv.IAdd %203, %cst1280_i32 : i32 | |
| %231 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %230] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %232 = spirv.Load "StorageBuffer" %231 : vector<4xf32> | |
| %233 = spirv.IAdd %228, %cst160_i32 : i32 | |
| %234 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %233] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %234, %232 : vector<4xf32> | |
// Binding-1 source index for the next step: the prologue index terms plus %195*40.
| %235 = spirv.IMul %195, %cst40_i32 : i32 | |
| %236 = spirv.IAdd %235, %2 : i32 | |
| %237 = spirv.IAdd %236, %63 : i32 | |
| %238 = spirv.IAdd %237, %65 : i32 | |
| %239 = spirv.IAdd %238, %67 : i32 | |
| %240 = spirv.IAdd %239, %24 : i32 | |
| %241 = spirv.IAdd %240, %73 : i32 | |
| %242 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %241] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %243 = spirv.Load "StorageBuffer" %242 : vector<4xf32> | |
// Destination in @__workgroup_mem__5: %208*288 + floor(%208 / 2)*(-576) reduces to
// (%208 mod 2)*288, again the half selected by %217.
| %244 = spirv.IMul %208, %cst288_i32 : i32 | |
| %245 = spirv.IAdd %244, %80 : i32 | |
| %246 = spirv.IMul %225, %cst-576_i32 : i32 | |
| %247 = spirv.IAdd %245, %246 : i32 | |
| %248 = spirv.IAdd %247, %72 : i32 | |
| %249 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %248] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %249, %243 : vector<4xf32> | |
| %250 = spirv.IAdd %241, %cst640_i32 : i32 | |
| %251 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %250] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %252 = spirv.Load "StorageBuffer" %251 : vector<4xf32> | |
| %253 = spirv.IAdd %248, %cst144_i32 : i32 | |
| %254 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %253] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %254, %252 : vector<4xf32> | |
// Workgroup-scope barrier after the staging stores, before the next iteration's loads.
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
// Spill the loop-carried values to the Function variables so they are available after
// the loop, then branch back with the same values as block arguments.
| spirv.Store "Function" %88, %188 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %89, %190 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %90, %192 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %91, %194 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %92, %217 : i32 | |
| spirv.Branch ^bb1(%195, %188, %190, %192, %194, %217 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
// Epilogue: reload the selector (%93) and the four accumulators (%97, %96, %95, %94
// correspond to slots %88, %89, %90, %91), then run one more round of tile loads and
// MulAdds on the data staged by the last loop iteration. No further storage-buffer
// loads are issued here.
| %93 = spirv.Load "Function" %92 : i32 | |
| %94 = spirv.Load "Function" %91 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %95 = spirv.Load "Function" %90 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %96 = spirv.Load "Function" %89 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %97 = spirv.Load "Function" %88 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %98 = spirv.IMul %4, %cst160_i32 : i32 | |
| %99 = spirv.IMul %93, %cst320_i32 : i32 | |
| %100 = spirv.IAdd %98, %99 : i32 | |
| %101 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %100] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %102 = spirv.NV.CooperativeMatrixLoad %101, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %103 = spirv.IAdd %100, %cst2_i32 : i32 | |
| %104 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %103] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %105 = spirv.NV.CooperativeMatrixLoad %104, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %106 = spirv.IAdd %100, %cst80_i32 : i32 | |
| %107 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %106] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %108 = spirv.NV.CooperativeMatrixLoad %107, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %109 = spirv.IAdd %100, %cst82_i32 : i32 | |
| %110 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %109] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %111 = spirv.NV.CooperativeMatrixLoad %110, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %112 = spirv.IMul %93, %cst288_i32 : i32 | |
| %113 = spirv.SDiv %44, %cst32_i32 : i32 | |
| %114 = spirv.ISub %cst-1_i32, %113 : i32 | |
| %115 = spirv.Select %42, %114, %113 : i1, i32 | |
| %116 = spirv.IMul %115, %cst4_i32 : i32 | |
| %117 = spirv.IAdd %112, %116 : i32 | |
| %118 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %117] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %119 = spirv.NV.CooperativeMatrixLoad %118, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %120 = spirv.IAdd %117, %cst2_i32 : i32 | |
| %121 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %120] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %122 = spirv.NV.CooperativeMatrixLoad %121, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %123 = spirv.IAdd %117, %cst144_i32 : i32 | |
| %124 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %123] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %125 = spirv.NV.CooperativeMatrixLoad %124, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %126 = spirv.IAdd %117, %cst146_i32 : i32 | |
| %127 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %126] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %128 = spirv.NV.CooperativeMatrixLoad %127, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %129 = spirv.NV.CooperativeMatrixMulAdd %102, %119, %97 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %130 = spirv.NV.CooperativeMatrixMulAdd %105, %125, %129 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %131 = spirv.NV.CooperativeMatrixMulAdd %102, %122, %96 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %132 = spirv.NV.CooperativeMatrixMulAdd %105, %128, %131 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %133 = spirv.NV.CooperativeMatrixMulAdd %108, %119, %95 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %134 = spirv.NV.CooperativeMatrixMulAdd %111, %125, %133 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %135 = spirv.NV.CooperativeMatrixMulAdd %108, %122, %94 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %136 = spirv.NV.CooperativeMatrixMulAdd %111, %128, %135 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
// Write the four result tiles to binding 2 with stride operand 40. Base index
//   %141 = wg.y*2560 + lid.y*1280 + wg.x*8 + %30 + floor(lid.x / 32)*4
// and the tiles go to +642 (%136), +640 (%134), +2 (%132) and +0 (%130).
| %137 = spirv.IMul %4, %cst1280_i32 : i32 | |
| %138 = spirv.IAdd %35, %137 : i32 | |
| %139 = spirv.IAdd %138, %67 : i32 | |
| %140 = spirv.IAdd %139, %30 : i32 | |
| %141 = spirv.IAdd %140, %116 : i32 | |
| %142 = spirv.IAdd %141, %cst642_i32 : i32 | |
| %143 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %142] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %143, %136, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %144 = spirv.IAdd %141, %cst640_i32 : i32 | |
| %145 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %144] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %145, %134, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %146 = spirv.IAdd %141, %cst2_i32 : i32 | |
| %147 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %146] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %147, %132, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %148 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %141] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %148, %130, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Return | |
| } | |
// Entry-point declaration (GLCompute, with the two built-in interface variables) and
// the 64 x 2 x 1 local workgroup size.
| spirv.EntryPoint "GLCompute" @forward_dispatch_40_matmul_18432x320x320, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__ | |
| spirv.ExecutionMode @forward_dispatch_40_matmul_18432x320x320 "LocalSize", 64, 2, 1 | |
| } | |
| } | |
| } | |
| } | |
// Dispatch 43. Per the index arithmetic in the kernel below, it copies vector<4xf16>
// elements from binding 0 to binding 1, using different index strides on the two sides
// (e.g. WorkgroupId.y is scaled by 16 on the source and by 147456 on the destination).
| hal.executable private @forward_dispatch_43 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
// Export region returning the constant triple (2, 5, 4608).
// NOTE(review): presumably the x/y/z workgroup counts; that matches the kernel splitting
// WorkgroupId.z by 2304 (4608 = 2 * 2304) - confirm against the HAL dialect docs.
| hal.executable.export public @forward_dispatch_43_generic_2x9216x5x64 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation1, workgroup_size = [8 : index, 1 : index, 4 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| %c2 = arith.constant 2 : index | |
| %c5 = arith.constant 5 : index | |
| %c4608 = arith.constant 4608 : index | |
| hal.return %c2, %c5, %c4608 : index, index, index | |
| } | |
// Inner module; the spirv.target_env attribute lists the capabilities, extensions and
// resource limits (Vulkan, NVIDIA discrete GPU) the module was compiled against.
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
// Module-level variables: the two built-in IDs, a 2 x i32 push-constant block, and two
// storage buffers bound at (0, 0) and (0, 1), each declared as a runtime array of
// vector<4xf16> with an 8-byte stride.
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
// Entry point. LocalSize is 8 x 1 x 4 (see spirv.ExecutionMode below).
| spirv.func @forward_dispatch_43_generic_2x9216x5x64() "None" { | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst147456_i32 = spirv.Constant 147456 : i32 | |
| %cst64_i32 = spirv.Constant 64 : i32 | |
| %cst16_i32 = spirv.Constant 16 : i32 | |
| %cst737280_i32 = spirv.Constant 737280 : i32 | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
| %cst80_i32 = spirv.Constant 80 : i32 | |
| %cst4_i32 = spirv.Constant 4 : i32 | |
| %cst9216_i32 = spirv.Constant 9216 : i32 | |
| %cst-4_i32 = spirv.Constant -4 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst2304_i32 = spirv.Constant 2304 : i32 | |
// Load the two i32 push constants into %1 and %3.
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| %0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %1 = spirv.Load "PushConstant" %0 : i32 | |
| %2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %3 = spirv.Load "PushConstant" %2 : i32 | |
// %9 = floor(%1 / 8), added to the binding-0 index below. The SLessThan / ISub / Select /
// SDiv sequence is a floor division that also handles negative inputs.
// NOTE(review): presumably a byte offset converted to 8-byte vector<4xf16> elements - confirm.
| %4 = spirv.SLessThan %1, %cst0_i32 : i32 | |
| %5 = spirv.ISub %cst-1_i32, %1 : i32 | |
| %6 = spirv.Select %4, %5, %1 : i1, i32 | |
| %7 = spirv.SDiv %6, %cst8_i32 : i32 | |
| %8 = spirv.ISub %cst-1_i32, %7 : i32 | |
| %9 = spirv.Select %4, %8, %7 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
// %15 = floor(%3 / 8), added to the binding-1 index below.
| %10 = spirv.SLessThan %3, %cst0_i32 : i32 | |
| %11 = spirv.ISub %cst-1_i32, %3 : i32 | |
| %12 = spirv.Select %10, %11, %3 : i1, i32 | |
| %13 = spirv.SDiv %12, %cst8_i32 : i32 | |
| %14 = spirv.ISub %cst-1_i32, %13 : i32 | |
| %15 = spirv.Select %10, %14, %13 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
// Split WorkgroupId.z (%17) into %18 = z / 2304 and %21 = z % 2304 (unsigned);
// %20 = WorkgroupId.y, %23 = WorkgroupId.x.
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %16 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %17 = spirv.CompositeExtract %16[2 : i32] : vector<3xi32> | |
| %18 = spirv.UDiv %17, %cst2304_i32 : i32 | |
| %19 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %20 = spirv.CompositeExtract %19[1 : i32] : vector<3xi32> | |
| %21 = spirv.UMod %17, %cst2304_i32 : i32 | |
| %22 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %23 = spirv.CompositeExtract %22[0 : i32] : vector<3xi32> | |
// %27 = min(9216 - 4*%21, 4): the loop's upper bound.
| %24 = spirv.IMul %21, %cst-4_i32 : i32 | |
| %25 = spirv.IAdd %24, %cst9216_i32 : i32 | |
| %26 = spirv.SLessThan %25, %cst4_i32 : i32 | |
| %27 = spirv.Select %26, %25, %cst4_i32 : i1, i32 | |
// %29 = LocalInvocationId.z (loop start), %31 = LocalInvocationId.x.
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %28 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %29 = spirv.CompositeExtract %28[2 : i32] : vector<3xi32> | |
| %30 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %31 = spirv.CompositeExtract %30[0 : i32] : vector<3xi32> | |
// Loop: %32 starts at LocalInvocationId.z and advances by 4 while %32 < %27.
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%29 : i32) | |
| ^bb1(%32: i32): // 2 preds: ^bb0, ^bb2 | |
| %33 = spirv.SLessThan %32, %27 : i32 | |
| spirv.BranchConditional %33, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
// Source index into binding 0:
//   %44 = %32*80 + %21*320 + wg.x*8 + lid.x + %18*737280 + wg.y*16 + %9
| %34 = spirv.IMul %32, %cst80_i32 : i32 | |
| %35 = spirv.IMul %21, %cst320_i32 : i32 | |
| %36 = spirv.IAdd %34, %35 : i32 | |
| %37 = spirv.IMul %23, %cst8_i32 : i32 | |
| %38 = spirv.IAdd %36, %37 : i32 | |
| %39 = spirv.IAdd %38, %31 : i32 | |
| %40 = spirv.IMul %18, %cst737280_i32 : i32 | |
| %41 = spirv.IAdd %39, %40 : i32 | |
| %42 = spirv.IMul %20, %cst16_i32 : i32 | |
| %43 = spirv.IAdd %41, %42 : i32 | |
| %44 = spirv.IAdd %43, %9 : i32 | |
| %45 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %44] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| %46 = spirv.Load "StorageBuffer" %45 : vector<4xf16> | |
// Destination index into binding 1:
//   %55 = %32*16 + %21*64 + wg.x*8 + lid.x + %18*737280 + wg.y*147456 + %15
| %47 = spirv.IMul %32, %cst16_i32 : i32 | |
| %48 = spirv.IMul %21, %cst64_i32 : i32 | |
| %49 = spirv.IAdd %47, %48 : i32 | |
| %50 = spirv.IAdd %49, %37 : i32 | |
| %51 = spirv.IAdd %50, %31 : i32 | |
| %52 = spirv.IAdd %51, %40 : i32 | |
| %53 = spirv.IMul %20, %cst147456_i32 : i32 | |
| %54 = spirv.IAdd %52, %53 : i32 | |
| %55 = spirv.IAdd %54, %15 : i32 | |
| %56 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %55] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %56, %46 : vector<4xf16> | |
| %57 = spirv.IAdd %32, %cst4_i32 : i32 | |
| spirv.Branch ^bb1(%57 : i32) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
| spirv.Return | |
| } | |
// Entry-point declaration (GLCompute, with the two built-in interface variables) and
// the 8 x 1 x 4 local workgroup size.
| spirv.EntryPoint "GLCompute" @forward_dispatch_43_generic_2x9216x5x64, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_43_generic_2x9216x5x64 "LocalSize", 8, 1, 4 | |
| } | |
| } | |
| } | |
| } | |
// forward_dispatch_44: HAL executable with a single Vulkan/SPIR-V variant and one compute entry point.
// Each invocation loads one vector<4xf16> from binding (0, 0), multiplies it lane-wise by the
// constant 0.353553385 (converted to f16), and stores the product to binding (0, 1) at an index
// built from the same ids but with different strides.
// NOTE(review): the entry-point suffix "2x5x9216x64" together with the strides below looks like a
// scaled layout permutation of a 2x9216x5x64 f16 tensor into 2x5x9216x64 — inferred from the
// name and constants only; confirm against the originating linalg op.
| hal.executable private @forward_dispatch_44 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
// Export region: returns the three index constants (2, 2304, 10).
// presumably these are the x/y/z workgroup counts for the dispatch — TODO confirm
| hal.executable.export public @forward_dispatch_44_generic_2x5x9216x64 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation1, workgroup_size = [8 : index, 4 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| %c2 = arith.constant 2 : index | |
| %c2304 = arith.constant 2304 : index | |
| %c10 = arith.constant 10 : index | |
| hal.return %c2, %c2304, %c10 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
// Module-level interface: two built-in id inputs, a 2-word i32 push-constant block, and two
// storage buffers, both typed as runtime arrays of vector<4xf16> with an 8-byte stride.
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_44_generic_2x5x9216x64() "None" { | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst147456_i32 = spirv.Constant 147456 : i32 | |
| %cst64_i32 = spirv.Constant 64 : i32 | |
| %cst16_i32 = spirv.Constant 16 : i32 | |
| %cst737280_i32 = spirv.Constant 737280 : i32 | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
| %cst80_i32 = spirv.Constant 80 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst5_i32 = spirv.Constant 5 : i32 | |
// Scale factor; numerically about 1/sqrt(8). Why this value is used is not visible in this dispatch.
| %cst_f32 = spirv.Constant 0.353553385 : f32 | |
// Load the two i32 push-constant words: %1 = word 0, %3 = word 1.
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| %0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %1 = spirv.Load "PushConstant" %0 : i32 | |
| %2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %3 = spirv.Load "PushConstant" %2 : i32 | |
// %9 = floor(%1 / 8). SDiv is applied to a non-negative operand: for negative %1 the value is
// mapped to (-1 - %1) before dividing and the quotient is mapped back with (-1 - q) afterwards.
// presumably this turns a byte offset into an element offset for the 8-byte-stride array — TODO confirm
| %4 = spirv.SLessThan %1, %cst0_i32 : i32 | |
| %5 = spirv.ISub %cst-1_i32, %1 : i32 | |
| %6 = spirv.Select %4, %5, %1 : i1, i32 | |
| %7 = spirv.SDiv %6, %cst8_i32 : i32 | |
| %8 = spirv.ISub %cst-1_i32, %7 : i32 | |
| %9 = spirv.Select %4, %8, %7 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
// %15 = floor(%3 / 8), same sequence as above applied to the second push-constant word.
| %10 = spirv.SLessThan %3, %cst0_i32 : i32 | |
| %11 = spirv.ISub %cst-1_i32, %3 : i32 | |
| %12 = spirv.Select %10, %11, %3 : i1, i32 | |
| %13 = spirv.SDiv %12, %cst8_i32 : i32 | |
| %14 = spirv.ISub %cst-1_i32, %13 : i32 | |
| %15 = spirv.Select %10, %14, %13 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
// Workgroup id components: %17 = z, split into %18 = z / 5 and %19 = z % 5; %21 = y; %23 = x.
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %16 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %17 = spirv.CompositeExtract %16[2 : i32] : vector<3xi32> | |
| %18 = spirv.UDiv %17, %cst5_i32 : i32 | |
| %19 = spirv.UMod %17, %cst5_i32 : i32 | |
| %20 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %21 = spirv.CompositeExtract %20[1 : i32] : vector<3xi32> | |
| %22 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %23 = spirv.CompositeExtract %22[0 : i32] : vector<3xi32> | |
// Convert the scale to f16 and replicate it into all four lanes (%25).
| %24 = spirv.FConvert %cst_f32 : f32 to f16 | |
| %25 = spirv.CompositeConstruct %24, %24, %24, %24 : (f16, f16, f16, f16) -> vector<4xf16> | |
// Local invocation id components: %27 = y, %29 = x.
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %26 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %27 = spirv.CompositeExtract %26[1 : i32] : vector<3xi32> | |
| %28 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %29 = spirv.CompositeExtract %28[0 : i32] : vector<3xi32> | |
// Source element index into binding (0, 0):
//   %40 = local.y*80 + wg.y*320 + wg.x*8 + local.x + (wg.z/5)*737280 + (wg.z%5)*16 + %9
| %30 = spirv.IMul %27, %cst80_i32 : i32 | |
| %31 = spirv.IMul %21, %cst320_i32 : i32 | |
| %32 = spirv.IAdd %30, %31 : i32 | |
| %33 = spirv.IMul %23, %cst8_i32 : i32 | |
| %34 = spirv.IAdd %32, %33 : i32 | |
| %35 = spirv.IAdd %34, %29 : i32 | |
| %36 = spirv.IMul %18, %cst737280_i32 : i32 | |
| %37 = spirv.IAdd %35, %36 : i32 | |
| %38 = spirv.IMul %19, %cst16_i32 : i32 | |
| %39 = spirv.IAdd %37, %38 : i32 | |
| %40 = spirv.IAdd %39, %9 : i32 | |
| %41 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %40] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| %42 = spirv.Load "StorageBuffer" %41 : vector<4xf16> | |
// Lane-wise scale of the loaded vector.
| %43 = spirv.FMul %42, %25 : vector<4xf16> | |
// Destination element index into binding (0, 1); reuses %33 (wg.x*8) and %36 ((wg.z/5)*737280):
//   %52 = local.y*16 + wg.y*64 + wg.x*8 + local.x + (wg.z/5)*737280 + (wg.z%5)*147456 + %15
| %44 = spirv.IMul %27, %cst16_i32 : i32 | |
| %45 = spirv.IMul %21, %cst64_i32 : i32 | |
| %46 = spirv.IAdd %44, %45 : i32 | |
| %47 = spirv.IAdd %46, %33 : i32 | |
| %48 = spirv.IAdd %47, %29 : i32 | |
| %49 = spirv.IAdd %48, %36 : i32 | |
| %50 = spirv.IMul %19, %cst147456_i32 : i32 | |
| %51 = spirv.IAdd %49, %50 : i32 | |
| %52 = spirv.IAdd %51, %15 : i32 | |
| %53 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %52] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %53, %43 : vector<4xf16> | |
| spirv.Return | |
| } | |
// Entry point declaration; LocalSize (8, 4, 1) matches the workgroup_size attribute on the export above.
| spirv.EntryPoint "GLCompute" @forward_dispatch_44_generic_2x5x9216x64, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_44_generic_2x5x9216x64 "LocalSize", 8, 4, 1 | |
| } | |
| } | |
| } | |
| } | |
// forward_dispatch_45: HAL executable with a single Vulkan/SPIR-V variant and one compute entry point.
// Each invocation performs four scalar f16 loads from binding (0, 0) at indices spaced 320 apart,
// packs them into a vector<4xf16>, multiplies lane-wise by 0.353553385 (converted to f16), and
// stores the vector to binding (0, 1).
// NOTE(review): the entry-point suffix "2x320x9216" and the strides below look like a scaled
// transpose of a 2x9216x320 f16 tensor into 2x320x9216 — inferred from the name and constants
// only; confirm against the originating linalg op.
| hal.executable private @forward_dispatch_45 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
// Export region: returns the three index constants (72, 320, 2).
// presumably these are the x/y/z workgroup counts for the dispatch — TODO confirm
| hal.executable.export public @forward_dispatch_45_generic_2x320x9216 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation1, workgroup_size = [32 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
| %c72 = arith.constant 72 : index | |
| %c320 = arith.constant 320 : index | |
| %c2 = arith.constant 2 : index | |
| hal.return %c72, %c320, %c2 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
// Module-level interface: two built-in id inputs, a 2-word i32 push-constant block, and two
// storage buffers. Binding (0, 0) is a runtime array of scalar f16 (2-byte stride); binding (0, 1)
// is a runtime array of vector<4xf16> (8-byte stride).
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_45_generic_2x320x9216() "None" { | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst2304_i32 = spirv.Constant 2304 : i32 | |
| %cst737280_i32 = spirv.Constant 737280 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst960_i32 = spirv.Constant 960 : i32 | |
| %cst640_i32 = spirv.Constant 640 : i32 | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
| %cst2949120_i32 = spirv.Constant 2949120 : i32 | |
| %cst1280_i32 = spirv.Constant 1280 : i32 | |
| %cst40960_i32 = spirv.Constant 40960 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
// Scale factor; numerically about 1/sqrt(8). Why this value is used is not visible in this dispatch.
| %cst_f32 = spirv.Constant 0.353553385 : f32 | |
// Load the two i32 push-constant words: %1 = word 0, %3 = word 1.
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| %0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %1 = spirv.Load "PushConstant" %0 : i32 | |
| %2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %3 = spirv.Load "PushConstant" %2 : i32 | |
// %9 = floor(%1 / 2). SDiv is applied to a non-negative operand: for negative %1 the value is
// mapped to (-1 - %1) before dividing and the quotient is mapped back with (-1 - q) afterwards.
// The divisor 2 equals the 2-byte stride of binding (0, 0).
// presumably this turns a byte offset into an f16 element offset — TODO confirm
| %4 = spirv.SLessThan %1, %cst0_i32 : i32 | |
| %5 = spirv.ISub %cst-1_i32, %1 : i32 | |
| %6 = spirv.Select %4, %5, %1 : i1, i32 | |
| %7 = spirv.SDiv %6, %cst2_i32 : i32 | |
| %8 = spirv.ISub %cst-1_i32, %7 : i32 | |
| %9 = spirv.Select %4, %8, %7 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
// %15 = floor(%3 / 8), same sequence with divisor 8 (the 8-byte stride of binding (0, 1)).
| %10 = spirv.SLessThan %3, %cst0_i32 : i32 | |
| %11 = spirv.ISub %cst-1_i32, %3 : i32 | |
| %12 = spirv.Select %10, %11, %3 : i1, i32 | |
| %13 = spirv.SDiv %12, %cst8_i32 : i32 | |
| %14 = spirv.ISub %cst-1_i32, %13 : i32 | |
| %15 = spirv.Select %10, %14, %13 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
// Workgroup id components: %17 = z, %19 = y, %21 = x.
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %16 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %17 = spirv.CompositeExtract %16[2 : i32] : vector<3xi32> | |
| %18 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %19 = spirv.CompositeExtract %18[1 : i32] : vector<3xi32> | |
| %20 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %21 = spirv.CompositeExtract %20[0 : i32] : vector<3xi32> | |
// Convert the scale to f16 and replicate it into all four lanes (%23).
| %22 = spirv.FConvert %cst_f32 : f32 to f16 | |
| %23 = spirv.CompositeConstruct %22, %22, %22, %22 : (f16, f16, f16, f16) -> vector<4xf16> | |
// Local invocation id: only the x component (%25) is used.
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %24 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %25 = spirv.CompositeExtract %24[0 : i32] : vector<3xi32> | |
// Base scalar index into binding (0, 0):
//   %32 = wg.x*40960 + local.x*1280 + wg.z*2949120 + wg.y + %9
| %26 = spirv.IMul %21, %cst40960_i32 : i32 | |
| %27 = spirv.IMul %25, %cst1280_i32 : i32 | |
| %28 = spirv.IAdd %26, %27 : i32 | |
| %29 = spirv.IMul %17, %cst2949120_i32 : i32 | |
| %30 = spirv.IAdd %28, %29 : i32 | |
| %31 = spirv.IAdd %30, %19 : i32 | |
| %32 = spirv.IAdd %31, %9 : i32 | |
// Four scalar f16 loads at %32, %32+320, %32+640 and %32+960.
| %33 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %32] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %34 = spirv.Load "StorageBuffer" %33 : f16 | |
| %35 = spirv.IAdd %32, %cst320_i32 : i32 | |
| %36 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %35] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %37 = spirv.Load "StorageBuffer" %36 : f16 | |
| %38 = spirv.IAdd %32, %cst640_i32 : i32 | |
| %39 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %38] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %40 = spirv.Load "StorageBuffer" %39 : f16 | |
| %41 = spirv.IAdd %32, %cst960_i32 : i32 | |
| %42 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %41] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %43 = spirv.Load "StorageBuffer" %42 : f16 | |
// Pack the four scalars in load order into one vector and scale it lane-wise.
| %44 = spirv.CompositeConstruct %34, %37, %40, %43 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %45 = spirv.FMul %44, %23 : vector<4xf16> | |
// Destination vector index into binding (0, 1):
//   %52 = wg.x*32 + local.x + wg.z*737280 + wg.y*2304 + %15
| %46 = spirv.IMul %21, %cst32_i32 : i32 | |
| %47 = spirv.IAdd %46, %25 : i32 | |
| %48 = spirv.IMul %17, %cst737280_i32 : i32 | |
| %49 = spirv.IAdd %47, %48 : i32 | |
| %50 = spirv.IMul %19, %cst2304_i32 : i32 | |
| %51 = spirv.IAdd %49, %50 : i32 | |
| %52 = spirv.IAdd %51, %15 : i32 | |
| %53 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %52] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %53, %45 : vector<4xf16> | |
| spirv.Return | |
| } | |
// Entry point declaration; LocalSize (32, 1, 1) matches the workgroup_size attribute on the export above.
| spirv.EntryPoint "GLCompute" @forward_dispatch_45_generic_2x320x9216, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_45_generic_2x320x9216 "LocalSize", 32, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
| hal.executable private @forward_dispatch_46 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
| hal.executable.export public @forward_dispatch_46_batch_matmul_10x9216x9216x64 ordinal(0) layout(#pipeline_layout4) attributes {subgroup_size = 32 : index, translation_info = #translation4, workgroup_size = [64 : index, 2 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| %c144 = arith.constant 144 : index | |
| %c10 = arith.constant 10 : index | |
| hal.return %c144, %c144, %c10 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, Float16, CooperativeMatrixNV], [SPV_KHR_storage_buffer_storage_class, SPV_NV_cooperative_matrix]> { | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant> | |
| spirv.GlobalVariable @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup> | |
| spirv.GlobalVariable @__workgroup_mem__3 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup> | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_46_batch_matmul_10x9216x9216x64() "None" attributes {spirv.entry_point_abi = #spirv.entry_point_abi<subgroup_size = 32>} { | |
| %cst1152_i32 = spirv.Constant 1152 : i32 | |
| %cst9_i32 = spirv.Constant 9 : i32 | |
| %false = spirv.Constant false | |
| %cst5_i32 = spirv.Constant 5 : i32 | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst18434_i32 = spirv.Constant 18434 : i32 | |
| %cst10616832_i32 = spirv.Constant 10616832 : i32 | |
| %cst434_i32 = spirv.Constant 434 : i32 | |
| %cst290_i32 = spirv.Constant 290 : i32 | |
| %cst402_i32 = spirv.Constant 402 : i32 | |
| %cst400_i32 = spirv.Constant 400 : i32 | |
| %cst322_i32 = spirv.Constant 322 : i32 | |
| %cst432_i32 = spirv.Constant 432 : i32 | |
| %cst55296_i32 = spirv.Constant 55296 : i32 | |
| %cst36864_i32 = spirv.Constant 36864 : i32 | |
| %cst480_i32 = spirv.Constant 480 : i32 | |
| %cst260_i32 = spirv.Constant 260 : i32 | |
| %cst146_i32 = spirv.Constant 146 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst288_i32 = spirv.Constant 288 : i32 | |
| %cst82_i32 = spirv.Constant 82 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
| %cst144_i32 = spirv.Constant 144 : i32 | |
| %cst72_i32 = spirv.Constant 72 : i32 | |
| %cst1144_i32 = spirv.Constant 1144 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst18432_i32 = spirv.Constant 18432 : i32 | |
| %cst9216_i32 = spirv.Constant 9216 : i32 | |
| %cst160_i32 = spirv.Constant 160 : i32 | |
| %cst80_i32 = spirv.Constant 80 : i32 | |
| %cst4_i32 = spirv.Constant 4 : i32 | |
| %cst73728_i32 = spirv.Constant 73728 : i32 | |
| %cst256_i32 = spirv.Constant 256 : i32 | |
| %cst128_i32 = spirv.Constant 128 : i32 | |
| %cst512_i32 = spirv.Constant 512 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst16_i32 = spirv.Constant 16 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst_f16 = spirv.Constant 0.000000e+00 : f16 | |
| %0 = spirv.CompositeConstruct %cst_f16 : (f16) -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %1 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %2 = spirv.CompositeExtract %1[0 : i32] : vector<3xi32> | |
| %3 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %4 = spirv.CompositeExtract %3[1 : i32] : vector<3xi32> | |
| %5 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %6 = spirv.CompositeExtract %5[2 : i32] : vector<3xi32> | |
| %__workgroup_mem__3_addr = spirv.mlir.addressof @__workgroup_mem__3 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup> | |
| %__workgroup_mem__4_addr = spirv.mlir.addressof @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup> | |
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant> | |
| %7 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %8 = spirv.Load "PushConstant" %7 : i32 | |
| %9 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %10 = spirv.Load "PushConstant" %9 : i32 | |
| %11 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %12 = spirv.Load "PushConstant" %11 : i32 | |
| %13 = spirv.SLessThan %8, %cst0_i32 : i32 | |
| %14 = spirv.ISub %cst-1_i32, %8 : i32 | |
| %15 = spirv.Select %13, %14, %8 : i1, i32 | |
| %16 = spirv.SDiv %15, %cst16_i32 : i32 | |
| %17 = spirv.ISub %cst-1_i32, %16 : i32 | |
| %18 = spirv.Select %13, %17, %16 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %19 = spirv.SLessThan %10, %cst0_i32 : i32 | |
| %20 = spirv.ISub %cst-1_i32, %10 : i32 | |
| %21 = spirv.Select %19, %20, %10 : i1, i32 | |
| %22 = spirv.SDiv %21, %cst16_i32 : i32 | |
| %23 = spirv.ISub %cst-1_i32, %22 : i32 | |
| %24 = spirv.Select %19, %23, %22 : i1, i32 | |
| %25 = spirv.SLessThan %12, %cst0_i32 : i32 | |
| %26 = spirv.ISub %cst-1_i32, %12 : i32 | |
| %27 = spirv.Select %25, %26, %12 : i1, i32 | |
| %28 = spirv.SDiv %27, %cst16_i32 : i32 | |
| %29 = spirv.ISub %cst-1_i32, %28 : i32 | |
| %30 = spirv.Select %25, %29, %28 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %31 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %32 = spirv.CompositeExtract %31[2 : i32] : vector<3xi32> | |
| %33 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %34 = spirv.CompositeExtract %33[1 : i32] : vector<3xi32> | |
| %35 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %36 = spirv.CompositeExtract %35[0 : i32] : vector<3xi32> | |
| %37 = spirv.IMul %34, %cst512_i32 : i32 | |
| %38 = spirv.IAdd %37, %2 : i32 | |
| %39 = spirv.IMul %4, %cst128_i32 : i32 | |
| %40 = spirv.IAdd %38, %39 : i32 | |
| %41 = spirv.IMul %6, %cst256_i32 : i32 | |
| %42 = spirv.IAdd %40, %41 : i32 | |
| %43 = spirv.IMul %32, %cst73728_i32 : i32 | |
| %44 = spirv.IAdd %42, %43 : i32 | |
| %45 = spirv.IAdd %44, %18 : i32 | |
| %46 = spirv.SLessThan %2, %cst0_i32 : i32 | |
| %47 = spirv.ISub %cst-1_i32, %2 : i32 | |
| %48 = spirv.Select %46, %47, %2 : i1, i32 | |
| %49 = spirv.SDiv %48, %cst4_i32 : i32 | |
| %50 = spirv.ISub %cst-1_i32, %49 : i32 | |
| %51 = spirv.Select %46, %50, %49 : i1, i32 | |
| %52 = spirv.IMul %51, %cst4_i32 : i32 | |
| %53 = spirv.IAdd %45, %52 : i32 | |
| %54 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %53] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %55 = spirv.Load "StorageBuffer" %54 : vector<4xf32> | |
| %56 = spirv.IMul %4, %cst80_i32 : i32 | |
| %57 = spirv.IAdd %2, %56 : i32 | |
| %58 = spirv.IMul %6, %cst160_i32 : i32 | |
| %59 = spirv.IAdd %57, %58 : i32 | |
| %60 = spirv.IAdd %59, %51 : i32 | |
| %61 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %60] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %61, %55 : vector<4xf32> | |
| %62 = spirv.IAdd %53, %cst256_i32 : i32 | |
| %63 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %62] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %64 = spirv.Load "StorageBuffer" %63 : vector<4xf32> | |
| %65 = spirv.IAdd %60, %cst160_i32 : i32 | |
| %66 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %65] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %66, %64 : vector<4xf32> | |
| %67 = spirv.IMul %4, %cst9216_i32 : i32 | |
| %68 = spirv.IAdd %2, %67 : i32 | |
| %69 = spirv.IMul %6, %cst18432_i32 : i32 | |
| %70 = spirv.IAdd %68, %69 : i32 | |
| %71 = spirv.IMul %36, %cst8_i32 : i32 | |
| %72 = spirv.IAdd %70, %71 : i32 | |
| %73 = spirv.IAdd %72, %43 : i32 | |
| %74 = spirv.IAdd %73, %24 : i32 | |
| %75 = spirv.SDiv %48, %cst8_i32 : i32 | |
| %76 = spirv.ISub %cst-1_i32, %75 : i32 | |
| %77 = spirv.Select %46, %76, %75 : i1, i32 | |
| %78 = spirv.IMul %77, %cst1144_i32 : i32 | |
| %79 = spirv.IAdd %74, %78 : i32 | |
| %80 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %79] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %81 = spirv.Load "StorageBuffer" %80 : vector<4xf32> | |
| %82 = spirv.IMul %4, %cst72_i32 : i32 | |
| %83 = spirv.IAdd %2, %82 : i32 | |
| %84 = spirv.IMul %6, %cst144_i32 : i32 | |
| %85 = spirv.IAdd %83, %84 : i32 | |
| %86 = spirv.IAdd %85, %77 : i32 | |
| %87 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %86] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %87, %81 : vector<4xf32> | |
| %88 = spirv.IAdd %79, %cst18432_i32 : i32 | |
| %89 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %88] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %90 = spirv.Load "StorageBuffer" %89 : vector<4xf32> | |
| %91 = spirv.IAdd %86, %cst144_i32 : i32 | |
| %92 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %91] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %92, %90 : vector<4xf32> | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| %93 = spirv.IMul %4, %cst160_i32 : i32 | |
| %94 = spirv.IMul %6, %cst320_i32 : i32 | |
| %95 = spirv.IAdd %93, %94 : i32 | |
| %96 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %95] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %97 = spirv.NV.CooperativeMatrixLoad %96, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %98 = spirv.IAdd %95, %cst2_i32 : i32 | |
| %99 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %98] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %100 = spirv.NV.CooperativeMatrixLoad %99, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %101 = spirv.IAdd %95, %cst80_i32 : i32 | |
| %102 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %101] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %103 = spirv.NV.CooperativeMatrixLoad %102, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %104 = spirv.IAdd %95, %cst82_i32 : i32 | |
| %105 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %104] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %106 = spirv.NV.CooperativeMatrixLoad %105, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %107 = spirv.IMul %6, %cst288_i32 : i32 | |
| %108 = spirv.SDiv %48, %cst32_i32 : i32 | |
| %109 = spirv.ISub %cst-1_i32, %108 : i32 | |
| %110 = spirv.Select %46, %109, %108 : i1, i32 | |
| %111 = spirv.IMul %110, %cst4_i32 : i32 | |
| %112 = spirv.IAdd %107, %111 : i32 | |
| %113 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %112] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %114 = spirv.NV.CooperativeMatrixLoad %113, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %115 = spirv.IAdd %112, %cst2_i32 : i32 | |
| %116 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %115] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %117 = spirv.NV.CooperativeMatrixLoad %116, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %118 = spirv.IAdd %112, %cst144_i32 : i32 | |
| %119 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %118] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %120 = spirv.NV.CooperativeMatrixLoad %119, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %121 = spirv.IAdd %112, %cst146_i32 : i32 | |
| %122 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %121] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %123 = spirv.NV.CooperativeMatrixLoad %122, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %124 = spirv.NV.CooperativeMatrixMulAdd %97, %114, %0 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %125 = spirv.NV.CooperativeMatrixMulAdd %100, %120, %124 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %126 = spirv.NV.CooperativeMatrixMulAdd %97, %117, %0 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %127 = spirv.NV.CooperativeMatrixMulAdd %100, %123, %126 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %128 = spirv.NV.CooperativeMatrixMulAdd %103, %114, %0 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %129 = spirv.NV.CooperativeMatrixMulAdd %106, %120, %128 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %130 = spirv.NV.CooperativeMatrixMulAdd %103, %117, %0 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %131 = spirv.NV.CooperativeMatrixMulAdd %106, %123, %130 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %132 = spirv.IAdd %53, %cst4_i32 : i32 | |
| %133 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %132] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %134 = spirv.Load "StorageBuffer" %133 : vector<4xf32> | |
| %135 = spirv.IAdd %60, %cst320_i32 : i32 | |
| %136 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %135] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %136, %134 : vector<4xf32> | |
| %137 = spirv.IAdd %53, %cst260_i32 : i32 | |
| %138 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %137] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %139 = spirv.Load "StorageBuffer" %138 : vector<4xf32> | |
| %140 = spirv.IAdd %60, %cst480_i32 : i32 | |
| %141 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %140] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %141, %139 : vector<4xf32> | |
| %142 = spirv.IAdd %79, %cst36864_i32 : i32 | |
| %143 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %142] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %144 = spirv.Load "StorageBuffer" %143 : vector<4xf32> | |
| %145 = spirv.IAdd %86, %cst288_i32 : i32 | |
| %146 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %145] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %146, %144 : vector<4xf32> | |
| %147 = spirv.IAdd %79, %cst55296_i32 : i32 | |
| %148 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %147] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %149 = spirv.Load "StorageBuffer" %148 : vector<4xf32> | |
| %150 = spirv.IAdd %86, %cst432_i32 : i32 | |
| %151 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %150] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %151, %149 : vector<4xf32> | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| %152 = spirv.IAdd %95, %cst320_i32 : i32 | |
| %153 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %152] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %154 = spirv.NV.CooperativeMatrixLoad %153, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %155 = spirv.IAdd %95, %cst322_i32 : i32 | |
| %156 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %155] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %157 = spirv.NV.CooperativeMatrixLoad %156, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %158 = spirv.IAdd %95, %cst400_i32 : i32 | |
| %159 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %158] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %160 = spirv.NV.CooperativeMatrixLoad %159, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %161 = spirv.IAdd %95, %cst402_i32 : i32 | |
| %162 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %161] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %163 = spirv.NV.CooperativeMatrixLoad %162, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %164 = spirv.IAdd %112, %cst288_i32 : i32 | |
| %165 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %164] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %166 = spirv.NV.CooperativeMatrixLoad %165, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %167 = spirv.IAdd %112, %cst290_i32 : i32 | |
| %168 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %167] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %169 = spirv.NV.CooperativeMatrixLoad %168, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %170 = spirv.IAdd %112, %cst432_i32 : i32 | |
| %171 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %170] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %172 = spirv.NV.CooperativeMatrixLoad %171, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %173 = spirv.IAdd %112, %cst434_i32 : i32 | |
| %174 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %173] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %175 = spirv.NV.CooperativeMatrixLoad %174, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %176 = spirv.NV.CooperativeMatrixMulAdd %154, %166, %125 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %177 = spirv.NV.CooperativeMatrixMulAdd %157, %172, %176 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %178 = spirv.NV.CooperativeMatrixMulAdd %154, %169, %127 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %179 = spirv.NV.CooperativeMatrixMulAdd %157, %175, %178 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %180 = spirv.NV.CooperativeMatrixMulAdd %160, %166, %129 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %181 = spirv.NV.CooperativeMatrixMulAdd %163, %172, %180 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %182 = spirv.NV.CooperativeMatrixMulAdd %160, %169, %131 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %183 = spirv.NV.CooperativeMatrixMulAdd %163, %175, %182 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %184 = spirv.IMul %32, %cst10616832_i32 : i32 | |
| %185 = spirv.IMul %6, %cst10616832_i32 : i32 | |
| %186 = spirv.IAdd %184, %185 : i32 | |
| %187 = spirv.IMul %34, %cst73728_i32 : i32 | |
| %188 = spirv.IAdd %186, %187 : i32 | |
| %189 = spirv.IMul %4, %cst36864_i32 : i32 | |
| %190 = spirv.IAdd %188, %189 : i32 | |
| %191 = spirv.IAdd %190, %71 : i32 | |
| %192 = spirv.IAdd %191, %30 : i32 | |
| %193 = spirv.IAdd %192, %111 : i32 | |
| %194 = spirv.IAdd %193, %cst18434_i32 : i32 | |
| %195 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %194] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %195, %183, %cst1152_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %196 = spirv.IAdd %193, %cst18432_i32 : i32 | |
| %197 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %196] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %197, %181, %cst1152_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %198 = spirv.IAdd %193, %cst2_i32 : i32 | |
| %199 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %198] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %199, %179, %cst1152_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %200 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %193] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %200, %177, %cst1152_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_46_batch_matmul_10x9216x9216x64, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__ | |
| spirv.ExecutionMode @forward_dispatch_46_batch_matmul_10x9216x9216x64 "LocalSize", 64, 2, 1 | |
| } | |
| } | |
| } | |
| } | |
| hal.executable private @forward_dispatch_47 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
// Export record for entry point @forward_dispatch_47 (ordinal 0, layout #pipeline_layout1),
// declared with workgroup_size = [128, 1, 1] and translation_info = #translation3.
// The region takes a !hal.device and four index arguments but uses none of them:
// it unconditionally returns the constant triple (9216, 5, 2).
// NOTE(review): presumably that triple is the dispatch workgroup count in x/y/z — confirm against the HAL dialect docs.
| hal.executable.export public @forward_dispatch_47 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation3, workgroup_size = [128 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| %c9216 = arith.constant 9216 : index | |
| %c5 = arith.constant 5 : index | |
| %c2 = arith.constant 2 : index | |
| hal.return %c9216, %c5, %c2 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.3, [Float16, Shader, GroupNonUniformShuffle], [SPV_KHR_storage_buffer_storage_class]> { | |
// Module-scope variables of the SPIR-V module that contains @forward_dispatch_47.
// Three Workgroup-storage-class scratch buffers, each a struct wrapping an array of 4 x vector<2xf16>.
| spirv.GlobalVariable @__workgroup_mem__7 : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<2xf16>>)>, Workgroup> | |
| spirv.GlobalVariable @__workgroup_mem__6 : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<2xf16>>)>, Workgroup> | |
| spirv.GlobalVariable @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<2xf16>>)>, Workgroup> | |
// Built-in input: WorkgroupId as a vector<3xi32>.
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
// Push-constant block: a struct holding an array of two i32 values (stride 4, member offset 0).
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
// Built-in input: LocalInvocationId as a vector<3xi32>.
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
// Two StorageBuffer resources, each a runtime-sized array of vector<4xf32> with stride 16,
// declared with bind(0, 0) and bind(0, 1).
// NOTE(review): bind(a, b) is presumably (descriptor set, binding) — confirm against the SPIR-V dialect docs.
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_47() "None" { | |
| %cst1152_i32 = spirv.Constant 1152 : i32 | |
| %cst10616832_i32 = spirv.Constant 10616832 : i32 | |
| %cst53084160_i32 = spirv.Constant 53084160 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst16_i32 = spirv.Constant 16 : i32 | |
| %cst_vec_4xf32 = spirv.Constant dense<-2.67890066E+36> : vector<4xf32> | |
| %cst_vec_4xf32_0 = spirv.Constant dense<0.000000e+00> : vector<4xf32> | |
| %cst_vec_4xf32_1 = spirv.Constant dense<0.693147182> : vector<4xf32> | |
| %cst_vec_4xf32_2 = spirv.Constant dense<1.44269502> : vector<4xf32> | |
| %cst_vec_4xf32_3 = spirv.Constant dense<1.000000e+00> : vector<4xf32> | |
| %cst_vec_4xf32_4 = spirv.Constant dense<0.499705136> : vector<4xf32> | |
| %cst_vec_4xf32_5 = spirv.Constant dense<0.168738902> : vector<4xf32> | |
| %cst_vec_4xf32_6 = spirv.Constant dense<0.0366896503> : vector<4xf32> | |
| %cst_vec_4xf32_7 = spirv.Constant dense<1.314350e-02> : vector<4xf32> | |
| %cst_vec_4xi32 = spirv.Constant dense<23> : vector<4xi32> | |
| %cst_vec_4xf32_8 = spirv.Constant dense<0x7F800000> : vector<4xf32> | |
| %cst_vec_4xf32_9 = spirv.Constant dense<0xFF800000> : vector<4xf32> | |
| %cst_vec_4xf32_10 = spirv.Constant dense<1.17549435E-38> : vector<4xf32> | |
| %cst_vec_4xi32_11 = spirv.Constant dense<127> : vector<4xi32> | |
| %cst_vec_4xi32_12 = spirv.Constant dense<-127> : vector<4xi32> | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst4_i32 = spirv.Constant 4 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst3_i32 = spirv.Constant 3 : i32 | |
| %cst1024_i32 = spirv.Constant 1024 : i32 | |
| %cst9216_i32 = spirv.Constant 9216 : i32 | |
| %cst_f16 = spirv.Constant 0.000000e+00 : f16 | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %0 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %1 = spirv.CompositeExtract %0[0 : i32] : vector<3xi32> | |
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| %2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %3 = spirv.Load "PushConstant" %2 : i32 | |
| %4 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %5 = spirv.Load "PushConstant" %4 : i32 | |
| %6 = spirv.SLessThan %3, %cst0_i32 : i32 | |
| %7 = spirv.ISub %cst-1_i32, %3 : i32 | |
| %8 = spirv.Select %6, %7, %3 : i1, i32 | |
| %9 = spirv.SDiv %8, %cst16_i32 : i32 | |
| %10 = spirv.ISub %cst-1_i32, %9 : i32 | |
| %11 = spirv.Select %6, %10, %9 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %12 = spirv.SLessThan %5, %cst0_i32 : i32 | |
| %13 = spirv.ISub %cst-1_i32, %5 : i32 | |
| %14 = spirv.Select %12, %13, %5 : i1, i32 | |
| %15 = spirv.SDiv %14, %cst16_i32 : i32 | |
| %16 = spirv.ISub %cst-1_i32, %15 : i32 | |
| %17 = spirv.Select %12, %16, %15 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %18 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %19 = spirv.CompositeExtract %18[2 : i32] : vector<3xi32> | |
| %20 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %21 = spirv.CompositeExtract %20[1 : i32] : vector<3xi32> | |
| %22 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %23 = spirv.CompositeExtract %22[0 : i32] : vector<3xi32> | |
| %24 = spirv.Variable : !spirv.ptr<vector<4xf32>, Function> | |
// Running-maximum loop over data read from @__resource_var_0_0_.
// ^bb1 carries two values: %206, the loop counter (starts at 0, advances by 1024 per iteration,
// loop exits once it is no longer < 9216), and %207, the running value (starts at %cst_vec_4xf32).
// Each iteration loads one vector<4xf32>, reinterprets its bits as eight f16 values (two vector<4xf16>
// halves), and combines each half lane-wise with the f16 reinterpretation of %207 via GL.FMax.
// The IsNan/Select pair after each FMax forces a NaN result whenever either operand lane is NaN.
// The updated running value is written to the Function variable %24 on every iteration.
// NOTE(review): the initial value dense<-2.67890066E+36> looks like the f32 view of two f16 -inf halves (0xFC00FC00) — confirm.
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%cst0_i32, %cst_vec_4xf32 : i32, vector<4xf32>) | |
| ^bb1(%206: i32, %207: vector<4xf32>): // 2 preds: ^bb0, ^bb2 | |
| %208 = spirv.SLessThan %206, %cst9216_i32 : i32 | |
| spirv.BranchConditional %208, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
// %214 = %206 divided by 8; the negate/select wrapping around SDiv rounds toward negative infinity
// when the dividend is negative (floor division).
| %209 = spirv.SLessThan %206, %cst0_i32 : i32 | |
| %210 = spirv.ISub %cst-1_i32, %206 : i32 | |
| %211 = spirv.Select %209, %210, %206 : i1, i32 | |
| %212 = spirv.SDiv %211, %cst8_i32 : i32 | |
| %213 = spirv.ISub %cst-1_i32, %212 : i32 | |
| %214 = spirv.Select %209, %213, %212 : i1, i32 | |
// Load index (in vector<4xf32> units) =
//   %214 + LocalInvocationId.x (%1) + WorkgroupId.z (%19) * 53084160
//   + WorkgroupId.y (%21) * 10616832 + WorkgroupId.x (%23) * 1152 + %11,
// where %11 is push constant 0 floor-divided by 16.
| %215 = spirv.IMul %19, %cst53084160_i32 : i32 | |
| %216 = spirv.IAdd %1, %215 : i32 | |
| %217 = spirv.IMul %21, %cst10616832_i32 : i32 | |
| %218 = spirv.IAdd %216, %217 : i32 | |
| %219 = spirv.IMul %23, %cst1152_i32 : i32 | |
| %220 = spirv.IAdd %218, %219 : i32 | |
| %221 = spirv.IAdd %214, %220 : i32 | |
| %222 = spirv.IAdd %221, %11 : i32 | |
| %223 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %222] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %224 = spirv.Load "StorageBuffer" %223 : vector<4xf32> | |
// Lower half: f32 lanes 0-1 of the loaded vector, viewed as four f16 values.
| %225 = spirv.VectorShuffle [0 : i32, 1 : i32] %224 : vector<4xf32>, %224 : vector<4xf32> -> vector<2xf32> | |
| %226 = spirv.Bitcast %225 : vector<2xf32> to vector<4xf16> | |
// Unpack f32 lanes 0-1 of the running value %207 into four f16 values (%235).
| %227 = spirv.CompositeExtract %207[0 : i32] : vector<4xf32> | |
| %228 = spirv.Bitcast %227 : f32 to vector<2xf16> | |
| %229 = spirv.CompositeExtract %228[0 : i32] : vector<2xf16> | |
| %230 = spirv.CompositeExtract %228[1 : i32] : vector<2xf16> | |
| %231 = spirv.CompositeExtract %207[1 : i32] : vector<4xf32> | |
| %232 = spirv.Bitcast %231 : f32 to vector<2xf16> | |
| %233 = spirv.CompositeExtract %232[0 : i32] : vector<2xf16> | |
| %234 = spirv.CompositeExtract %232[1 : i32] : vector<2xf16> | |
| %235 = spirv.CompositeConstruct %229, %230, %233, %234 : (f16, f16, f16, f16) -> vector<4xf16> | |
// Lane-wise max of loaded vs. running values; a NaN in either operand wins over the FMax result.
| %236 = spirv.GL.FMax %226, %235 : vector<4xf16> | |
| %237 = spirv.IsNan %226 : vector<4xf16> | |
| %238 = spirv.IsNan %235 : vector<4xf16> | |
| %239 = spirv.Select %237, %226, %236 : vector<4xi1>, vector<4xf16> | |
| %240 = spirv.Select %238, %235, %239 : vector<4xi1>, vector<4xf16> | |
// Upper half: same steps using f32 lanes 2-3 of the loaded vector and of %207.
| %241 = spirv.VectorShuffle [2 : i32, 3 : i32] %224 : vector<4xf32>, %224 : vector<4xf32> -> vector<2xf32> | |
| %242 = spirv.Bitcast %241 : vector<2xf32> to vector<4xf16> | |
| %243 = spirv.CompositeExtract %207[2 : i32] : vector<4xf32> | |
| %244 = spirv.Bitcast %243 : f32 to vector<2xf16> | |
| %245 = spirv.CompositeExtract %244[0 : i32] : vector<2xf16> | |
| %246 = spirv.CompositeExtract %244[1 : i32] : vector<2xf16> | |
| %247 = spirv.CompositeExtract %207[3 : i32] : vector<4xf32> | |
| %248 = spirv.Bitcast %247 : f32 to vector<2xf16> | |
| %249 = spirv.CompositeExtract %248[0 : i32] : vector<2xf16> | |
| %250 = spirv.CompositeExtract %248[1 : i32] : vector<2xf16> | |
| %251 = spirv.CompositeConstruct %245, %246, %249, %250 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %252 = spirv.GL.FMax %242, %251 : vector<4xf16> | |
| %253 = spirv.IsNan %242 : vector<4xf16> | |
| %254 = spirv.IsNan %251 : vector<4xf16> | |
| %255 = spirv.Select %253, %242, %252 : vector<4xi1>, vector<4xf16> | |
| %256 = spirv.Select %254, %251, %255 : vector<4xi1>, vector<4xf16> | |
// Repack both halves into one vector<4xf32>: %259 places the lower-half result (%258) in lanes 0-1
// over a zero vector, then %260 places the upper-half result (%257) in lanes 2-3.
| %257 = spirv.Bitcast %256 : vector<4xf16> to vector<2xf32> | |
| %258 = spirv.Bitcast %240 : vector<4xf16> to vector<2xf32> | |
| %259 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32_0 : vector<4xf32>, %258 : vector<2xf32> -> vector<4xf32> | |
| %260 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %259 : vector<4xf32>, %257 : vector<2xf32> -> vector<4xf32> | |
// Persist the running value in %24 and advance the counter by 1024.
| spirv.Store "Function" %24, %260 : vector<4xf32> | |
| %261 = spirv.IAdd %206, %cst1024_i32 : i32 | |
| spirv.Branch ^bb1(%261, %260 : i32, vector<4xf32>) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
| %25 = spirv.Load "Function" %24 : vector<4xf32> | |
| %26 = spirv.CompositeExtract %25[0 : i32] : vector<4xf32> | |
| %27 = spirv.Bitcast %26 : f32 to vector<2xf16> | |
| %28 = spirv.CompositeExtract %27[0 : i32] : vector<2xf16> | |
| %29 = spirv.CompositeExtract %27[1 : i32] : vector<2xf16> | |
| %30 = spirv.CompositeExtract %25[1 : i32] : vector<4xf32> | |
| %31 = spirv.Bitcast %30 : f32 to vector<2xf16> | |
| %32 = spirv.CompositeExtract %31[0 : i32] : vector<2xf16> | |
| %33 = spirv.CompositeExtract %31[1 : i32] : vector<2xf16> | |
| %34 = spirv.GL.FMax %28, %29 : f16 | |
| %35 = spirv.GL.FMax %34, %32 : f16 | |
| %36 = spirv.GL.FMax %35, %33 : f16 | |
| %37 = spirv.CompositeExtract %25[2 : i32] : vector<4xf32> | |
| %38 = spirv.Bitcast %37 : f32 to vector<2xf16> | |
| %39 = spirv.CompositeExtract %38[0 : i32] : vector<2xf16> | |
| %40 = spirv.CompositeExtract %38[1 : i32] : vector<2xf16> | |
| %41 = spirv.CompositeExtract %25[3 : i32] : vector<4xf32> | |
| %42 = spirv.Bitcast %41 : f32 to vector<2xf16> | |
| %43 = spirv.CompositeExtract %42[0 : i32] : vector<2xf16> | |
| %44 = spirv.CompositeExtract %42[1 : i32] : vector<2xf16> | |
| %45 = spirv.GL.FMax %39, %40 : f16 | |
| %46 = spirv.GL.FMax %45, %43 : f16 | |
| %47 = spirv.GL.FMax %46, %44 : f16 | |
| %48 = spirv.CompositeConstruct %36, %47 : (f16, f16) -> vector<2xf16> | |
| %49 = spirv.Bitcast %48 : vector<2xf16> to i32 | |
| %50 = spirv.GroupNonUniformShuffleXor <Subgroup> %49, %cst1_i32 : i32, i32 | |
| %51 = spirv.Bitcast %50 : i32 to vector<2xf16> | |
| %52 = spirv.GL.FMax %48, %51 : vector<2xf16> | |
| %53 = spirv.IsNan %48 : vector<2xf16> | |
| %54 = spirv.IsNan %51 : vector<2xf16> | |
| %55 = spirv.Select %53, %48, %52 : vector<2xi1>, vector<2xf16> | |
| %56 = spirv.Select %54, %51, %55 : vector<2xi1>, vector<2xf16> | |
| %57 = spirv.Bitcast %56 : vector<2xf16> to i32 | |
| %58 = spirv.GroupNonUniformShuffleXor <Subgroup> %57, %cst2_i32 : i32, i32 | |
| %59 = spirv.Bitcast %58 : i32 to vector<2xf16> | |
| %60 = spirv.GL.FMax %56, %59 : vector<2xf16> | |
| %61 = spirv.IsNan %56 : vector<2xf16> | |
| %62 = spirv.IsNan %59 : vector<2xf16> | |
| %63 = spirv.Select %61, %56, %60 : vector<2xi1>, vector<2xf16> | |
| %64 = spirv.Select %62, %59, %63 : vector<2xi1>, vector<2xf16> | |
| %65 = spirv.Bitcast %64 : vector<2xf16> to i32 | |
| %66 = spirv.GroupNonUniformShuffleXor <Subgroup> %65, %cst4_i32 : i32, i32 | |
| %67 = spirv.Bitcast %66 : i32 to vector<2xf16> | |
| %68 = spirv.GL.FMax %64, %67 : vector<2xf16> | |
| %69 = spirv.IsNan %64 : vector<2xf16> | |
| %70 = spirv.IsNan %67 : vector<2xf16> | |
| %71 = spirv.Select %69, %64, %68 : vector<2xi1>, vector<2xf16> | |
| %72 = spirv.Select %70, %67, %71 : vector<2xi1>, vector<2xf16> | |
| %73 = spirv.Bitcast %72 : vector<2xf16> to i32 | |
| %74 = spirv.GroupNonUniformShuffleXor <Subgroup> %73, %cst8_i32 : i32, i32 | |
| %75 = spirv.Bitcast %74 : i32 to vector<2xf16> | |
| %76 = spirv.GL.FMax %72, %75 : vector<2xf16> | |
| %77 = spirv.IsNan %72 : vector<2xf16> | |
| %78 = spirv.IsNan %75 : vector<2xf16> | |
| %79 = spirv.Select %77, %72, %76 : vector<2xi1>, vector<2xf16> | |
| %80 = spirv.Select %78, %75, %79 : vector<2xi1>, vector<2xf16> | |
| %81 = spirv.Bitcast %80 : vector<2xf16> to i32 | |
| %82 = spirv.GroupNonUniformShuffleXor <Subgroup> %81, %cst16_i32 : i32, i32 | |
| %83 = spirv.Bitcast %82 : i32 to vector<2xf16> | |
| %84 = spirv.GL.FMax %80, %83 : vector<2xf16> | |
| %85 = spirv.IsNan %80 : vector<2xf16> | |
| %86 = spirv.IsNan %83 : vector<2xf16> | |
| %87 = spirv.Select %85, %80, %84 : vector<2xi1>, vector<2xf16> | |
| %88 = spirv.Select %86, %83, %87 : vector<2xi1>, vector<2xf16> | |
| %__workgroup_mem__5_addr = spirv.mlir.addressof @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<2xf16>>)>, Workgroup> | |
| %89 = spirv.UDiv %1, %cst32_i32 : i32 | |
| %90 = spirv.UMod %1, %cst32_i32 : i32 | |
| %91 = spirv.IEqual %90, %cst0_i32 : i32 | |
// Conditional store: the ^bb1 branch runs only when %91 is true, i.e. when
// LocalInvocationId.x (%1) modulo 32 equals 0. It writes %88 (the result of the preceding
// shuffle-xor/FMax chain) into @__workgroup_mem__5 at slot %89 (= %1 / 32, unsigned).
// NOTE(review): with min/max_subgroup_size = 32 in the target env this looks like the first lane of
// each subgroup publishing its subgroup's partial maximum — confirm.
| spirv.mlir.selection { | |
| spirv.BranchConditional %91, ^bb1, ^bb2 | |
| ^bb1: // pred: ^bb0 | |
| %206 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %89] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<2xf16>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %206, %88 : vector<2xf16> | |
| spirv.Branch ^bb2 | |
| ^bb2: // 2 preds: ^bb0, ^bb1 | |
| spirv.mlir.merge | |
| } | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| %92 = spirv.GL.UMin %90, %cst3_i32 : i32 | |
| %93 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %92] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<2xf16>>)>, Workgroup>, i32, i32 | |
| %94 = spirv.Load "Workgroup" %93 : vector<2xf16> | |
| %95 = spirv.Bitcast %94 : vector<2xf16> to i32 | |
| %96 = spirv.GroupNonUniformShuffleXor <Subgroup> %95, %cst1_i32 : i32, i32 | |
| %97 = spirv.Bitcast %96 : i32 to vector<2xf16> | |
| %98 = spirv.GL.FMax %94, %97 : vector<2xf16> | |
| %99 = spirv.IsNan %94 : vector<2xf16> | |
| %100 = spirv.IsNan %97 : vector<2xf16> | |
| %101 = spirv.Select %99, %94, %98 : vector<2xi1>, vector<2xf16> | |
| %102 = spirv.Select %100, %97, %101 : vector<2xi1>, vector<2xf16> | |
| %103 = spirv.Bitcast %102 : vector<2xf16> to i32 | |
| %104 = spirv.GroupNonUniformShuffleXor <Subgroup> %103, %cst2_i32 : i32, i32 | |
| %105 = spirv.Bitcast %104 : i32 to vector<2xf16> | |
| %106 = spirv.GL.FMax %102, %105 : vector<2xf16> | |
| %107 = spirv.IsNan %102 : vector<2xf16> | |
| %108 = spirv.IsNan %105 : vector<2xf16> | |
| %109 = spirv.Select %107, %102, %106 : vector<2xi1>, vector<2xf16> | |
| %110 = spirv.Select %108, %105, %109 : vector<2xi1>, vector<2xf16> | |
| %111 = spirv.Bitcast %110 : vector<2xf16> to i32 | |
| %112 = spirv.GroupNonUniformShuffle <Subgroup> %111, %cst0_i32 : i32, i32 | |
| %113 = spirv.Bitcast %112 : i32 to vector<2xf16> | |
| %114 = spirv.CompositeExtract %113[0 : i32] : vector<2xf16> | |
| %115 = spirv.CompositeExtract %113[1 : i32] : vector<2xf16> | |
| %116 = spirv.GL.FMax %114, %115 : f16 | |
| %117 = spirv.CompositeConstruct %116, %116, %116, %116 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %118 = spirv.Variable : !spirv.ptr<vector<4xf32>, Function> | |
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%cst0_i32, %cst_vec_4xf32_0 : i32, vector<4xf32>) | |
| ^bb1(%206: i32, %207: vector<4xf32>): // 2 preds: ^bb0, ^bb2 | |
| %208 = spirv.SLessThan %206, %cst9216_i32 : i32 | |
| spirv.BranchConditional %208, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
| %209 = spirv.SLessThan %206, %cst0_i32 : i32 | |
| %210 = spirv.ISub %cst-1_i32, %206 : i32 | |
| %211 = spirv.Select %209, %210, %206 : i1, i32 | |
| %212 = spirv.SDiv %211, %cst8_i32 : i32 | |
| %213 = spirv.ISub %cst-1_i32, %212 : i32 | |
| %214 = spirv.Select %209, %213, %212 : i1, i32 | |
| %215 = spirv.IMul %19, %cst53084160_i32 : i32 | |
| %216 = spirv.IAdd %1, %215 : i32 | |
| %217 = spirv.IMul %21, %cst10616832_i32 : i32 | |
| %218 = spirv.IAdd %216, %217 : i32 | |
| %219 = spirv.IMul %23, %cst1152_i32 : i32 | |
| %220 = spirv.IAdd %218, %219 : i32 | |
| %221 = spirv.IAdd %214, %220 : i32 | |
| %222 = spirv.IAdd %221, %11 : i32 | |
| %223 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %222] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %224 = spirv.Load "StorageBuffer" %223 : vector<4xf32> | |
| %225 = spirv.VectorShuffle [0 : i32, 1 : i32] %224 : vector<4xf32>, %224 : vector<4xf32> -> vector<2xf32> | |
| %226 = spirv.Bitcast %225 : vector<2xf32> to vector<4xf16> | |
| %227 = spirv.FSub %226, %117 : vector<4xf16> | |
| %228 = spirv.VectorShuffle [2 : i32, 3 : i32] %224 : vector<4xf32>, %224 : vector<4xf32> -> vector<2xf32> | |
| %229 = spirv.Bitcast %228 : vector<2xf32> to vector<4xf16> | |
| %230 = spirv.FSub %229, %117 : vector<4xf16> | |
| %231 = spirv.FConvert %227 : vector<4xf16> to vector<4xf32> | |
| %232 = spirv.IsNan %231 : vector<4xf32> | |
| %233 = spirv.LogicalOr %232, %232 : vector<4xi1> | |
| %234 = spirv.FMul %231, %cst_vec_4xf32_2 : vector<4xf32> | |
| %235 = spirv.GL.Floor %234 : vector<4xf32> | |
| %236 = spirv.FMul %235, %cst_vec_4xf32_1 : vector<4xf32> | |
| %237 = spirv.FSub %231, %236 : vector<4xf32> | |
| %238 = spirv.FMul %237, %237 : vector<4xf32> | |
| %239 = spirv.FMul %238, %238 : vector<4xf32> | |
| %240 = spirv.GL.Fma %cst_vec_4xf32_3, %237, %cst_vec_4xf32_3 : vector<4xf32> | |
| %241 = spirv.GL.Fma %cst_vec_4xf32_5, %237, %cst_vec_4xf32_4 : vector<4xf32> | |
| %242 = spirv.GL.Fma %cst_vec_4xf32_7, %237, %cst_vec_4xf32_6 : vector<4xf32> | |
| %243 = spirv.GL.Fma %241, %238, %240 : vector<4xf32> | |
| %244 = spirv.GL.Fma %242, %239, %243 : vector<4xf32> | |
| %245 = spirv.ConvertFToS %235 : vector<4xf32> to vector<4xi32> | |
| %246 = spirv.IAdd %245, %cst_vec_4xi32_11 : vector<4xi32> | |
| %247 = spirv.ShiftLeftLogical %246, %cst_vec_4xi32 : vector<4xi32>, vector<4xi32> | |
| %248 = spirv.Bitcast %247 : vector<4xi32> to vector<4xf32> | |
| %249 = spirv.FMul %244, %248 : vector<4xf32> | |
| %250 = spirv.SLessThanEqual %245, %cst_vec_4xi32_11 : vector<4xi32> | |
| %251 = spirv.SGreaterThanEqual %245, %cst_vec_4xi32_12 : vector<4xi32> | |
| %252 = spirv.FOrdEqual %231, %cst_vec_4xf32_9 : vector<4xf32> | |
| %253 = spirv.FOrdEqual %231, %cst_vec_4xf32_8 : vector<4xf32> | |
| %254 = spirv.FOrdGreaterThan %231, %cst_vec_4xf32_0 : vector<4xf32> | |
| %255 = spirv.LogicalAnd %250, %251 : vector<4xi1> | |
| %256 = spirv.Select %254, %cst_vec_4xf32_8, %cst_vec_4xf32_10 : vector<4xi1>, vector<4xf32> | |
| %257 = spirv.Select %255, %249, %256 : vector<4xi1>, vector<4xf32> | |
| %258 = spirv.Select %253, %cst_vec_4xf32_8, %257 : vector<4xi1>, vector<4xf32> | |
| %259 = spirv.Select %252, %cst_vec_4xf32_0, %258 : vector<4xi1>, vector<4xf32> | |
| %260 = spirv.Select %233, %231, %259 : vector<4xi1>, vector<4xf32> | |
| %261 = spirv.FConvert %260 : vector<4xf32> to vector<4xf16> | |
| %262 = spirv.FConvert %230 : vector<4xf16> to vector<4xf32> | |
| %263 = spirv.IsNan %262 : vector<4xf32> | |
| %264 = spirv.LogicalOr %263, %263 : vector<4xi1> | |
| %265 = spirv.FMul %262, %cst_vec_4xf32_2 : vector<4xf32> | |
| %266 = spirv.GL.Floor %265 : vector<4xf32> | |
| %267 = spirv.FMul %266, %cst_vec_4xf32_1 : vector<4xf32> | |
| %268 = spirv.FSub %262, %267 : vector<4xf32> | |
| %269 = spirv.FMul %268, %268 : vector<4xf32> | |
| %270 = spirv.FMul %269, %269 : vector<4xf32> | |
| %271 = spirv.GL.Fma %cst_vec_4xf32_3, %268, %cst_vec_4xf32_3 : vector<4xf32> | |
| %272 = spirv.GL.Fma %cst_vec_4xf32_5, %268, %cst_vec_4xf32_4 : vector<4xf32> | |
| %273 = spirv.GL.Fma %cst_vec_4xf32_7, %268, %cst_vec_4xf32_6 : vector<4xf32> | |
| %274 = spirv.GL.Fma %272, %269, %271 : vector<4xf32> | |
| %275 = spirv.GL.Fma %273, %270, %274 : vector<4xf32> | |
| %276 = spirv.ConvertFToS %266 : vector<4xf32> to vector<4xi32> | |
| %277 = spirv.IAdd %276, %cst_vec_4xi32_11 : vector<4xi32> | |
| %278 = spirv.ShiftLeftLogical %277, %cst_vec_4xi32 : vector<4xi32>, vector<4xi32> | |
| %279 = spirv.Bitcast %278 : vector<4xi32> to vector<4xf32> | |
| %280 = spirv.FMul %275, %279 : vector<4xf32> | |
| %281 = spirv.SLessThanEqual %276, %cst_vec_4xi32_11 : vector<4xi32> | |
| %282 = spirv.SGreaterThanEqual %276, %cst_vec_4xi32_12 : vector<4xi32> | |
| %283 = spirv.FOrdEqual %262, %cst_vec_4xf32_9 : vector<4xf32> | |
| %284 = spirv.FOrdEqual %262, %cst_vec_4xf32_8 : vector<4xf32> | |
| %285 = spirv.FOrdGreaterThan %262, %cst_vec_4xf32_0 : vector<4xf32> | |
| %286 = spirv.LogicalAnd %281, %282 : vector<4xi1> | |
| %287 = spirv.Select %285, %cst_vec_4xf32_8, %cst_vec_4xf32_10 : vector<4xi1>, vector<4xf32> | |
| %288 = spirv.Select %286, %280, %287 : vector<4xi1>, vector<4xf32> | |
| %289 = spirv.Select %284, %cst_vec_4xf32_8, %288 : vector<4xi1>, vector<4xf32> | |
| %290 = spirv.Select %283, %cst_vec_4xf32_0, %289 : vector<4xi1>, vector<4xf32> | |
| %291 = spirv.Select %264, %262, %290 : vector<4xi1>, vector<4xf32> | |
| %292 = spirv.FConvert %291 : vector<4xf32> to vector<4xf16> | |
| %293 = spirv.CompositeExtract %207[0 : i32] : vector<4xf32> | |
| %294 = spirv.Bitcast %293 : f32 to vector<2xf16> | |
| %295 = spirv.CompositeExtract %294[0 : i32] : vector<2xf16> | |
| %296 = spirv.CompositeExtract %294[1 : i32] : vector<2xf16> | |
| %297 = spirv.CompositeExtract %207[1 : i32] : vector<4xf32> | |
| %298 = spirv.Bitcast %297 : f32 to vector<2xf16> | |
| %299 = spirv.CompositeExtract %298[0 : i32] : vector<2xf16> | |
| %300 = spirv.CompositeExtract %298[1 : i32] : vector<2xf16> | |
| %301 = spirv.CompositeConstruct %295, %296, %299, %300 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %302 = spirv.FAdd %261, %301 : vector<4xf16> | |
| %303 = spirv.CompositeExtract %207[2 : i32] : vector<4xf32> | |
| %304 = spirv.Bitcast %303 : f32 to vector<2xf16> | |
| %305 = spirv.CompositeExtract %304[0 : i32] : vector<2xf16> | |
| %306 = spirv.CompositeExtract %304[1 : i32] : vector<2xf16> | |
| %307 = spirv.CompositeExtract %207[3 : i32] : vector<4xf32> | |
| %308 = spirv.Bitcast %307 : f32 to vector<2xf16> | |
| %309 = spirv.CompositeExtract %308[0 : i32] : vector<2xf16> | |
| %310 = spirv.CompositeExtract %308[1 : i32] : vector<2xf16> | |
| %311 = spirv.CompositeConstruct %305, %306, %309, %310 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %312 = spirv.FAdd %292, %311 : vector<4xf16> | |
| %313 = spirv.Bitcast %312 : vector<4xf16> to vector<2xf32> | |
| %314 = spirv.Bitcast %302 : vector<4xf16> to vector<2xf32> | |
| %315 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32_0 : vector<4xf32>, %314 : vector<2xf32> -> vector<4xf32> | |
| %316 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %315 : vector<4xf32>, %313 : vector<2xf32> -> vector<4xf32> | |
| spirv.Store "Function" %118, %316 : vector<4xf32> | |
| %317 = spirv.IAdd %206, %cst1024_i32 : i32 | |
| spirv.Branch ^bb1(%317, %316 : i32, vector<4xf32>) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
| %119 = spirv.Load "Function" %118 : vector<4xf32> | |
| %120 = spirv.CompositeExtract %119[0 : i32] : vector<4xf32> | |
| %121 = spirv.Bitcast %120 : f32 to vector<2xf16> | |
| %122 = spirv.CompositeExtract %121[0 : i32] : vector<2xf16> | |
| %123 = spirv.CompositeExtract %121[1 : i32] : vector<2xf16> | |
| %124 = spirv.CompositeExtract %119[1 : i32] : vector<4xf32> | |
| %125 = spirv.Bitcast %124 : f32 to vector<2xf16> | |
| %126 = spirv.CompositeExtract %125[0 : i32] : vector<2xf16> | |
| %127 = spirv.CompositeExtract %125[1 : i32] : vector<2xf16> | |
| %128 = spirv.FAdd %122, %123 : f16 | |
| %129 = spirv.FAdd %128, %126 : f16 | |
| %130 = spirv.FAdd %129, %127 : f16 | |
| %131 = spirv.CompositeExtract %119[2 : i32] : vector<4xf32> | |
| %132 = spirv.Bitcast %131 : f32 to vector<2xf16> | |
| %133 = spirv.CompositeExtract %132[0 : i32] : vector<2xf16> | |
| %134 = spirv.CompositeExtract %132[1 : i32] : vector<2xf16> | |
| %135 = spirv.CompositeExtract %119[3 : i32] : vector<4xf32> | |
| %136 = spirv.Bitcast %135 : f32 to vector<2xf16> | |
| %137 = spirv.CompositeExtract %136[0 : i32] : vector<2xf16> | |
| %138 = spirv.CompositeExtract %136[1 : i32] : vector<2xf16> | |
| %139 = spirv.FAdd %133, %134 : f16 | |
| %140 = spirv.FAdd %139, %137 : f16 | |
| %141 = spirv.FAdd %140, %138 : f16 | |
| %142 = spirv.CompositeConstruct %130, %141 : (f16, f16) -> vector<2xf16> | |
| %143 = spirv.Bitcast %142 : vector<2xf16> to i32 | |
| %144 = spirv.GroupNonUniformShuffleXor <Subgroup> %143, %cst1_i32 : i32, i32 | |
| %145 = spirv.Bitcast %144 : i32 to vector<2xf16> | |
| %146 = spirv.FAdd %142, %145 : vector<2xf16> | |
| %147 = spirv.Bitcast %146 : vector<2xf16> to i32 | |
| %148 = spirv.GroupNonUniformShuffleXor <Subgroup> %147, %cst2_i32 : i32, i32 | |
| %149 = spirv.Bitcast %148 : i32 to vector<2xf16> | |
| %150 = spirv.FAdd %146, %149 : vector<2xf16> | |
| %151 = spirv.Bitcast %150 : vector<2xf16> to i32 | |
| %152 = spirv.GroupNonUniformShuffleXor <Subgroup> %151, %cst4_i32 : i32, i32 | |
| %153 = spirv.Bitcast %152 : i32 to vector<2xf16> | |
| %154 = spirv.FAdd %150, %153 : vector<2xf16> | |
| %155 = spirv.Bitcast %154 : vector<2xf16> to i32 | |
| %156 = spirv.GroupNonUniformShuffleXor <Subgroup> %155, %cst8_i32 : i32, i32 | |
| %157 = spirv.Bitcast %156 : i32 to vector<2xf16> | |
| %158 = spirv.FAdd %154, %157 : vector<2xf16> | |
| %159 = spirv.Bitcast %158 : vector<2xf16> to i32 | |
| %160 = spirv.GroupNonUniformShuffleXor <Subgroup> %159, %cst16_i32 : i32, i32 | |
| %161 = spirv.Bitcast %160 : i32 to vector<2xf16> | |
| %162 = spirv.FAdd %158, %161 : vector<2xf16> | |
| %__workgroup_mem__6_addr = spirv.mlir.addressof @__workgroup_mem__6 : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<2xf16>>)>, Workgroup> | |
| spirv.mlir.selection { | |
| spirv.BranchConditional %91, ^bb1, ^bb2 | |
| ^bb1: // pred: ^bb0 | |
| %206 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %89] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<2xf16>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %206, %162 : vector<2xf16> | |
| spirv.Branch ^bb2 | |
| ^bb2: // 2 preds: ^bb0, ^bb1 | |
| spirv.mlir.merge | |
| } | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| %163 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %92] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<2xf16>>)>, Workgroup>, i32, i32 | |
| %164 = spirv.Load "Workgroup" %163 : vector<2xf16> | |
| %165 = spirv.Bitcast %164 : vector<2xf16> to i32 | |
| %166 = spirv.GroupNonUniformShuffleXor <Subgroup> %165, %cst1_i32 : i32, i32 | |
| %167 = spirv.Bitcast %166 : i32 to vector<2xf16> | |
| %168 = spirv.FAdd %164, %167 : vector<2xf16> | |
| %169 = spirv.Bitcast %168 : vector<2xf16> to i32 | |
| %170 = spirv.GroupNonUniformShuffleXor <Subgroup> %169, %cst2_i32 : i32, i32 | |
| %171 = spirv.Bitcast %170 : i32 to vector<2xf16> | |
| %172 = spirv.FAdd %168, %171 : vector<2xf16> | |
| %173 = spirv.Bitcast %172 : vector<2xf16> to i32 | |
| %174 = spirv.GroupNonUniformShuffle <Subgroup> %173, %cst0_i32 : i32, i32 | |
| %175 = spirv.Bitcast %174 : i32 to vector<2xf16> | |
| %176 = spirv.CompositeExtract %175[0 : i32] : vector<2xf16> | |
| %177 = spirv.CompositeExtract %175[1 : i32] : vector<2xf16> | |
| %178 = spirv.FAdd %176, %177 : f16 | |
| %179 = spirv.FAdd %178, %cst_f16 : f16 | |
| %__workgroup_mem__7_addr = spirv.mlir.addressof @__workgroup_mem__7 : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<2xf16>>)>, Workgroup> | |
| spirv.mlir.selection { | |
| spirv.BranchConditional %91, ^bb1, ^bb2 | |
| ^bb1: // pred: ^bb0 | |
| %206 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %89] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<2xf16>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %206, %88 : vector<2xf16> | |
| spirv.Branch ^bb2 | |
| ^bb2: // 2 preds: ^bb0, ^bb1 | |
| spirv.mlir.merge | |
| } | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| %180 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %92] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<2xf16>>)>, Workgroup>, i32, i32 | |
| %181 = spirv.Load "Workgroup" %180 : vector<2xf16> | |
| %182 = spirv.Bitcast %181 : vector<2xf16> to i32 | |
| %183 = spirv.GroupNonUniformShuffleXor <Subgroup> %182, %cst1_i32 : i32, i32 | |
| %184 = spirv.Bitcast %183 : i32 to vector<2xf16> | |
| %185 = spirv.GL.FMax %181, %184 : vector<2xf16> | |
| %186 = spirv.IsNan %181 : vector<2xf16> | |
| %187 = spirv.IsNan %184 : vector<2xf16> | |
| %188 = spirv.Select %186, %181, %185 : vector<2xi1>, vector<2xf16> | |
| %189 = spirv.Select %187, %184, %188 : vector<2xi1>, vector<2xf16> | |
| %190 = spirv.Bitcast %189 : vector<2xf16> to i32 | |
| %191 = spirv.GroupNonUniformShuffleXor <Subgroup> %190, %cst2_i32 : i32, i32 | |
| %192 = spirv.Bitcast %191 : i32 to vector<2xf16> | |
| %193 = spirv.GL.FMax %189, %192 : vector<2xf16> | |
| %194 = spirv.IsNan %189 : vector<2xf16> | |
| %195 = spirv.IsNan %192 : vector<2xf16> | |
| %196 = spirv.Select %194, %189, %193 : vector<2xi1>, vector<2xf16> | |
| %197 = spirv.Select %195, %192, %196 : vector<2xi1>, vector<2xf16> | |
| %198 = spirv.Bitcast %197 : vector<2xf16> to i32 | |
| %199 = spirv.GroupNonUniformShuffle <Subgroup> %198, %cst0_i32 : i32, i32 | |
| %200 = spirv.Bitcast %199 : i32 to vector<2xf16> | |
| %201 = spirv.CompositeExtract %200[0 : i32] : vector<2xf16> | |
| %202 = spirv.CompositeExtract %200[1 : i32] : vector<2xf16> | |
| %203 = spirv.GL.FMax %201, %202 : f16 | |
| %204 = spirv.CompositeConstruct %203, %203, %203, %203 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %205 = spirv.CompositeConstruct %179, %179, %179, %179 : (f16, f16, f16, f16) -> vector<4xf16> | |
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%cst0_i32 : i32) | |
| ^bb1(%206: i32): // 2 preds: ^bb0, ^bb2 | |
| %207 = spirv.SLessThan %206, %cst9216_i32 : i32 | |
| spirv.BranchConditional %207, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
| %208 = spirv.SLessThan %206, %cst0_i32 : i32 | |
| %209 = spirv.ISub %cst-1_i32, %206 : i32 | |
| %210 = spirv.Select %208, %209, %206 : i1, i32 | |
| %211 = spirv.SDiv %210, %cst8_i32 : i32 | |
| %212 = spirv.ISub %cst-1_i32, %211 : i32 | |
| %213 = spirv.Select %208, %212, %211 : i1, i32 | |
| %214 = spirv.IMul %19, %cst53084160_i32 : i32 | |
| %215 = spirv.IAdd %1, %214 : i32 | |
| %216 = spirv.IMul %21, %cst10616832_i32 : i32 | |
| %217 = spirv.IAdd %215, %216 : i32 | |
| %218 = spirv.IMul %23, %cst1152_i32 : i32 | |
| %219 = spirv.IAdd %217, %218 : i32 | |
| %220 = spirv.IAdd %213, %219 : i32 | |
| %221 = spirv.IAdd %220, %11 : i32 | |
| %222 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %221] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %223 = spirv.Load "StorageBuffer" %222 : vector<4xf32> | |
| %224 = spirv.VectorShuffle [0 : i32, 1 : i32] %223 : vector<4xf32>, %223 : vector<4xf32> -> vector<2xf32> | |
| %225 = spirv.Bitcast %224 : vector<2xf32> to vector<4xf16> | |
| %226 = spirv.FSub %225, %204 : vector<4xf16> | |
| %227 = spirv.VectorShuffle [2 : i32, 3 : i32] %223 : vector<4xf32>, %223 : vector<4xf32> -> vector<2xf32> | |
| %228 = spirv.Bitcast %227 : vector<2xf32> to vector<4xf16> | |
| %229 = spirv.FSub %228, %204 : vector<4xf16> | |
| %230 = spirv.FConvert %226 : vector<4xf16> to vector<4xf32> | |
| %231 = spirv.IsNan %230 : vector<4xf32> | |
| %232 = spirv.LogicalOr %231, %231 : vector<4xi1> | |
| %233 = spirv.FMul %230, %cst_vec_4xf32_2 : vector<4xf32> | |
| %234 = spirv.GL.Floor %233 : vector<4xf32> | |
| %235 = spirv.FMul %234, %cst_vec_4xf32_1 : vector<4xf32> | |
| %236 = spirv.FSub %230, %235 : vector<4xf32> | |
| %237 = spirv.FMul %236, %236 : vector<4xf32> | |
| %238 = spirv.FMul %237, %237 : vector<4xf32> | |
| %239 = spirv.GL.Fma %cst_vec_4xf32_3, %236, %cst_vec_4xf32_3 : vector<4xf32> | |
| %240 = spirv.GL.Fma %cst_vec_4xf32_5, %236, %cst_vec_4xf32_4 : vector<4xf32> | |
| %241 = spirv.GL.Fma %cst_vec_4xf32_7, %236, %cst_vec_4xf32_6 : vector<4xf32> | |
| %242 = spirv.GL.Fma %240, %237, %239 : vector<4xf32> | |
| %243 = spirv.GL.Fma %241, %238, %242 : vector<4xf32> | |
| %244 = spirv.ConvertFToS %234 : vector<4xf32> to vector<4xi32> | |
| %245 = spirv.IAdd %244, %cst_vec_4xi32_11 : vector<4xi32> | |
| %246 = spirv.ShiftLeftLogical %245, %cst_vec_4xi32 : vector<4xi32>, vector<4xi32> | |
| %247 = spirv.Bitcast %246 : vector<4xi32> to vector<4xf32> | |
| %248 = spirv.FMul %243, %247 : vector<4xf32> | |
| %249 = spirv.SLessThanEqual %244, %cst_vec_4xi32_11 : vector<4xi32> | |
| %250 = spirv.SGreaterThanEqual %244, %cst_vec_4xi32_12 : vector<4xi32> | |
| %251 = spirv.FOrdEqual %230, %cst_vec_4xf32_9 : vector<4xf32> | |
| %252 = spirv.FOrdEqual %230, %cst_vec_4xf32_8 : vector<4xf32> | |
| %253 = spirv.FOrdGreaterThan %230, %cst_vec_4xf32_0 : vector<4xf32> | |
| %254 = spirv.LogicalAnd %249, %250 : vector<4xi1> | |
| %255 = spirv.Select %253, %cst_vec_4xf32_8, %cst_vec_4xf32_10 : vector<4xi1>, vector<4xf32> | |
| %256 = spirv.Select %254, %248, %255 : vector<4xi1>, vector<4xf32> | |
| %257 = spirv.Select %252, %cst_vec_4xf32_8, %256 : vector<4xi1>, vector<4xf32> | |
| %258 = spirv.Select %251, %cst_vec_4xf32_0, %257 : vector<4xi1>, vector<4xf32> | |
| %259 = spirv.Select %232, %230, %258 : vector<4xi1>, vector<4xf32> | |
| %260 = spirv.FConvert %259 : vector<4xf32> to vector<4xf16> | |
| %261 = spirv.FConvert %229 : vector<4xf16> to vector<4xf32> | |
| %262 = spirv.IsNan %261 : vector<4xf32> | |
| %263 = spirv.LogicalOr %262, %262 : vector<4xi1> | |
| %264 = spirv.FMul %261, %cst_vec_4xf32_2 : vector<4xf32> | |
| %265 = spirv.GL.Floor %264 : vector<4xf32> | |
| %266 = spirv.FMul %265, %cst_vec_4xf32_1 : vector<4xf32> | |
| %267 = spirv.FSub %261, %266 : vector<4xf32> | |
| %268 = spirv.FMul %267, %267 : vector<4xf32> | |
| %269 = spirv.FMul %268, %268 : vector<4xf32> | |
| %270 = spirv.GL.Fma %cst_vec_4xf32_3, %267, %cst_vec_4xf32_3 : vector<4xf32> | |
| %271 = spirv.GL.Fma %cst_vec_4xf32_5, %267, %cst_vec_4xf32_4 : vector<4xf32> | |
| %272 = spirv.GL.Fma %cst_vec_4xf32_7, %267, %cst_vec_4xf32_6 : vector<4xf32> | |
| %273 = spirv.GL.Fma %271, %268, %270 : vector<4xf32> | |
| %274 = spirv.GL.Fma %272, %269, %273 : vector<4xf32> | |
| %275 = spirv.ConvertFToS %265 : vector<4xf32> to vector<4xi32> | |
| %276 = spirv.IAdd %275, %cst_vec_4xi32_11 : vector<4xi32> | |
| %277 = spirv.ShiftLeftLogical %276, %cst_vec_4xi32 : vector<4xi32>, vector<4xi32> | |
| %278 = spirv.Bitcast %277 : vector<4xi32> to vector<4xf32> | |
| %279 = spirv.FMul %274, %278 : vector<4xf32> | |
| %280 = spirv.SLessThanEqual %275, %cst_vec_4xi32_11 : vector<4xi32> | |
| %281 = spirv.SGreaterThanEqual %275, %cst_vec_4xi32_12 : vector<4xi32> | |
| %282 = spirv.FOrdEqual %261, %cst_vec_4xf32_9 : vector<4xf32> | |
| %283 = spirv.FOrdEqual %261, %cst_vec_4xf32_8 : vector<4xf32> | |
| %284 = spirv.FOrdGreaterThan %261, %cst_vec_4xf32_0 : vector<4xf32> | |
| %285 = spirv.LogicalAnd %280, %281 : vector<4xi1> | |
| %286 = spirv.Select %284, %cst_vec_4xf32_8, %cst_vec_4xf32_10 : vector<4xi1>, vector<4xf32> | |
| %287 = spirv.Select %285, %279, %286 : vector<4xi1>, vector<4xf32> | |
| %288 = spirv.Select %283, %cst_vec_4xf32_8, %287 : vector<4xi1>, vector<4xf32> | |
| %289 = spirv.Select %282, %cst_vec_4xf32_0, %288 : vector<4xi1>, vector<4xf32> | |
| %290 = spirv.Select %263, %261, %289 : vector<4xi1>, vector<4xf32> | |
| %291 = spirv.FConvert %290 : vector<4xf32> to vector<4xf16> | |
| %292 = spirv.FDiv %260, %205 : vector<4xf16> | |
| %293 = spirv.FDiv %291, %205 : vector<4xf16> | |
| %294 = spirv.Bitcast %293 : vector<4xf16> to vector<2xf32> | |
| %295 = spirv.Bitcast %292 : vector<4xf16> to vector<2xf32> | |
| %296 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32_0 : vector<4xf32>, %295 : vector<2xf32> -> vector<4xf32> | |
| %297 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %296 : vector<4xf32>, %294 : vector<2xf32> -> vector<4xf32> | |
| %298 = spirv.IAdd %220, %17 : i32 | |
| %299 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %298] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %299, %297 : vector<4xf32> | |
| %300 = spirv.IAdd %206, %cst1024_i32 : i32 | |
| spirv.Branch ^bb1(%300 : i32) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_47, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__ | |
| spirv.ExecutionMode @forward_dispatch_47 "LocalSize", 128, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
| hal.executable private @forward_dispatch_48 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
// Export record for this variant's entry point: publishes
// @forward_dispatch_48_batch_matmul_10x9216x64x9216 as ordinal 0 with
// #pipeline_layout4, and records subgroup_size = 32 and
// workgroup_size = [64, 2, 1] as attributes.
// The region receives a !hal.device and four index arguments, uses none of
// them, and returns the constant triple (1, 144, 10).
// NOTE(review): presumably that triple is the workgroup count in (x, y, z)
// order — confirm against the HAL dialect documentation.
| hal.executable.export public @forward_dispatch_48_batch_matmul_10x9216x64x9216 ordinal(0) layout(#pipeline_layout4) attributes {subgroup_size = 32 : index, translation_info = #translation4, workgroup_size = [64 : index, 2 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| %c1 = arith.constant 1 : index | |
| %c144 = arith.constant 144 : index | |
| %c10 = arith.constant 10 : index | |
| hal.return %c1, %c144, %c10 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, Float16, CooperativeMatrixNV], [SPV_KHR_storage_buffer_storage_class, SPV_NV_cooperative_matrix]> { | |
// Module-level variables; each one is taken by spirv.mlir.addressof in the
// entry function that follows.
// Built-in input holding the WorkgroupId as vector<3xi32>.
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
// Push-constant block: a struct wrapping an array of 3 i32 (stride 4).
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant> | |
// Two Workgroup-storage arrays of vector<4xf32>: 576 and 640 elements.
| spirv.GlobalVariable @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup> | |
| spirv.GlobalVariable @__workgroup_mem__3 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup> | |
// Built-in input holding the LocalInvocationId as vector<3xi32>.
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
// Two StorageBuffer runtime arrays of vector<4xf32> (stride 16), declared
// with bind(0, 0) and bind(0, 1).
// NOTE(review): bind(a, b) is presumably (descriptor set, binding) — confirm
// against the SPIR-V dialect documentation.
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_48_batch_matmul_10x9216x64x9216() "None" attributes {spirv.entry_point_abi = #spirv.entry_point_abi<subgroup_size = 32>} { | |
| %cst9_i32 = spirv.Constant 9 : i32 | |
| %false = spirv.Constant false | |
| %cst5_i32 = spirv.Constant 5 : i32 | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst-33_i32 = spirv.Constant -33 : i32 | |
| %cst130_i32 = spirv.Constant 130 : i32 | |
| %cst256_i32 = spirv.Constant 256 : i32 | |
| %cst512_i32 = spirv.Constant 512 : i32 | |
| %cst-576_i32 = spirv.Constant -576 : i32 | |
| %cst-640_i32 = spirv.Constant -640 : i32 | |
| %cst146_i32 = spirv.Constant 146 : i32 | |
| %cst288_i32 = spirv.Constant 288 : i32 | |
| %cst82_i32 = spirv.Constant 82 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
| %cst144_i32 = spirv.Constant 144 : i32 | |
| %cst72_i32 = spirv.Constant 72 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst128_i32 = spirv.Constant 128 : i32 | |
| %cst64_i32 = spirv.Constant 64 : i32 | |
| %cst160_i32 = spirv.Constant 160 : i32 | |
| %cst80_i32 = spirv.Constant 80 : i32 | |
| %cst1148_i32 = spirv.Constant 1148 : i32 | |
| %cst4_i32 = spirv.Constant 4 : i32 | |
| %cst10616832_i32 = spirv.Constant 10616832 : i32 | |
| %cst36864_i32 = spirv.Constant 36864 : i32 | |
| %cst18432_i32 = spirv.Constant 18432 : i32 | |
| %cst73728_i32 = spirv.Constant 73728 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst16_i32 = spirv.Constant 16 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst9184_i32 = spirv.Constant 9184 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst_f16 = spirv.Constant 0.000000e+00 : f16 | |
| %0 = spirv.CompositeConstruct %cst_f16 : (f16) -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %1 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %2 = spirv.CompositeExtract %1[0 : i32] : vector<3xi32> | |
| %3 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %4 = spirv.CompositeExtract %3[1 : i32] : vector<3xi32> | |
| %5 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %6 = spirv.CompositeExtract %5[2 : i32] : vector<3xi32> | |
| %__workgroup_mem__3_addr = spirv.mlir.addressof @__workgroup_mem__3 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup> | |
| %__workgroup_mem__4_addr = spirv.mlir.addressof @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup> | |
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant> | |
| %7 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %8 = spirv.Load "PushConstant" %7 : i32 | |
| %9 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %10 = spirv.Load "PushConstant" %9 : i32 | |
| %11 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %12 = spirv.Load "PushConstant" %11 : i32 | |
| %13 = spirv.SLessThan %8, %cst0_i32 : i32 | |
| %14 = spirv.ISub %cst-1_i32, %8 : i32 | |
| %15 = spirv.Select %13, %14, %8 : i1, i32 | |
| %16 = spirv.SDiv %15, %cst16_i32 : i32 | |
| %17 = spirv.ISub %cst-1_i32, %16 : i32 | |
| %18 = spirv.Select %13, %17, %16 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %19 = spirv.SLessThan %10, %cst0_i32 : i32 | |
| %20 = spirv.ISub %cst-1_i32, %10 : i32 | |
| %21 = spirv.Select %19, %20, %10 : i1, i32 | |
| %22 = spirv.SDiv %21, %cst16_i32 : i32 | |
| %23 = spirv.ISub %cst-1_i32, %22 : i32 | |
| %24 = spirv.Select %19, %23, %22 : i1, i32 | |
| %25 = spirv.SLessThan %12, %cst0_i32 : i32 | |
| %26 = spirv.ISub %cst-1_i32, %12 : i32 | |
| %27 = spirv.Select %25, %26, %12 : i1, i32 | |
| %28 = spirv.SDiv %27, %cst16_i32 : i32 | |
| %29 = spirv.ISub %cst-1_i32, %28 : i32 | |
| %30 = spirv.Select %25, %29, %28 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %31 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %32 = spirv.CompositeExtract %31[2 : i32] : vector<3xi32> | |
| %33 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %34 = spirv.CompositeExtract %33[1 : i32] : vector<3xi32> | |
| %35 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %36 = spirv.CompositeExtract %35[0 : i32] : vector<3xi32> | |
| %37 = spirv.IMul %34, %cst73728_i32 : i32 | |
| %38 = spirv.IAdd %37, %2 : i32 | |
| %39 = spirv.IMul %4, %cst18432_i32 : i32 | |
| %40 = spirv.IAdd %38, %39 : i32 | |
| %41 = spirv.IMul %6, %cst36864_i32 : i32 | |
| %42 = spirv.IAdd %40, %41 : i32 | |
| %43 = spirv.IMul %32, %cst10616832_i32 : i32 | |
| %44 = spirv.IAdd %42, %43 : i32 | |
| %45 = spirv.IAdd %44, %18 : i32 | |
| %46 = spirv.SLessThan %2, %cst0_i32 : i32 | |
| %47 = spirv.ISub %cst-1_i32, %2 : i32 | |
| %48 = spirv.Select %46, %47, %2 : i1, i32 | |
| %49 = spirv.SDiv %48, %cst4_i32 : i32 | |
| %50 = spirv.ISub %cst-1_i32, %49 : i32 | |
| %51 = spirv.Select %46, %50, %49 : i1, i32 | |
| %52 = spirv.IMul %51, %cst1148_i32 : i32 | |
| %53 = spirv.IAdd %45, %52 : i32 | |
| %54 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %53] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %55 = spirv.Load "StorageBuffer" %54 : vector<4xf32> | |
| %56 = spirv.IMul %4, %cst80_i32 : i32 | |
| %57 = spirv.IAdd %2, %56 : i32 | |
| %58 = spirv.IMul %6, %cst160_i32 : i32 | |
| %59 = spirv.IAdd %57, %58 : i32 | |
| %60 = spirv.IAdd %59, %51 : i32 | |
| %61 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %60] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %61, %55 : vector<4xf32> | |
| %62 = spirv.IAdd %53, %cst36864_i32 : i32 | |
| %63 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %62] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %64 = spirv.Load "StorageBuffer" %63 : vector<4xf32> | |
| %65 = spirv.IAdd %60, %cst160_i32 : i32 | |
| %66 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %65] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %66, %64 : vector<4xf32> | |
| %67 = spirv.IMul %4, %cst64_i32 : i32 | |
| %68 = spirv.IAdd %2, %67 : i32 | |
| %69 = spirv.IMul %6, %cst128_i32 : i32 | |
| %70 = spirv.IAdd %68, %69 : i32 | |
| %71 = spirv.IMul %36, %cst8_i32 : i32 | |
| %72 = spirv.IAdd %70, %71 : i32 | |
| %73 = spirv.IMul %32, %cst73728_i32 : i32 | |
| %74 = spirv.IAdd %72, %73 : i32 | |
| %75 = spirv.IAdd %74, %24 : i32 | |
| %76 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %75] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %77 = spirv.Load "StorageBuffer" %76 : vector<4xf32> | |
| %78 = spirv.IMul %4, %cst72_i32 : i32 | |
| %79 = spirv.IAdd %2, %78 : i32 | |
| %80 = spirv.IMul %6, %cst144_i32 : i32 | |
| %81 = spirv.IAdd %79, %80 : i32 | |
| %82 = spirv.SDiv %48, %cst8_i32 : i32 | |
| %83 = spirv.ISub %cst-1_i32, %82 : i32 | |
| %84 = spirv.Select %46, %83, %82 : i1, i32 | |
| %85 = spirv.IAdd %81, %84 : i32 | |
| %86 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %85] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %86, %77 : vector<4xf32> | |
| %87 = spirv.IAdd %75, %cst128_i32 : i32 | |
| %88 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %87] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %89 = spirv.Load "StorageBuffer" %88 : vector<4xf32> | |
| %90 = spirv.IAdd %85, %cst144_i32 : i32 | |
| %91 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %90] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %91, %89 : vector<4xf32> | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| %92 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %93 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %94 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %95 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %96 = spirv.Variable : !spirv.ptr<i32, Function> | |
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%cst0_i32, %0, %0, %0, %0, %cst0_i32 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32) | |
| ^bb1(%161: i32, %162: !spirv.coopmatrix<16x16xf16, Subgroup>, %163: !spirv.coopmatrix<16x16xf16, Subgroup>, %164: !spirv.coopmatrix<16x16xf16, Subgroup>, %165: !spirv.coopmatrix<16x16xf16, Subgroup>, %166: i32): // 2 preds: ^bb0, ^bb2 | |
| %167 = spirv.SLessThan %161, %cst9184_i32 : i32 | |
| spirv.BranchConditional %167, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
| %168 = spirv.IMul %166, %cst320_i32 : i32 | |
| %169 = spirv.IMul %4, %cst160_i32 : i32 | |
| %170 = spirv.IAdd %168, %169 : i32 | |
| %171 = spirv.IMul %6, %cst320_i32 : i32 | |
| %172 = spirv.IAdd %170, %171 : i32 | |
| %173 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %172] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %174 = spirv.NV.CooperativeMatrixLoad %173, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %175 = spirv.IAdd %172, %cst2_i32 : i32 | |
| %176 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %175] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %177 = spirv.NV.CooperativeMatrixLoad %176, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %178 = spirv.IAdd %172, %cst80_i32 : i32 | |
| %179 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %178] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %180 = spirv.NV.CooperativeMatrixLoad %179, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %181 = spirv.IAdd %172, %cst82_i32 : i32 | |
| %182 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %181] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %183 = spirv.NV.CooperativeMatrixLoad %182, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %184 = spirv.IMul %166, %cst288_i32 : i32 | |
| %185 = spirv.IMul %6, %cst288_i32 : i32 | |
| %186 = spirv.IAdd %184, %185 : i32 | |
| %187 = spirv.SDiv %48, %cst32_i32 : i32 | |
| %188 = spirv.ISub %cst-1_i32, %187 : i32 | |
| %189 = spirv.Select %46, %188, %187 : i1, i32 | |
| %190 = spirv.IMul %189, %cst4_i32 : i32 | |
| %191 = spirv.IAdd %186, %190 : i32 | |
| %192 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %191] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %193 = spirv.NV.CooperativeMatrixLoad %192, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %194 = spirv.IAdd %191, %cst2_i32 : i32 | |
| %195 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %194] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %196 = spirv.NV.CooperativeMatrixLoad %195, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %197 = spirv.IAdd %191, %cst144_i32 : i32 | |
| %198 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %197] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %199 = spirv.NV.CooperativeMatrixLoad %198, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %200 = spirv.IAdd %191, %cst146_i32 : i32 | |
| %201 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %200] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %202 = spirv.NV.CooperativeMatrixLoad %201, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %203 = spirv.NV.CooperativeMatrixMulAdd %174, %193, %162 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %204 = spirv.NV.CooperativeMatrixMulAdd %177, %199, %203 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %205 = spirv.NV.CooperativeMatrixMulAdd %174, %196, %163 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %206 = spirv.NV.CooperativeMatrixMulAdd %177, %202, %205 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %207 = spirv.NV.CooperativeMatrixMulAdd %180, %193, %164 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %208 = spirv.NV.CooperativeMatrixMulAdd %183, %199, %207 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %209 = spirv.NV.CooperativeMatrixMulAdd %180, %196, %165 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %210 = spirv.NV.CooperativeMatrixMulAdd %183, %202, %209 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %211 = spirv.IAdd %161, %cst32_i32 : i32 | |
| %212 = spirv.SLessThan %211, %cst0_i32 : i32 | |
| %213 = spirv.ISub %cst-33_i32, %161 : i32 | |
| %214 = spirv.Select %212, %213, %211 : i1, i32 | |
| %215 = spirv.SDiv %214, %cst8_i32 : i32 | |
| %216 = spirv.ISub %cst-1_i32, %215 : i32 | |
| %217 = spirv.Select %212, %216, %215 : i1, i32 | |
| %218 = spirv.IAdd %53, %217 : i32 | |
| %219 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %218] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %220 = spirv.Load "StorageBuffer" %219 : vector<4xf32> | |
| %221 = spirv.SDiv %214, %cst32_i32 : i32 | |
| %222 = spirv.ISub %cst-1_i32, %221 : i32 | |
| %223 = spirv.Select %212, %222, %221 : i1, i32 | |
| %224 = spirv.GL.SAbs %223 : i32 | |
| %225 = spirv.GL.SAbs %cst2_i32 : i32 | |
| %226 = spirv.UMod %224, %225 : i32 | |
| %227 = spirv.IEqual %223, %224 : i32 | |
| %228 = spirv.SNegate %226 : i32 | |
| %229 = spirv.Select %227, %226, %228 : i1, i32 | |
| %230 = spirv.SLessThan %229, %cst0_i32 : i32 | |
| %231 = spirv.IAdd %229, %cst2_i32 : i32 | |
| %232 = spirv.Select %230, %231, %229 : i1, i32 | |
| %233 = spirv.IMul %223, %cst320_i32 : i32 | |
| %234 = spirv.IAdd %233, %59 : i32 | |
| %235 = spirv.SLessThan %223, %cst0_i32 : i32 | |
| %236 = spirv.ISub %cst-1_i32, %223 : i32 | |
| %237 = spirv.Select %235, %236, %223 : i1, i32 | |
| %238 = spirv.SDiv %237, %cst2_i32 : i32 | |
| %239 = spirv.ISub %cst-1_i32, %238 : i32 | |
| %240 = spirv.Select %235, %239, %238 : i1, i32 | |
| %241 = spirv.IMul %240, %cst-640_i32 : i32 | |
| %242 = spirv.IAdd %234, %241 : i32 | |
| %243 = spirv.IAdd %242, %51 : i32 | |
| %244 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %243] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %244, %220 : vector<4xf32> | |
| %245 = spirv.IAdd %218, %cst36864_i32 : i32 | |
| %246 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %245] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %247 = spirv.Load "StorageBuffer" %246 : vector<4xf32> | |
| %248 = spirv.IAdd %243, %cst160_i32 : i32 | |
| %249 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %248] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %249, %247 : vector<4xf32> | |
| %250 = spirv.IMul %211, %cst8_i32 : i32 | |
| %251 = spirv.IAdd %250, %2 : i32 | |
| %252 = spirv.IAdd %251, %67 : i32 | |
| %253 = spirv.IAdd %252, %69 : i32 | |
| %254 = spirv.IAdd %253, %71 : i32 | |
| %255 = spirv.IAdd %254, %73 : i32 | |
| %256 = spirv.IAdd %255, %24 : i32 | |
| %257 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %256] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %258 = spirv.Load "StorageBuffer" %257 : vector<4xf32> | |
| %259 = spirv.IMul %223, %cst288_i32 : i32 | |
| %260 = spirv.IAdd %259, %81 : i32 | |
| %261 = spirv.IMul %240, %cst-576_i32 : i32 | |
| %262 = spirv.IAdd %260, %261 : i32 | |
| %263 = spirv.IAdd %262, %84 : i32 | |
| %264 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %263] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %264, %258 : vector<4xf32> | |
| %265 = spirv.IAdd %256, %cst128_i32 : i32 | |
| %266 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %265] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %267 = spirv.Load "StorageBuffer" %266 : vector<4xf32> | |
| %268 = spirv.IAdd %263, %cst144_i32 : i32 | |
| %269 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %268] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %269, %267 : vector<4xf32> | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| spirv.Store "Function" %92, %204 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %93, %206 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %94, %208 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %95, %210 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %96, %232 : i32 | |
| spirv.Branch ^bb1(%211, %204, %206, %208, %210, %232 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
| %97 = spirv.Load "Function" %96 : i32 | |
| %98 = spirv.Load "Function" %95 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %99 = spirv.Load "Function" %94 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %100 = spirv.Load "Function" %93 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %101 = spirv.Load "Function" %92 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %102 = spirv.IMul %4, %cst160_i32 : i32 | |
| %103 = spirv.IMul %97, %cst320_i32 : i32 | |
| %104 = spirv.IAdd %102, %103 : i32 | |
| %105 = spirv.IMul %6, %cst320_i32 : i32 | |
| %106 = spirv.IAdd %104, %105 : i32 | |
| %107 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %106] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %108 = spirv.NV.CooperativeMatrixLoad %107, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %109 = spirv.IAdd %106, %cst2_i32 : i32 | |
| %110 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %109] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %111 = spirv.NV.CooperativeMatrixLoad %110, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %112 = spirv.IAdd %106, %cst80_i32 : i32 | |
| %113 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %112] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %114 = spirv.NV.CooperativeMatrixLoad %113, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %115 = spirv.IAdd %106, %cst82_i32 : i32 | |
| %116 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %115] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %117 = spirv.NV.CooperativeMatrixLoad %116, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %118 = spirv.IMul %97, %cst288_i32 : i32 | |
| %119 = spirv.IMul %6, %cst288_i32 : i32 | |
| %120 = spirv.IAdd %118, %119 : i32 | |
| %121 = spirv.SDiv %48, %cst32_i32 : i32 | |
| %122 = spirv.ISub %cst-1_i32, %121 : i32 | |
| %123 = spirv.Select %46, %122, %121 : i1, i32 | |
| %124 = spirv.IMul %123, %cst4_i32 : i32 | |
| %125 = spirv.IAdd %120, %124 : i32 | |
| %126 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %125] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %127 = spirv.NV.CooperativeMatrixLoad %126, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %128 = spirv.IAdd %125, %cst2_i32 : i32 | |
| %129 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %128] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %130 = spirv.NV.CooperativeMatrixLoad %129, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %131 = spirv.IAdd %125, %cst144_i32 : i32 | |
| %132 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %131] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %133 = spirv.NV.CooperativeMatrixLoad %132, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %134 = spirv.IAdd %125, %cst146_i32 : i32 | |
| %135 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %134] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %136 = spirv.NV.CooperativeMatrixLoad %135, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %137 = spirv.NV.CooperativeMatrixMulAdd %108, %127, %101 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %138 = spirv.NV.CooperativeMatrixMulAdd %111, %133, %137 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %139 = spirv.NV.CooperativeMatrixMulAdd %108, %130, %100 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %140 = spirv.NV.CooperativeMatrixMulAdd %111, %136, %139 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %141 = spirv.NV.CooperativeMatrixMulAdd %114, %127, %99 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %142 = spirv.NV.CooperativeMatrixMulAdd %117, %133, %141 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %143 = spirv.NV.CooperativeMatrixMulAdd %114, %130, %98 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %144 = spirv.NV.CooperativeMatrixMulAdd %117, %136, %143 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %145 = spirv.IMul %6, %cst73728_i32 : i32 | |
| %146 = spirv.IAdd %73, %145 : i32 | |
| %147 = spirv.IMul %34, %cst512_i32 : i32 | |
| %148 = spirv.IAdd %146, %147 : i32 | |
| %149 = spirv.IMul %4, %cst256_i32 : i32 | |
| %150 = spirv.IAdd %148, %149 : i32 | |
| %151 = spirv.IAdd %150, %71 : i32 | |
| %152 = spirv.IAdd %151, %30 : i32 | |
| %153 = spirv.IAdd %152, %124 : i32 | |
| %154 = spirv.IAdd %153, %cst130_i32 : i32 | |
| %155 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %154] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %155, %144, %cst8_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %156 = spirv.IAdd %153, %cst128_i32 : i32 | |
| %157 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %156] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %157, %142, %cst8_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %158 = spirv.IAdd %153, %cst2_i32 : i32 | |
| %159 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %158] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %159, %140, %cst8_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %160 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %153] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %160, %138, %cst8_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_48_batch_matmul_10x9216x64x9216, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__ | |
| spirv.ExecutionMode @forward_dispatch_48_batch_matmul_10x9216x64x9216 "LocalSize", 64, 2, 1 | |
| } | |
| } | |
| } | |
| } | |
// Executable @forward_dispatch_49: a single Vulkan/SPIR-V variant exporting one
// compute entry point, @forward_dispatch_49_generic_2x5x9216x64. The kernel is a
// pure copy: each invocation loads one vector<4xf16> from binding (0, 0) and
// stores it to binding (0, 1) at a differently strided index. It contains no
// loops, no workgroup memory and no barriers.
| hal.executable private @forward_dispatch_49 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
// Export region: ignores its block arguments and returns the constants
// (2, 2304, 10). These appear to be the dispatch workgroup counts in x, y, z
// (z = 10 is consistent with the kernel splitting WorkgroupId.z by 5 below) --
// confirm against the IREE HAL dialect documentation.
// The declared workgroup size [8, 4, 1] matches the LocalSize execution mode at
// the end of the SPIR-V module.
| hal.executable.export public @forward_dispatch_49_generic_2x5x9216x64 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation1, workgroup_size = [8 : index, 4 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| %c2 = arith.constant 2 : index | |
| %c2304 = arith.constant 2304 : index | |
| %c10 = arith.constant 10 : index | |
| hal.return %c2, %c2304, %c10 : index, index, index | |
| } | |
// The target environment attribute below advertises many capabilities, but the
// spirv.module itself only requires Shader, StorageBuffer16BitAccess and Float16.
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
// Module-level interface: two built-in inputs, a push-constant block holding
// two i32 values, and two storage buffers viewed as runtime arrays of
// vector<4xf16> with an 8-byte stride. Binding (0, 0) is only loaded from and
// binding (0, 1) is only stored to in the function below.
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
// Kernel body. With lid = LocalInvocationId, wg = WorkgroupId and pc0/pc1 the
// two push constants, the indices (in vector<4xf16> elements) are:
//   load  = lid.y*16 + wg.y*64  + wg.x*8 + lid.x
//           + (wg.z / 5)*737280 + (wg.z % 5)*147456 + floor(pc0 / 8)
//   store = lid.y*80 + wg.y*320 + wg.x*8 + lid.x
//           + (wg.z / 5)*737280 + (wg.z % 5)*16     + floor(pc1 / 8)
// Note 147456 = 9216*16, 737280 = 5*147456 and 80 = 5*16.
// NOTE(review): together with the "2x5x9216x64" in the function name this looks
// like a copy that swaps the size-5 and size-9216 dimensions of an f16 tensor
// (four f16 per element) -- confirm against the originating linalg.generic.
| spirv.func @forward_dispatch_49_generic_2x5x9216x64() "None" { | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
| %cst80_i32 = spirv.Constant 80 : i32 | |
| %cst147456_i32 = spirv.Constant 147456 : i32 | |
| %cst737280_i32 = spirv.Constant 737280 : i32 | |
| %cst64_i32 = spirv.Constant 64 : i32 | |
| %cst16_i32 = spirv.Constant 16 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst5_i32 = spirv.Constant 5 : i32 | |
// Load both push constants: %1 = pc0, %3 = pc1.
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| %0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %1 = spirv.Load "PushConstant" %0 : i32 | |
| %2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %3 = spirv.Load "PushConstant" %2 : i32 | |
// %9 = floor(pc0 / 8). SDiv is applied to a non-negative operand only: for a
// negative input x the sequence computes -1 - ((-1 - x) / 8), otherwise x / 8.
// Presumably this converts a byte offset into vector<4xf16> units (the buffer
// stride is 8 bytes) -- TODO confirm what the push constants carry.
| %4 = spirv.SLessThan %1, %cst0_i32 : i32 | |
| %5 = spirv.ISub %cst-1_i32, %1 : i32 | |
| %6 = spirv.Select %4, %5, %1 : i1, i32 | |
| %7 = spirv.SDiv %6, %cst8_i32 : i32 | |
| %8 = spirv.ISub %cst-1_i32, %7 : i32 | |
| %9 = spirv.Select %4, %8, %7 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
// %15 = floor(pc1 / 8), same floor-division sequence applied to pc1.
| %10 = spirv.SLessThan %3, %cst0_i32 : i32 | |
| %11 = spirv.ISub %cst-1_i32, %3 : i32 | |
| %12 = spirv.Select %10, %11, %3 : i1, i32 | |
| %13 = spirv.SDiv %12, %cst8_i32 : i32 | |
| %14 = spirv.ISub %cst-1_i32, %13 : i32 | |
| %15 = spirv.Select %10, %14, %13 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer> | |
// Read the built-in IDs. WorkgroupId.z (%17) is split into a quotient (%18) and
// a remainder (%21) by 5; %20 = wg.y, %23 = wg.x, %25 = lid.y, %27 = lid.x.
// The built-in vectors are re-loaded before each extract; the values are the same.
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %16 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %17 = spirv.CompositeExtract %16[2 : i32] : vector<3xi32> | |
| %18 = spirv.UDiv %17, %cst5_i32 : i32 | |
| %19 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %20 = spirv.CompositeExtract %19[1 : i32] : vector<3xi32> | |
| %21 = spirv.UMod %17, %cst5_i32 : i32 | |
| %22 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %23 = spirv.CompositeExtract %22[0 : i32] : vector<3xi32> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %24 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %25 = spirv.CompositeExtract %24[1 : i32] : vector<3xi32> | |
| %26 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %27 = spirv.CompositeExtract %26[0 : i32] : vector<3xi32> | |
// Source index %38: rows advance by 16 elements (lid.y*16 + wg.y*64), the
// column is wg.x*8 + lid.x, and the two wg.z-derived terms use strides 737280
// and 147456; the pc0-derived offset %9 is added last.
| %28 = spirv.IMul %25, %cst16_i32 : i32 | |
| %29 = spirv.IMul %20, %cst64_i32 : i32 | |
| %30 = spirv.IAdd %28, %29 : i32 | |
| %31 = spirv.IMul %23, %cst8_i32 : i32 | |
| %32 = spirv.IAdd %30, %31 : i32 | |
| %33 = spirv.IAdd %32, %27 : i32 | |
| %34 = spirv.IMul %18, %cst737280_i32 : i32 | |
| %35 = spirv.IAdd %33, %34 : i32 | |
| %36 = spirv.IMul %21, %cst147456_i32 : i32 | |
| %37 = spirv.IAdd %35, %36 : i32 | |
| %38 = spirv.IAdd %37, %9 : i32 | |
| %39 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %38] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
| %40 = spirv.Load "StorageBuffer" %39 : vector<4xf16> | |
// Destination index %49: same column term (%31 and %27 are reused) and the same
// wg.z / 5 term (%34), but rows advance by 80 elements (lid.y*80 + wg.y*320)
// and the wg.z % 5 term uses stride 16; the pc1-derived offset %15 is added last.
| %41 = spirv.IMul %25, %cst80_i32 : i32 | |
| %42 = spirv.IMul %20, %cst320_i32 : i32 | |
| %43 = spirv.IAdd %41, %42 : i32 | |
| %44 = spirv.IAdd %43, %31 : i32 | |
| %45 = spirv.IAdd %44, %27 : i32 | |
| %46 = spirv.IAdd %45, %34 : i32 | |
| %47 = spirv.IMul %21, %cst16_i32 : i32 | |
| %48 = spirv.IAdd %46, %47 : i32 | |
| %49 = spirv.IAdd %48, %15 : i32 | |
| %50 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %49] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32 | |
// Write the loaded vector unchanged; no arithmetic is applied to the data.
| spirv.Store "StorageBuffer" %50, %40 : vector<4xf16> | |
| spirv.Return | |
| } | |
// Entry point declaration: GLCompute, with both built-in variables in its
// interface list, and a local workgroup size of 8 x 4 x 1 invocations.
| spirv.EntryPoint "GLCompute" @forward_dispatch_49_generic_2x5x9216x64, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_49_generic_2x5x9216x64 "LocalSize", 8, 4, 1 | |
| } | |
| } | |
| } | |
| } | |
| hal.executable private @forward_dispatch_50 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
// Export declaration for @forward_dispatch_50_matmul_18432x320x320 (ordinal 0).
// Its attributes fix the workgroup size at [64, 2, 1] and the subgroup size at 32.
// The region ignores its block arguments and returns the constants (5, 288, 1),
// which appear to be the dispatch workgroup counts in x, y, z -- confirm against
// the IREE HAL dialect documentation.
| hal.executable.export public @forward_dispatch_50_matmul_18432x320x320 ordinal(0) layout(#pipeline_layout8) attributes {subgroup_size = 32 : index, translation_info = #translation4, workgroup_size = [64 : index, 2 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index): | |
| %c5 = arith.constant 5 : index | |
| %c288 = arith.constant 288 : index | |
| %c1 = arith.constant 1 : index | |
| hal.return %c5, %c288, %c1 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, Float16, CooperativeMatrixNV], [SPV_KHR_storage_buffer_storage_class, SPV_NV_cooperative_matrix]> { | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant> | |
| spirv.GlobalVariable @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup> | |
| spirv.GlobalVariable @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup> | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_50_matmul_18432x320x320() "None" attributes {spirv.entry_point_abi = #spirv.entry_point_abi<subgroup_size = 32>} { | |
| %cst9_i32 = spirv.Constant 9 : i32 | |
| %false = spirv.Constant false | |
| %cst5_i32 = spirv.Constant 5 : i32 | |
| %cst3_i32 = spirv.Constant 3 : i32 | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst-33_i32 = spirv.Constant -33 : i32 | |
| %cst642_i32 = spirv.Constant 642 : i32 | |
| %cst-576_i32 = spirv.Constant -576 : i32 | |
| %cst-640_i32 = spirv.Constant -640 : i32 | |
| %cst146_i32 = spirv.Constant 146 : i32 | |
| %cst82_i32 = spirv.Constant 82 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst144_i32 = spirv.Constant 144 : i32 | |
| %cst72_i32 = spirv.Constant 72 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
| %cst160_i32 = spirv.Constant 160 : i32 | |
| %cst80_i32 = spirv.Constant 80 : i32 | |
| %cst36_i32 = spirv.Constant 36 : i32 | |
| %cst4_i32 = spirv.Constant 4 : i32 | |
| %cst1280_i32 = spirv.Constant 1280 : i32 | |
| %cst640_i32 = spirv.Constant 640 : i32 | |
| %cst2560_i32 = spirv.Constant 2560 : i32 | |
| %cst40_i32 = spirv.Constant 40 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst16_i32 = spirv.Constant 16 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst288_i32 = spirv.Constant 288 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst_f16 = spirv.Constant 0.000000e+00 : f16 | |
| %0 = spirv.CompositeConstruct %cst_f16 : (f16) -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %1 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %2 = spirv.CompositeExtract %1[0 : i32] : vector<3xi32> | |
| %3 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %4 = spirv.CompositeExtract %3[1 : i32] : vector<3xi32> | |
| %5 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %6 = spirv.CompositeExtract %5[2 : i32] : vector<3xi32> | |
| %__workgroup_mem__4_addr = spirv.mlir.addressof @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup> | |
| %__workgroup_mem__5_addr = spirv.mlir.addressof @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup> | |
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant> | |
| %7 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %8 = spirv.Load "PushConstant" %7 : i32 | |
| %9 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %10 = spirv.Load "PushConstant" %9 : i32 | |
| %11 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %12 = spirv.Load "PushConstant" %11 : i32 | |
| %13 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst3_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %14 = spirv.Load "PushConstant" %13 : i32 | |
| %15 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst4_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %16 = spirv.Load "PushConstant" %15 : i32 | |
| %17 = spirv.SLessThan %8, %cst0_i32 : i32 | |
| %18 = spirv.ISub %cst-1_i32, %8 : i32 | |
| %19 = spirv.Select %17, %18, %8 : i1, i32 | |
| %20 = spirv.SDiv %19, %cst16_i32 : i32 | |
| %21 = spirv.ISub %cst-1_i32, %20 : i32 | |
| %22 = spirv.Select %17, %21, %20 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %23 = spirv.SLessThan %12, %cst0_i32 : i32 | |
| %24 = spirv.ISub %cst-1_i32, %12 : i32 | |
| %25 = spirv.Select %23, %24, %12 : i1, i32 | |
| %26 = spirv.SDiv %25, %cst16_i32 : i32 | |
| %27 = spirv.ISub %cst-1_i32, %26 : i32 | |
| %28 = spirv.Select %23, %27, %26 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %29 = spirv.SLessThan %14, %cst0_i32 : i32 | |
| %30 = spirv.ISub %cst-1_i32, %14 : i32 | |
| %31 = spirv.Select %29, %30, %14 : i1, i32 | |
| %32 = spirv.SDiv %31, %cst16_i32 : i32 | |
| %33 = spirv.ISub %cst-1_i32, %32 : i32 | |
| %34 = spirv.Select %29, %33, %32 : i1, i32 | |
| %35 = spirv.SLessThan %10, %cst0_i32 : i32 | |
| %36 = spirv.ISub %cst-1_i32, %10 : i32 | |
| %37 = spirv.Select %35, %36, %10 : i1, i32 | |
| %38 = spirv.SDiv %37, %cst16_i32 : i32 | |
| %39 = spirv.ISub %cst-1_i32, %38 : i32 | |
| %40 = spirv.Select %35, %39, %38 : i1, i32 | |
| %41 = spirv.SLessThan %16, %cst0_i32 : i32 | |
| %42 = spirv.ISub %cst-1_i32, %16 : i32 | |
| %43 = spirv.Select %41, %42, %16 : i1, i32 | |
| %44 = spirv.SDiv %43, %cst16_i32 : i32 | |
| %45 = spirv.ISub %cst-1_i32, %44 : i32 | |
| %46 = spirv.Select %41, %45, %44 : i1, i32 | |
| %__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %47 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %48 = spirv.CompositeExtract %47[1 : i32] : vector<3xi32> | |
| %49 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %50 = spirv.CompositeExtract %49[0 : i32] : vector<3xi32> | |
| %51 = spirv.IMul %48, %cst2560_i32 : i32 | |
| %52 = spirv.IAdd %51, %2 : i32 | |
| %53 = spirv.IMul %4, %cst640_i32 : i32 | |
| %54 = spirv.IAdd %52, %53 : i32 | |
| %55 = spirv.IMul %6, %cst1280_i32 : i32 | |
| %56 = spirv.IAdd %54, %55 : i32 | |
| %57 = spirv.IAdd %56, %22 : i32 | |
| %58 = spirv.SLessThan %2, %cst0_i32 : i32 | |
| %59 = spirv.ISub %cst-1_i32, %2 : i32 | |
| %60 = spirv.Select %58, %59, %2 : i1, i32 | |
| %61 = spirv.SDiv %60, %cst4_i32 : i32 | |
| %62 = spirv.ISub %cst-1_i32, %61 : i32 | |
| %63 = spirv.Select %58, %62, %61 : i1, i32 | |
| %64 = spirv.IMul %63, %cst36_i32 : i32 | |
| %65 = spirv.IAdd %57, %64 : i32 | |
| %66 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %65] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %67 = spirv.Load "StorageBuffer" %66 : vector<4xf32> | |
| %68 = spirv.IMul %4, %cst80_i32 : i32 | |
| %69 = spirv.IAdd %2, %68 : i32 | |
| %70 = spirv.IMul %6, %cst160_i32 : i32 | |
| %71 = spirv.IAdd %69, %70 : i32 | |
| %72 = spirv.IAdd %71, %63 : i32 | |
| %73 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %72] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %73, %67 : vector<4xf32> | |
| %74 = spirv.IAdd %65, %cst1280_i32 : i32 | |
| %75 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %74] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %76 = spirv.Load "StorageBuffer" %75 : vector<4xf32> | |
| %77 = spirv.IAdd %72, %cst160_i32 : i32 | |
| %78 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %77] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %78, %76 : vector<4xf32> | |
| %79 = spirv.IMul %4, %cst320_i32 : i32 | |
| %80 = spirv.IAdd %2, %79 : i32 | |
| %81 = spirv.IMul %6, %cst640_i32 : i32 | |
| %82 = spirv.IAdd %80, %81 : i32 | |
| %83 = spirv.IMul %50, %cst8_i32 : i32 | |
| %84 = spirv.IAdd %82, %83 : i32 | |
| %85 = spirv.IAdd %84, %28 : i32 | |
| %86 = spirv.SDiv %60, %cst8_i32 : i32 | |
| %87 = spirv.ISub %cst-1_i32, %86 : i32 | |
| %88 = spirv.Select %58, %87, %86 : i1, i32 | |
| %89 = spirv.IMul %88, %cst32_i32 : i32 | |
| %90 = spirv.IAdd %85, %89 : i32 | |
| %91 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %90] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %92 = spirv.Load "StorageBuffer" %91 : vector<4xf32> | |
| %93 = spirv.IMul %4, %cst72_i32 : i32 | |
| %94 = spirv.IAdd %2, %93 : i32 | |
| %95 = spirv.IMul %6, %cst144_i32 : i32 | |
| %96 = spirv.IAdd %94, %95 : i32 | |
| %97 = spirv.IAdd %96, %88 : i32 | |
| %98 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %97] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %98, %92 : vector<4xf32> | |
| %99 = spirv.IAdd %90, %cst640_i32 : i32 | |
| %100 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %99] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %101 = spirv.Load "StorageBuffer" %100 : vector<4xf32> | |
| %102 = spirv.IAdd %97, %cst144_i32 : i32 | |
| %103 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %102] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %103, %101 : vector<4xf32> | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| %104 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %105 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %106 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %107 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %108 = spirv.Variable : !spirv.ptr<i32, Function> | |
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%cst0_i32, %0, %0, %0, %0, %cst0_i32 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32) | |
| ^bb1(%193: i32, %194: !spirv.coopmatrix<16x16xf16, Subgroup>, %195: !spirv.coopmatrix<16x16xf16, Subgroup>, %196: !spirv.coopmatrix<16x16xf16, Subgroup>, %197: !spirv.coopmatrix<16x16xf16, Subgroup>, %198: i32): // 2 preds: ^bb0, ^bb2 | |
| %199 = spirv.SLessThan %193, %cst288_i32 : i32 | |
| spirv.BranchConditional %199, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
| %200 = spirv.IMul %198, %cst320_i32 : i32 | |
| %201 = spirv.IMul %4, %cst160_i32 : i32 | |
| %202 = spirv.IAdd %200, %201 : i32 | |
| %203 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %202] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %204 = spirv.NV.CooperativeMatrixLoad %203, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %205 = spirv.IAdd %202, %cst2_i32 : i32 | |
| %206 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %205] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %207 = spirv.NV.CooperativeMatrixLoad %206, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %208 = spirv.IAdd %202, %cst80_i32 : i32 | |
| %209 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %208] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %210 = spirv.NV.CooperativeMatrixLoad %209, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %211 = spirv.IAdd %202, %cst82_i32 : i32 | |
| %212 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %211] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %213 = spirv.NV.CooperativeMatrixLoad %212, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %214 = spirv.IMul %198, %cst288_i32 : i32 | |
| %215 = spirv.SDiv %60, %cst32_i32 : i32 | |
| %216 = spirv.ISub %cst-1_i32, %215 : i32 | |
| %217 = spirv.Select %58, %216, %215 : i1, i32 | |
| %218 = spirv.IMul %217, %cst4_i32 : i32 | |
| %219 = spirv.IAdd %214, %218 : i32 | |
| %220 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %219] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %221 = spirv.NV.CooperativeMatrixLoad %220, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %222 = spirv.IAdd %219, %cst2_i32 : i32 | |
| %223 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %222] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %224 = spirv.NV.CooperativeMatrixLoad %223, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %225 = spirv.IAdd %219, %cst144_i32 : i32 | |
| %226 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %225] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %227 = spirv.NV.CooperativeMatrixLoad %226, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %228 = spirv.IAdd %219, %cst146_i32 : i32 | |
| %229 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %228] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %230 = spirv.NV.CooperativeMatrixLoad %229, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %231 = spirv.NV.CooperativeMatrixMulAdd %204, %221, %194 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %232 = spirv.NV.CooperativeMatrixMulAdd %207, %227, %231 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %233 = spirv.NV.CooperativeMatrixMulAdd %204, %224, %195 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %234 = spirv.NV.CooperativeMatrixMulAdd %207, %230, %233 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %235 = spirv.NV.CooperativeMatrixMulAdd %210, %221, %196 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %236 = spirv.NV.CooperativeMatrixMulAdd %213, %227, %235 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %237 = spirv.NV.CooperativeMatrixMulAdd %210, %224, %197 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %238 = spirv.NV.CooperativeMatrixMulAdd %213, %230, %237 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %239 = spirv.IAdd %193, %cst32_i32 : i32 | |
| %240 = spirv.SLessThan %239, %cst0_i32 : i32 | |
| %241 = spirv.ISub %cst-33_i32, %193 : i32 | |
| %242 = spirv.Select %240, %241, %239 : i1, i32 | |
| %243 = spirv.SDiv %242, %cst8_i32 : i32 | |
| %244 = spirv.ISub %cst-1_i32, %243 : i32 | |
| %245 = spirv.Select %240, %244, %243 : i1, i32 | |
| %246 = spirv.IAdd %57, %245 : i32 | |
| %247 = spirv.IAdd %246, %64 : i32 | |
| %248 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %247] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %249 = spirv.Load "StorageBuffer" %248 : vector<4xf32> | |
| %250 = spirv.SDiv %242, %cst32_i32 : i32 | |
| %251 = spirv.ISub %cst-1_i32, %250 : i32 | |
| %252 = spirv.Select %240, %251, %250 : i1, i32 | |
| %253 = spirv.GL.SAbs %252 : i32 | |
| %254 = spirv.GL.SAbs %cst2_i32 : i32 | |
| %255 = spirv.UMod %253, %254 : i32 | |
| %256 = spirv.IEqual %252, %253 : i32 | |
| %257 = spirv.SNegate %255 : i32 | |
| %258 = spirv.Select %256, %255, %257 : i1, i32 | |
| %259 = spirv.SLessThan %258, %cst0_i32 : i32 | |
| %260 = spirv.IAdd %258, %cst2_i32 : i32 | |
| %261 = spirv.Select %259, %260, %258 : i1, i32 | |
| %262 = spirv.IMul %252, %cst320_i32 : i32 | |
| %263 = spirv.IAdd %262, %71 : i32 | |
| %264 = spirv.SLessThan %252, %cst0_i32 : i32 | |
| %265 = spirv.ISub %cst-1_i32, %252 : i32 | |
| %266 = spirv.Select %264, %265, %252 : i1, i32 | |
| %267 = spirv.SDiv %266, %cst2_i32 : i32 | |
| %268 = spirv.ISub %cst-1_i32, %267 : i32 | |
| %269 = spirv.Select %264, %268, %267 : i1, i32 | |
| %270 = spirv.IMul %269, %cst-640_i32 : i32 | |
| %271 = spirv.IAdd %263, %270 : i32 | |
| %272 = spirv.IAdd %271, %63 : i32 | |
| %273 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %272] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %273, %249 : vector<4xf32> | |
| %274 = spirv.IAdd %247, %cst1280_i32 : i32 | |
| %275 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %274] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %276 = spirv.Load "StorageBuffer" %275 : vector<4xf32> | |
| %277 = spirv.IAdd %272, %cst160_i32 : i32 | |
| %278 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %277] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %278, %276 : vector<4xf32> | |
| %279 = spirv.IMul %239, %cst40_i32 : i32 | |
| %280 = spirv.IAdd %279, %2 : i32 | |
| %281 = spirv.IAdd %280, %79 : i32 | |
| %282 = spirv.IAdd %281, %81 : i32 | |
| %283 = spirv.IAdd %282, %83 : i32 | |
| %284 = spirv.IAdd %283, %28 : i32 | |
| %285 = spirv.IAdd %284, %89 : i32 | |
| %286 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %285] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %287 = spirv.Load "StorageBuffer" %286 : vector<4xf32> | |
| %288 = spirv.IMul %252, %cst288_i32 : i32 | |
| %289 = spirv.IAdd %288, %96 : i32 | |
| %290 = spirv.IMul %269, %cst-576_i32 : i32 | |
| %291 = spirv.IAdd %289, %290 : i32 | |
| %292 = spirv.IAdd %291, %88 : i32 | |
| %293 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %292] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %293, %287 : vector<4xf32> | |
| %294 = spirv.IAdd %285, %cst640_i32 : i32 | |
| %295 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %294] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %296 = spirv.Load "StorageBuffer" %295 : vector<4xf32> | |
| %297 = spirv.IAdd %292, %cst144_i32 : i32 | |
| %298 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %297] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %298, %296 : vector<4xf32> | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| spirv.Store "Function" %104, %232 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %105, %234 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %106, %236 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %107, %238 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %108, %261 : i32 | |
| spirv.Branch ^bb1(%239, %232, %234, %236, %238, %261 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
| %109 = spirv.Load "Function" %108 : i32 | |
| %110 = spirv.Load "Function" %107 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %111 = spirv.Load "Function" %106 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %112 = spirv.Load "Function" %105 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %113 = spirv.Load "Function" %104 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %114 = spirv.IMul %4, %cst160_i32 : i32 | |
| %115 = spirv.IMul %109, %cst320_i32 : i32 | |
| %116 = spirv.IAdd %114, %115 : i32 | |
| %117 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %116] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %118 = spirv.NV.CooperativeMatrixLoad %117, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %119 = spirv.IAdd %116, %cst2_i32 : i32 | |
| %120 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %119] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %121 = spirv.NV.CooperativeMatrixLoad %120, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %122 = spirv.IAdd %116, %cst80_i32 : i32 | |
| %123 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %122] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %124 = spirv.NV.CooperativeMatrixLoad %123, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %125 = spirv.IAdd %116, %cst82_i32 : i32 | |
| %126 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %125] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %127 = spirv.NV.CooperativeMatrixLoad %126, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %128 = spirv.IMul %109, %cst288_i32 : i32 | |
| %129 = spirv.SDiv %60, %cst32_i32 : i32 | |
| %130 = spirv.ISub %cst-1_i32, %129 : i32 | |
| %131 = spirv.Select %58, %130, %129 : i1, i32 | |
| %132 = spirv.IMul %131, %cst4_i32 : i32 | |
| %133 = spirv.IAdd %128, %132 : i32 | |
| %134 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %133] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %135 = spirv.NV.CooperativeMatrixLoad %134, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %136 = spirv.IAdd %133, %cst2_i32 : i32 | |
| %137 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %136] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %138 = spirv.NV.CooperativeMatrixLoad %137, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %139 = spirv.IAdd %133, %cst144_i32 : i32 | |
| %140 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %139] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %141 = spirv.NV.CooperativeMatrixLoad %140, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %142 = spirv.IAdd %133, %cst146_i32 : i32 | |
| %143 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %142] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %144 = spirv.NV.CooperativeMatrixLoad %143, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %145 = spirv.NV.CooperativeMatrixMulAdd %118, %135, %113 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %146 = spirv.NV.CooperativeMatrixMulAdd %121, %141, %145 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %147 = spirv.NV.CooperativeMatrixMulAdd %118, %138, %112 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %148 = spirv.NV.CooperativeMatrixMulAdd %121, %144, %147 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %149 = spirv.NV.CooperativeMatrixMulAdd %124, %135, %111 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %150 = spirv.NV.CooperativeMatrixMulAdd %127, %141, %149 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %151 = spirv.NV.CooperativeMatrixMulAdd %124, %138, %110 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %152 = spirv.NV.CooperativeMatrixMulAdd %127, %144, %151 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %153 = spirv.IAdd %83, %34 : i32 | |
| %154 = spirv.IAdd %153, %132 : i32 | |
| %155 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %154] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %156 = spirv.NV.CooperativeMatrixLoad %155, %cst0_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %157 = spirv.IAdd %154, %cst2_i32 : i32 | |
| %158 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %157] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %159 = spirv.NV.CooperativeMatrixLoad %158, %cst0_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %160 = spirv.IMul %4, %cst1280_i32 : i32 | |
| %161 = spirv.IAdd %51, %160 : i32 | |
| %162 = spirv.IAdd %161, %83 : i32 | |
| %163 = spirv.IAdd %162, %40 : i32 | |
| %164 = spirv.IAdd %163, %132 : i32 | |
| %165 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %164] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %166 = spirv.NV.CooperativeMatrixLoad %165, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %167 = spirv.IAdd %164, %cst2_i32 : i32 | |
| %168 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %167] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %169 = spirv.NV.CooperativeMatrixLoad %168, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %170 = spirv.IAdd %164, %cst640_i32 : i32 | |
| %171 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %170] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %172 = spirv.NV.CooperativeMatrixLoad %171, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %173 = spirv.IAdd %164, %cst642_i32 : i32 | |
| %174 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %173] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %175 = spirv.NV.CooperativeMatrixLoad %174, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %176 = spirv.FAdd %156, %146 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %177 = spirv.FAdd %159, %148 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %178 = spirv.FAdd %156, %150 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %179 = spirv.FAdd %159, %152 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %180 = spirv.FAdd %176, %166 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %181 = spirv.FAdd %177, %169 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %182 = spirv.FAdd %178, %172 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %183 = spirv.FAdd %179, %175 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %184 = spirv.IAdd %162, %46 : i32 | |
| %185 = spirv.IAdd %184, %132 : i32 | |
| %186 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %185] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %186, %180, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %187 = spirv.IAdd %185, %cst2_i32 : i32 | |
| %188 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %187] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %188, %181, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %189 = spirv.IAdd %185, %cst640_i32 : i32 | |
| %190 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %189] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %190, %182, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %191 = spirv.IAdd %185, %cst642_i32 : i32 | |
| %192 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %191] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %192, %183, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Return | |
| } | |
// Declares @forward_dispatch_50_matmul_18432x320x320 as a "GLCompute" entry point; its interface list names the
// LocalInvocationId and WorkgroupId built-in variables.
| spirv.EntryPoint "GLCompute" @forward_dispatch_50_matmul_18432x320x320, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__ | |
// Sets the entry point's "LocalSize" execution mode to 64 x 2 x 1 (128 invocations per workgroup).
| spirv.ExecutionMode @forward_dispatch_50_matmul_18432x320x320 "LocalSize", 64, 2, 1 | |
| } | |
| } | |
| } | |
| } | |
| hal.executable private @forward_dispatch_55 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
// Public export (ordinal 0) of the dispatch_55 matmul entry point, using pipeline layout #pipeline_layout10.
// Attributes: subgroup_size = 32, workgroup_size = [128, 1, 1], translation_info = #translation4.
| hal.executable.export public @forward_dispatch_55_matmul_160x320x1024 ordinal(0) layout(#pipeline_layout10) attributes {subgroup_size = 32 : index, translation_info = #translation4, workgroup_size = [128 : index, 1 : index, 1 : index]} { | |
// The region takes the device plus three index arguments; none of them are used below.
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
| %c5 = arith.constant 5 : index | |
| %c1 = arith.constant 1 : index | |
// Returns the constant triple (5, 5, 1).
// NOTE(review): presumably the workgroup count in (x, y, z), i.e. 25 workgroups -- confirm against the IREE HAL dialect docs.
| hal.return %c5, %c5, %c1 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, Float16, CooperativeMatrixNV], [SPV_KHR_storage_buffer_storage_class, SPV_NV_cooperative_matrix]> { | |
// Module-level variables declared for the dispatch_55 SPIR-V module.
// Built-in "WorkgroupId" input, a vector of 3 x i32.
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
// Push-constant block: a struct holding an array of 2 x i32 with stride 4.
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
// Two Workgroup-storage arrays of vector<4xf32>: 576 elements and 320 elements.
| spirv.GlobalVariable @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup> | |
| spirv.GlobalVariable @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup> | |
// Built-in "LocalInvocationId" input, a vector of 3 x i32.
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
// Three StorageBuffer variables bound as bind(0, 0), bind(0, 1) and bind(0, 2); each wraps a runtime array of
// vector<4xf32> with stride 16.
// NOTE(review): the bind(...) pair is presumably (descriptor set, binding) -- confirm against the SPIR-V dialect docs.
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_55_matmul_160x320x1024() "None" attributes {spirv.entry_point_abi = #spirv.entry_point_abi<subgroup_size = 32>} { | |
| %cst9_i32 = spirv.Constant 9 : i32 | |
| %false = spirv.Constant false | |
| %cst5_i32 = spirv.Constant 5 : i32 | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst-33_i32 = spirv.Constant -33 : i32 | |
| %cst1280_i32 = spirv.Constant 1280 : i32 | |
| %cst-576_i32 = spirv.Constant -576 : i32 | |
| %cst40_i32 = spirv.Constant 40 : i32 | |
| %cst-320_i32 = spirv.Constant -320 : i32 | |
| %cst288_i32 = spirv.Constant 288 : i32 | |
| %cst82_i32 = spirv.Constant 82 : i32 | |
| %cst80_i32 = spirv.Constant 80 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst144_i32 = spirv.Constant 144 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst640_i32 = spirv.Constant 640 : i32 | |
| %cst160_i32 = spirv.Constant 160 : i32 | |
| %cst16752_i32 = spirv.Constant 16752 : i32 | |
| %cst124_i32 = spirv.Constant 124 : i32 | |
| %cst4_i32 = spirv.Constant 4 : i32 | |
| %cst4096_i32 = spirv.Constant 4096 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst16_i32 = spirv.Constant 16 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst992_i32 = spirv.Constant 992 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst_f16 = spirv.Constant 0.000000e+00 : f16 | |
| %0 = spirv.CompositeConstruct %cst_f16 : (f16) -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %1 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %2 = spirv.CompositeExtract %1[0 : i32] : vector<3xi32> | |
| %3 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %4 = spirv.CompositeExtract %3[1 : i32] : vector<3xi32> | |
| %5 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %6 = spirv.CompositeExtract %5[2 : i32] : vector<3xi32> | |
| %__workgroup_mem__4_addr = spirv.mlir.addressof @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup> | |
| %__workgroup_mem__5_addr = spirv.mlir.addressof @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup> | |
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| %7 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %8 = spirv.Load "PushConstant" %7 : i32 | |
| %9 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %10 = spirv.Load "PushConstant" %9 : i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %11 = spirv.SLessThan %8, %cst0_i32 : i32 | |
| %12 = spirv.ISub %cst-1_i32, %8 : i32 | |
| %13 = spirv.Select %11, %12, %8 : i1, i32 | |
| %14 = spirv.SDiv %13, %cst16_i32 : i32 | |
| %15 = spirv.ISub %cst-1_i32, %14 : i32 | |
| %16 = spirv.Select %11, %15, %14 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %17 = spirv.SLessThan %10, %cst0_i32 : i32 | |
| %18 = spirv.ISub %cst-1_i32, %10 : i32 | |
| %19 = spirv.Select %17, %18, %10 : i1, i32 | |
| %20 = spirv.SDiv %19, %cst16_i32 : i32 | |
| %21 = spirv.ISub %cst-1_i32, %20 : i32 | |
| %22 = spirv.Select %17, %21, %20 : i1, i32 | |
| %__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %23 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %24 = spirv.CompositeExtract %23[1 : i32] : vector<3xi32> | |
| %25 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %26 = spirv.CompositeExtract %25[0 : i32] : vector<3xi32> | |
| %27 = spirv.IMul %24, %cst4096_i32 : i32 | |
| %28 = spirv.IAdd %27, %2 : i32 | |
| %29 = spirv.IMul %4, %cst4096_i32 : i32 | |
| %30 = spirv.IAdd %28, %29 : i32 | |
| %31 = spirv.IMul %6, %cst4096_i32 : i32 | |
| %32 = spirv.IAdd %30, %31 : i32 | |
| %33 = spirv.SLessThan %2, %cst0_i32 : i32 | |
| %34 = spirv.ISub %cst-1_i32, %2 : i32 | |
| %35 = spirv.Select %33, %34, %2 : i1, i32 | |
| %36 = spirv.SDiv %35, %cst4_i32 : i32 | |
| %37 = spirv.ISub %cst-1_i32, %36 : i32 | |
| %38 = spirv.Select %33, %37, %36 : i1, i32 | |
| %39 = spirv.IMul %38, %cst124_i32 : i32 | |
| %40 = spirv.IAdd %32, %39 : i32 | |
| %41 = spirv.IAdd %40, %cst16752_i32 : i32 | |
| %42 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %41] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %43 = spirv.Load "StorageBuffer" %42 : vector<4xf32> | |
| %44 = spirv.IMul %4, %cst160_i32 : i32 | |
| %45 = spirv.IAdd %2, %44 : i32 | |
| %46 = spirv.IMul %6, %cst160_i32 : i32 | |
| %47 = spirv.IAdd %45, %46 : i32 | |
| %48 = spirv.IAdd %47, %38 : i32 | |
| %49 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %48] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %49, %43 : vector<4xf32> | |
| %50 = spirv.IMul %4, %cst640_i32 : i32 | |
| %51 = spirv.IAdd %2, %50 : i32 | |
| %52 = spirv.IMul %6, %cst640_i32 : i32 | |
| %53 = spirv.IAdd %51, %52 : i32 | |
| %54 = spirv.IMul %26, %cst8_i32 : i32 | |
| %55 = spirv.IAdd %53, %54 : i32 | |
| %56 = spirv.IAdd %55, %16 : i32 | |
| %57 = spirv.SDiv %35, %cst8_i32 : i32 | |
| %58 = spirv.ISub %cst-1_i32, %57 : i32 | |
| %59 = spirv.Select %33, %58, %57 : i1, i32 | |
| %60 = spirv.IMul %59, %cst32_i32 : i32 | |
| %61 = spirv.IAdd %56, %60 : i32 | |
| %62 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %61] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %63 = spirv.Load "StorageBuffer" %62 : vector<4xf32> | |
| %64 = spirv.IMul %4, %cst144_i32 : i32 | |
| %65 = spirv.IAdd %2, %64 : i32 | |
| %66 = spirv.IMul %6, %cst144_i32 : i32 | |
| %67 = spirv.IAdd %65, %66 : i32 | |
| %68 = spirv.IAdd %67, %59 : i32 | |
| %69 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %68] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %69, %63 : vector<4xf32> | |
| %70 = spirv.IAdd %61, %cst640_i32 : i32 | |
| %71 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %70] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %72 = spirv.Load "StorageBuffer" %71 : vector<4xf32> | |
| %73 = spirv.IAdd %68, %cst144_i32 : i32 | |
| %74 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %73] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %74, %72 : vector<4xf32> | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| %75 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %76 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function> | |
| %77 = spirv.Variable : !spirv.ptr<i32, Function> | |
| spirv.mlir.loop { | |
| spirv.Branch ^bb1(%cst0_i32, %0, %0, %cst0_i32 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32) | |
| ^bb1(%117: i32, %118: !spirv.coopmatrix<16x16xf16, Subgroup>, %119: !spirv.coopmatrix<16x16xf16, Subgroup>, %120: i32): // 2 preds: ^bb0, ^bb2 | |
| %121 = spirv.SLessThan %117, %cst992_i32 : i32 | |
| spirv.BranchConditional %121, ^bb2, ^bb3 | |
| ^bb2: // pred: ^bb1 | |
| %122 = spirv.IMul %120, %cst160_i32 : i32 | |
| %123 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %122] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %124 = spirv.NV.CooperativeMatrixLoad %123, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %125 = spirv.IAdd %122, %cst2_i32 : i32 | |
| %126 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %125] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %127 = spirv.NV.CooperativeMatrixLoad %126, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %128 = spirv.IAdd %122, %cst80_i32 : i32 | |
| %129 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %128] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %130 = spirv.NV.CooperativeMatrixLoad %129, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %131 = spirv.IAdd %122, %cst82_i32 : i32 | |
| %132 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %131] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %133 = spirv.NV.CooperativeMatrixLoad %132, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %134 = spirv.IMul %120, %cst288_i32 : i32 | |
| %135 = spirv.SDiv %35, %cst32_i32 : i32 | |
| %136 = spirv.ISub %cst-1_i32, %135 : i32 | |
| %137 = spirv.Select %33, %136, %135 : i1, i32 | |
| %138 = spirv.IMul %137, %cst2_i32 : i32 | |
| %139 = spirv.IAdd %134, %138 : i32 | |
| %140 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %139] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %141 = spirv.NV.CooperativeMatrixLoad %140, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %142 = spirv.IAdd %139, %cst144_i32 : i32 | |
| %143 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %142] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %144 = spirv.NV.CooperativeMatrixLoad %143, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %145 = spirv.NV.CooperativeMatrixMulAdd %124, %141, %118 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %146 = spirv.NV.CooperativeMatrixMulAdd %127, %144, %145 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %147 = spirv.NV.CooperativeMatrixMulAdd %130, %141, %119 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %148 = spirv.NV.CooperativeMatrixMulAdd %133, %144, %147 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %149 = spirv.IAdd %117, %cst32_i32 : i32 | |
| %150 = spirv.SLessThan %149, %cst0_i32 : i32 | |
| %151 = spirv.ISub %cst-33_i32, %117 : i32 | |
| %152 = spirv.Select %150, %151, %149 : i1, i32 | |
| %153 = spirv.SDiv %152, %cst8_i32 : i32 | |
| %154 = spirv.ISub %cst-1_i32, %153 : i32 | |
| %155 = spirv.Select %150, %154, %153 : i1, i32 | |
| %156 = spirv.IAdd %32, %155 : i32 | |
| %157 = spirv.IAdd %156, %39 : i32 | |
| %158 = spirv.IAdd %157, %cst16752_i32 : i32 | |
| %159 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %158] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %160 = spirv.Load "StorageBuffer" %159 : vector<4xf32> | |
| %161 = spirv.SDiv %152, %cst32_i32 : i32 | |
| %162 = spirv.ISub %cst-1_i32, %161 : i32 | |
| %163 = spirv.Select %150, %162, %161 : i1, i32 | |
| %164 = spirv.GL.SAbs %163 : i32 | |
| %165 = spirv.GL.SAbs %cst2_i32 : i32 | |
| %166 = spirv.UMod %164, %165 : i32 | |
| %167 = spirv.IEqual %163, %164 : i32 | |
| %168 = spirv.SNegate %166 : i32 | |
| %169 = spirv.Select %167, %166, %168 : i1, i32 | |
| %170 = spirv.SLessThan %169, %cst0_i32 : i32 | |
| %171 = spirv.IAdd %169, %cst2_i32 : i32 | |
| %172 = spirv.Select %170, %171, %169 : i1, i32 | |
| %173 = spirv.IMul %163, %cst160_i32 : i32 | |
| %174 = spirv.IAdd %173, %47 : i32 | |
| %175 = spirv.SLessThan %163, %cst0_i32 : i32 | |
| %176 = spirv.ISub %cst-1_i32, %163 : i32 | |
| %177 = spirv.Select %175, %176, %163 : i1, i32 | |
| %178 = spirv.SDiv %177, %cst2_i32 : i32 | |
| %179 = spirv.ISub %cst-1_i32, %178 : i32 | |
| %180 = spirv.Select %175, %179, %178 : i1, i32 | |
| %181 = spirv.IMul %180, %cst-320_i32 : i32 | |
| %182 = spirv.IAdd %174, %181 : i32 | |
| %183 = spirv.IAdd %182, %38 : i32 | |
| %184 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %183] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %184, %160 : vector<4xf32> | |
| %185 = spirv.IMul %149, %cst40_i32 : i32 | |
| %186 = spirv.IAdd %185, %2 : i32 | |
| %187 = spirv.IAdd %186, %50 : i32 | |
| %188 = spirv.IAdd %187, %52 : i32 | |
| %189 = spirv.IAdd %188, %54 : i32 | |
| %190 = spirv.IAdd %189, %16 : i32 | |
| %191 = spirv.IAdd %190, %60 : i32 | |
| %192 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %191] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %193 = spirv.Load "StorageBuffer" %192 : vector<4xf32> | |
| %194 = spirv.IMul %163, %cst288_i32 : i32 | |
| %195 = spirv.IAdd %194, %67 : i32 | |
| %196 = spirv.IMul %180, %cst-576_i32 : i32 | |
| %197 = spirv.IAdd %195, %196 : i32 | |
| %198 = spirv.IAdd %197, %59 : i32 | |
| %199 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %198] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %199, %193 : vector<4xf32> | |
| %200 = spirv.IAdd %191, %cst640_i32 : i32 | |
| %201 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %200] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %202 = spirv.Load "StorageBuffer" %201 : vector<4xf32> | |
| %203 = spirv.IAdd %198, %cst144_i32 : i32 | |
| %204 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %203] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %204, %202 : vector<4xf32> | |
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
| spirv.Store "Function" %75, %146 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %76, %148 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Store "Function" %77, %172 : i32 | |
| spirv.Branch ^bb1(%149, %146, %148, %172 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32) | |
| ^bb3: // pred: ^bb1 | |
| spirv.mlir.merge | |
| } | |
| %78 = spirv.Load "Function" %77 : i32 | |
| %79 = spirv.Load "Function" %76 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %80 = spirv.Load "Function" %75 : !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %81 = spirv.IMul %78, %cst160_i32 : i32 | |
| %82 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %81] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %83 = spirv.NV.CooperativeMatrixLoad %82, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %84 = spirv.IAdd %81, %cst2_i32 : i32 | |
| %85 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %84] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %86 = spirv.NV.CooperativeMatrixLoad %85, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %87 = spirv.IAdd %81, %cst80_i32 : i32 | |
| %88 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %87] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %89 = spirv.NV.CooperativeMatrixLoad %88, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %90 = spirv.IAdd %81, %cst82_i32 : i32 | |
| %91 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %90] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %92 = spirv.NV.CooperativeMatrixLoad %91, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %93 = spirv.IMul %78, %cst288_i32 : i32 | |
| %94 = spirv.SDiv %35, %cst32_i32 : i32 | |
| %95 = spirv.ISub %cst-1_i32, %94 : i32 | |
| %96 = spirv.Select %33, %95, %94 : i1, i32 | |
| %97 = spirv.IMul %96, %cst2_i32 : i32 | |
| %98 = spirv.IAdd %93, %97 : i32 | |
| %99 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %98] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %100 = spirv.NV.CooperativeMatrixLoad %99, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %101 = spirv.IAdd %98, %cst144_i32 : i32 | |
| %102 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %101] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %103 = spirv.NV.CooperativeMatrixLoad %102, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %104 = spirv.NV.CooperativeMatrixMulAdd %83, %100, %80 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %105 = spirv.NV.CooperativeMatrixMulAdd %86, %103, %104 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %106 = spirv.NV.CooperativeMatrixMulAdd %89, %100, %79 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %107 = spirv.NV.CooperativeMatrixMulAdd %92, %103, %106 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %108 = spirv.IMul %24, %cst1280_i32 : i32 | |
| %109 = spirv.IMul %4, %cst1280_i32 : i32 | |
| %110 = spirv.IAdd %108, %109 : i32 | |
| %111 = spirv.IAdd %110, %54 : i32 | |
| %112 = spirv.IAdd %111, %22 : i32 | |
| %113 = spirv.IAdd %112, %97 : i32 | |
| %114 = spirv.IAdd %113, %cst640_i32 : i32 | |
| %115 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %114] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %115, %107, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %116 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %113] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %116, %105, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_55_matmul_160x320x1024, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__ | |
| spirv.ExecutionMode @forward_dispatch_55_matmul_160x320x1024 "LocalSize", 128, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
// Executable forward_dispatch_57: a single Vulkan/SPIR-V variant exporting
// @forward_dispatch_57_generic_2x77x5x64. Each invocation loads one f16 element
// from binding (0, 0) and stores it unchanged to binding (0, 1); only the
// element index differs between the load and the store.
// NOTE(review): the two index formulas look like a [2][77][5][64] -> [2][5][77][64]
// permutation (24640 = 77*5*64, 4928 = 77*64) - confirm against the producing linalg op.
| hal.executable private @forward_dispatch_57 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
// Export region: returns the constants (2, 5, 154); presumably the workgroup
// count in x, y, z - TODO confirm. The workgroup_size attribute [32, 1, 1]
// matches the LocalSize execution mode declared at the end of the spirv.module.
| hal.executable.export public @forward_dispatch_57_generic_2x77x5x64 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation, workgroup_size = [32 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| %c2 = arith.constant 2 : index | |
| %c5 = arith.constant 5 : index | |
| %c154 = arith.constant 154 : index | |
| hal.return %c2, %c5, %c154 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
// Module interface: two built-in inputs, a push-constant block holding two i32
// values, and two storage buffers typed as runtime arrays of f16 (stride 2).
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_57_generic_2x77x5x64() "None" { | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst4928_i32 = spirv.Constant 4928 : i32 | |
| %cst24640_i32 = spirv.Constant 24640 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst64_i32 = spirv.Constant 64 : i32 | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst77_i32 = spirv.Constant 77 : i32 | |
// %1 and %3 are push constants [0] and [1].
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| %0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %1 = spirv.Load "PushConstant" %0 : i32 | |
| %2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %3 = spirv.Load "PushConstant" %2 : i32 | |
// %9 = signed floor-division of push constant [0] by 2: for negative x the
// result is -1 - ((-1 - x) / 2), otherwise x / 2.
// NOTE(review): presumably converts a byte offset into an f16 element offset - confirm.
| %4 = spirv.SLessThan %1, %cst0_i32 : i32 | |
| %5 = spirv.ISub %cst-1_i32, %1 : i32 | |
| %6 = spirv.Select %4, %5, %1 : i1, i32 | |
| %7 = spirv.SDiv %6, %cst2_i32 : i32 | |
| %8 = spirv.ISub %cst-1_i32, %7 : i32 | |
| %9 = spirv.Select %4, %8, %7 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
// %15 = the same floor-division by 2, applied to push constant [1].
| %10 = spirv.SLessThan %3, %cst0_i32 : i32 | |
| %11 = spirv.ISub %cst-1_i32, %3 : i32 | |
| %12 = spirv.Select %10, %11, %3 : i1, i32 | |
| %13 = spirv.SDiv %12, %cst2_i32 : i32 | |
| %14 = spirv.ISub %cst-1_i32, %13 : i32 | |
| %15 = spirv.Select %10, %14, %13 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
// WorkgroupId.z (%17) is split into %18 = z / 77 and %21 = z % 77 (unsigned);
// %20 = WorkgroupId.y, %23 = WorkgroupId.x.
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %16 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %17 = spirv.CompositeExtract %16[2 : i32] : vector<3xi32> | |
| %18 = spirv.UDiv %17, %cst77_i32 : i32 | |
| %19 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %20 = spirv.CompositeExtract %19[1 : i32] : vector<3xi32> | |
| %21 = spirv.UMod %17, %cst77_i32 : i32 | |
| %22 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %23 = spirv.CompositeExtract %22[0 : i32] : vector<3xi32> | |
// %25, %27, %29 = LocalInvocationId x, y, z.
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %24 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %25 = spirv.CompositeExtract %24[0 : i32] : vector<3xi32> | |
| %26 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %27 = spirv.CompositeExtract %26[1 : i32] : vector<3xi32> | |
| %28 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %29 = spirv.CompositeExtract %28[2 : i32] : vector<3xi32> | |
// Source element index %42 =
//   (wg.z % 77)*320 + lid.z*320 + wg.y*64 + lid.y*64 + lid.x + wg.x*32
//   + (wg.z / 77)*24640 + %9
| %30 = spirv.IMul %21, %cst320_i32 : i32 | |
| %31 = spirv.IMul %29, %cst320_i32 : i32 | |
| %32 = spirv.IAdd %30, %31 : i32 | |
| %33 = spirv.IMul %20, %cst64_i32 : i32 | |
| %34 = spirv.IAdd %32, %33 : i32 | |
| %35 = spirv.IMul %27, %cst64_i32 : i32 | |
| %36 = spirv.IAdd %34, %35 : i32 | |
| %37 = spirv.IAdd %36, %25 : i32 | |
| %38 = spirv.IMul %23, %cst32_i32 : i32 | |
| %39 = spirv.IAdd %37, %38 : i32 | |
| %40 = spirv.IMul %18, %cst24640_i32 : i32 | |
| %41 = spirv.IAdd %39, %40 : i32 | |
| %42 = spirv.IAdd %41, %9 : i32 | |
| %43 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %42] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %44 = spirv.Load "StorageBuffer" %43 : f16 | |
// Destination element index %55 =
//   wg.y*4928 + lid.y*4928 + (wg.z % 77)*64 + lid.z*64 + lid.x + wg.x*32
//   + (wg.z / 77)*24640 + %15
// The %38 and %40 terms are reused from the source index computation.
| %45 = spirv.IMul %20, %cst4928_i32 : i32 | |
| %46 = spirv.IMul %27, %cst4928_i32 : i32 | |
| %47 = spirv.IAdd %45, %46 : i32 | |
| %48 = spirv.IMul %21, %cst64_i32 : i32 | |
| %49 = spirv.IAdd %47, %48 : i32 | |
| %50 = spirv.IMul %29, %cst64_i32 : i32 | |
| %51 = spirv.IAdd %49, %50 : i32 | |
| %52 = spirv.IAdd %51, %25 : i32 | |
| %53 = spirv.IAdd %52, %38 : i32 | |
| %54 = spirv.IAdd %53, %40 : i32 | |
| %55 = spirv.IAdd %54, %15 : i32 | |
| %56 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %55] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %56, %44 : f16 | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_57_generic_2x77x5x64, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_57_generic_2x77x5x64 "LocalSize", 32, 1, 1 | |
| } | |
| } | |
| } | |
| } | |
// Executable forward_dispatch_59: a single Vulkan/SPIR-V variant exporting
// @forward_dispatch_59_generic_2x320x77. Each invocation loads one f16 element
// from binding (0, 0), multiplies it by the constant 0.353553385 (converted to
// f16), and stores the product to binding (0, 1) at a differently computed index.
// NOTE(review): the index formulas look like a scaled [2][77][320] -> [2][320][77]
// transpose (24640 = 77*320, 2464 = 32*77), and 0.353553385 is approximately
// 1/sqrt(8) - confirm against the producing linalg op.
| hal.executable private @forward_dispatch_59 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
// Export region: returns the constants (77, 10, 2); presumably the workgroup
// count in x, y, z - TODO confirm. The workgroup_size attribute [1, 32, 1]
// matches the LocalSize execution mode declared at the end of the spirv.module.
| hal.executable.export public @forward_dispatch_59_generic_2x320x77 ordinal(0) layout(#pipeline_layout11) attributes {translation_info = #translation, workgroup_size = [1 : index, 32 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
| %c77 = arith.constant 77 : index | |
| %c10 = arith.constant 10 : index | |
| %c2 = arith.constant 2 : index | |
| hal.return %c77, %c10, %c2 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
// Module interface: two built-in inputs, a push-constant block holding one i32
// value, and two storage buffers typed as runtime arrays of f16 (stride 2).
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<1 x i32, stride=4> [0])>, PushConstant> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_59_generic_2x320x77() "None" { | |
| %cst2464_i32 = spirv.Constant 2464 : i32 | |
| %cst77_i32 = spirv.Constant 77 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
| %cst24640_i32 = spirv.Constant 24640 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst_f32 = spirv.Constant 0.353553385 : f32 | |
// %1 is the single push constant; %7 is its signed floor-division by 2
// (for negative x: -1 - ((-1 - x) / 2), otherwise x / 2).
// NOTE(review): presumably converts a byte offset into an f16 element offset - confirm.
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<1 x i32, stride=4> [0])>, PushConstant> | |
| %0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<1 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %1 = spirv.Load "PushConstant" %0 : i32 | |
| %2 = spirv.SLessThan %1, %cst0_i32 : i32 | |
| %3 = spirv.ISub %cst-1_i32, %1 : i32 | |
| %4 = spirv.Select %2, %3, %1 : i1, i32 | |
| %5 = spirv.SDiv %4, %cst2_i32 : i32 | |
| %6 = spirv.ISub %cst-1_i32, %5 : i32 | |
| %7 = spirv.Select %2, %6, %5 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
// %9, %11, %13 = WorkgroupId z, y, x.
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %8 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %9 = spirv.CompositeExtract %8[2 : i32] : vector<3xi32> | |
| %10 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %11 = spirv.CompositeExtract %10[1 : i32] : vector<3xi32> | |
| %12 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %13 = spirv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
// %15, %17, %19 = LocalInvocationId x, y, z.
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %14 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %15 = spirv.CompositeExtract %14[0 : i32] : vector<3xi32> | |
| %16 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %17 = spirv.CompositeExtract %16[1 : i32] : vector<3xi32> | |
| %18 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %19 = spirv.CompositeExtract %18[2 : i32] : vector<3xi32> | |
// Source element index %30 =
//   wg.z*24640 + lid.z*24640 + wg.x*320 + lid.x*320 + lid.y + wg.y*32 + %7
// %22 (the two 24640-scaled terms) is reused by the destination index below.
| %20 = spirv.IMul %9, %cst24640_i32 : i32 | |
| %21 = spirv.IMul %19, %cst24640_i32 : i32 | |
| %22 = spirv.IAdd %20, %21 : i32 | |
| %23 = spirv.IMul %13, %cst320_i32 : i32 | |
| %24 = spirv.IAdd %22, %23 : i32 | |
| %25 = spirv.IMul %15, %cst320_i32 : i32 | |
| %26 = spirv.IAdd %24, %25 : i32 | |
| %27 = spirv.IAdd %26, %17 : i32 | |
| %28 = spirv.IMul %11, %cst32_i32 : i32 | |
| %29 = spirv.IAdd %27, %28 : i32 | |
| %30 = spirv.IAdd %29, %7 : i32 | |
| %31 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %30] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %32 = spirv.Load "StorageBuffer" %31 : f16 | |
// Scale the loaded value: the f32 constant is converted to f16 by an explicit
// FConvert op, then multiplied in f16.
| %33 = spirv.FConvert %cst_f32 : f32 to f16 | |
| %34 = spirv.FMul %32, %33 : f16 | |
// Destination element index %40 =
//   wg.z*24640 + lid.z*24640 + lid.y*77 + wg.y*2464 + wg.x + lid.x
// No push-constant-derived offset is added on the store side.
| %35 = spirv.IMul %17, %cst77_i32 : i32 | |
| %36 = spirv.IAdd %22, %35 : i32 | |
| %37 = spirv.IMul %11, %cst2464_i32 : i32 | |
| %38 = spirv.IAdd %36, %37 : i32 | |
| %39 = spirv.IAdd %38, %13 : i32 | |
| %40 = spirv.IAdd %39, %15 : i32 | |
| %41 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %40] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %41, %34 : f16 | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_59_generic_2x320x77, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_59_generic_2x320x77 "LocalSize", 1, 32, 1 | |
| } | |
| } | |
| } | |
| } | |
// forward_dispatch_60 (Vulkan SPIR-V variant): each invocation loads one f16
// from binding (0, 0) and stores it to binding (0, 1) at a differently strided
// index. The export declares workgroup_size = [1, 32, 1].
| hal.executable private @forward_dispatch_60 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
| hal.executable.export public @forward_dispatch_60 ordinal(0) layout(#pipeline_layout11) attributes {translation_info = #translation, workgroup_size = [1 : index, 32 : index, 1 : index]} { | |
// Workgroup-count region: its block arguments are unused; it returns the
// constants (77, 2, 10).
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
| %c77 = arith.constant 77 : index | |
| %c2 = arith.constant 2 : index | |
| %c10 = arith.constant 10 : index | |
| hal.return %c77, %c2, %c10 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
// Interface: one i32 push constant and two f16 runtime arrays (stride 2).
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<1 x i32, stride=4> [0])>, PushConstant> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_60() "None" { | |
| %cst3072_i32 = spirv.Constant 3072 : i32 | |
| %cst96_i32 = spirv.Constant 96 : i32 | |
| %cst6144_i32 = spirv.Constant 6144 : i32 | |
| %cst2464_i32 = spirv.Constant 2464 : i32 | |
| %cst77_i32 = spirv.Constant 77 : i32 | |
| %cst4928_i32 = spirv.Constant 4928 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<1 x i32, stride=4> [0])>, PushConstant> | |
| %0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<1 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %1 = spirv.Load "PushConstant" %0 : i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
// %7: push constant 0 divided by 2. Negative inputs are mapped through
// (-1 - x) before and after the SDiv.
// NOTE(review): this looks like the usual floor-division expansion, presumably
// turning a byte offset into an f16 element offset -- confirm.
| %2 = spirv.SLessThan %1, %cst0_i32 : i32 | |
| %3 = spirv.ISub %cst-1_i32, %1 : i32 | |
| %4 = spirv.Select %2, %3, %1 : i1, i32 | |
| %5 = spirv.SDiv %4, %cst2_i32 : i32 | |
| %6 = spirv.ISub %cst-1_i32, %5 : i32 | |
| %7 = spirv.Select %2, %6, %5 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
// %9 / %11 / %13: WorkgroupId x / y / z. %15 / %17 / %19: LocalInvocationId x / y / z.
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %8 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %9 = spirv.CompositeExtract %8[0 : i32] : vector<3xi32> | |
| %10 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %11 = spirv.CompositeExtract %10[1 : i32] : vector<3xi32> | |
| %12 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %13 = spirv.CompositeExtract %12[2 : i32] : vector<3xi32> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %14 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %15 = spirv.CompositeExtract %14[0 : i32] : vector<3xi32> | |
| %16 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %17 = spirv.CompositeExtract %16[1 : i32] : vector<3xi32> | |
| %18 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %19 = spirv.CompositeExtract %18[2 : i32] : vector<3xi32> | |
// Source index %28 = (wg.z + lid.z) * 4928 + lid.y * 77 + wg.y * 2464 + wg.x + lid.x.
| %20 = spirv.IMul %13, %cst4928_i32 : i32 | |
| %21 = spirv.IMul %19, %cst4928_i32 : i32 | |
| %22 = spirv.IAdd %20, %21 : i32 | |
| %23 = spirv.IMul %17, %cst77_i32 : i32 | |
| %24 = spirv.IAdd %22, %23 : i32 | |
| %25 = spirv.IMul %11, %cst2464_i32 : i32 | |
| %26 = spirv.IAdd %24, %25 : i32 | |
| %27 = spirv.IAdd %26, %9 : i32 | |
| %28 = spirv.IAdd %27, %15 : i32 | |
| %29 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %28] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %30 = spirv.Load "StorageBuffer" %29 : f16 | |
// Destination index %40 = (wg.z + lid.z) * 6144 + lid.y * 96 + wg.y * 3072
// + wg.x + lid.x + %7 (the divided push constant).
| %31 = spirv.IMul %13, %cst6144_i32 : i32 | |
| %32 = spirv.IMul %19, %cst6144_i32 : i32 | |
| %33 = spirv.IAdd %31, %32 : i32 | |
| %34 = spirv.IMul %17, %cst96_i32 : i32 | |
| %35 = spirv.IAdd %33, %34 : i32 | |
| %36 = spirv.IMul %11, %cst3072_i32 : i32 | |
| %37 = spirv.IAdd %35, %36 : i32 | |
| %38 = spirv.IAdd %37, %9 : i32 | |
| %39 = spirv.IAdd %38, %15 : i32 | |
| %40 = spirv.IAdd %39, %7 : i32 | |
| %41 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %40] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %41, %30 : f16 | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_60, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_60 "LocalSize", 1, 32, 1 | |
| } | |
| } | |
| } | |
| } | |
// forward_dispatch_61 (Vulkan SPIR-V variant): entry point
// forward_dispatch_61_batch_matmul_10x9216x96x64. It stages vector<4xf32> data
// from binding (0, 0) into two workgroup-memory buffers, multiplies 16x16 f16
// NV cooperative-matrix tiles in two stages, and stores two result tiles to
// binding (0, 1). The export declares workgroup_size = [64, 2, 1] and
// subgroup_size = 32.
| hal.executable private @forward_dispatch_61 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
| hal.executable.export public @forward_dispatch_61_batch_matmul_10x9216x96x64 ordinal(0) layout(#pipeline_layout4) attributes {subgroup_size = 32 : index, translation_info = #translation4, workgroup_size = [64 : index, 2 : index, 1 : index]} { | |
// Workgroup-count region: its block arguments are unused; it returns the
// constants (3, 144, 10).
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| %c3 = arith.constant 3 : index | |
| %c144 = arith.constant 144 : index | |
| %c10 = arith.constant 10 : index | |
| hal.return %c3, %c144, %c10 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, Float16, CooperativeMatrixNV], [SPV_KHR_storage_buffer_storage_class, SPV_NV_cooperative_matrix]> { | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant> | |
// Two workgroup-memory staging buffers: 320 and 640 vector<4xf32> elements.
| spirv.GlobalVariable @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup> | |
| spirv.GlobalVariable @__workgroup_mem__3 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup> | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_61_batch_matmul_10x9216x96x64() "None" attributes {spirv.entry_point_abi = #spirv.entry_point_abi<subgroup_size = 32>} { | |
| %cst12_i32 = spirv.Constant 12 : i32 | |
| %false = spirv.Constant false | |
| %cst5_i32 = spirv.Constant 5 : i32 | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst110592_i32 = spirv.Constant 110592 : i32 | |
| %cst240_i32 = spirv.Constant 240 : i32 | |
| %cst402_i32 = spirv.Constant 402 : i32 | |
| %cst400_i32 = spirv.Constant 400 : i32 | |
| %cst322_i32 = spirv.Constant 322 : i32 | |
| %cst480_i32 = spirv.Constant 480 : i32 | |
| %cst260_i32 = spirv.Constant 260 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst82_i32 = spirv.Constant 82 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst320_i32 = spirv.Constant 320 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst768_i32 = spirv.Constant 768 : i32 | |
| %cst384_i32 = spirv.Constant 384 : i32 | |
| %cst192_i32 = spirv.Constant 192 : i32 | |
| %cst160_i32 = spirv.Constant 160 : i32 | |
| %cst80_i32 = spirv.Constant 80 : i32 | |
| %cst4_i32 = spirv.Constant 4 : i32 | |
| %cst73728_i32 = spirv.Constant 73728 : i32 | |
| %cst256_i32 = spirv.Constant 256 : i32 | |
| %cst128_i32 = spirv.Constant 128 : i32 | |
| %cst512_i32 = spirv.Constant 512 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst16_i32 = spirv.Constant 16 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst_f16 = spirv.Constant 0.000000e+00 : f16 | |
// %0: 16x16 f16 cooperative matrix built from the 0.0 constant; it seeds both
// accumulator chains below.
| %0 = spirv.CompositeConstruct %cst_f16 : (f16) -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
// %2 / %4 / %6: LocalInvocationId x / y / z.
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %1 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %2 = spirv.CompositeExtract %1[0 : i32] : vector<3xi32> | |
| %3 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %4 = spirv.CompositeExtract %3[1 : i32] : vector<3xi32> | |
| %5 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %6 = spirv.CompositeExtract %5[2 : i32] : vector<3xi32> | |
| %__workgroup_mem__3_addr = spirv.mlir.addressof @__workgroup_mem__3 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup> | |
| %__workgroup_mem__4_addr = spirv.mlir.addressof @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup> | |
// %8 / %10 / %12: push constants 0 / 1 / 2.
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant> | |
| %7 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %8 = spirv.Load "PushConstant" %7 : i32 | |
| %9 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %10 = spirv.Load "PushConstant" %9 : i32 | |
| %11 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %12 = spirv.Load "PushConstant" %11 : i32 | |
// %18, %24, %30: each push constant divided by 16. Negative inputs are mapped
// through (-1 - x) before and after the SDiv.
// NOTE(review): this looks like the usual floor-division expansion, presumably
// turning byte offsets into 16-byte vector<4xf32> element offsets -- confirm.
| %13 = spirv.SLessThan %8, %cst0_i32 : i32 | |
| %14 = spirv.ISub %cst-1_i32, %8 : i32 | |
| %15 = spirv.Select %13, %14, %8 : i1, i32 | |
| %16 = spirv.SDiv %15, %cst16_i32 : i32 | |
| %17 = spirv.ISub %cst-1_i32, %16 : i32 | |
| %18 = spirv.Select %13, %17, %16 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
| %19 = spirv.SLessThan %10, %cst0_i32 : i32 | |
| %20 = spirv.ISub %cst-1_i32, %10 : i32 | |
| %21 = spirv.Select %19, %20, %10 : i1, i32 | |
| %22 = spirv.SDiv %21, %cst16_i32 : i32 | |
| %23 = spirv.ISub %cst-1_i32, %22 : i32 | |
| %24 = spirv.Select %19, %23, %22 : i1, i32 | |
| %25 = spirv.SLessThan %12, %cst0_i32 : i32 | |
| %26 = spirv.ISub %cst-1_i32, %12 : i32 | |
| %27 = spirv.Select %25, %26, %12 : i1, i32 | |
| %28 = spirv.SDiv %27, %cst16_i32 : i32 | |
| %29 = spirv.ISub %cst-1_i32, %28 : i32 | |
| %30 = spirv.Select %25, %29, %28 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer> | |
// %32 / %34 / %36: WorkgroupId z / y / x (note the extraction order).
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %31 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %32 = spirv.CompositeExtract %31[2 : i32] : vector<3xi32> | |
| %33 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %34 = spirv.CompositeExtract %33[1 : i32] : vector<3xi32> | |
| %35 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %36 = spirv.CompositeExtract %35[0 : i32] : vector<3xi32> | |
// %53: index into binding (0, 0) for the first operand:
// wg.y * 512 + lid.x + lid.y * 128 + lid.z * 256 + wg.z * 73728 + %18 + %52,
// where %51 is lid.x divided by 4 (same select/SDiv sequence) and %52 = %51 * 4.
| %37 = spirv.IMul %34, %cst512_i32 : i32 | |
| %38 = spirv.IAdd %37, %2 : i32 | |
| %39 = spirv.IMul %4, %cst128_i32 : i32 | |
| %40 = spirv.IAdd %38, %39 : i32 | |
| %41 = spirv.IMul %6, %cst256_i32 : i32 | |
| %42 = spirv.IAdd %40, %41 : i32 | |
| %43 = spirv.IMul %32, %cst73728_i32 : i32 | |
| %44 = spirv.IAdd %42, %43 : i32 | |
| %45 = spirv.IAdd %44, %18 : i32 | |
| %46 = spirv.SLessThan %2, %cst0_i32 : i32 | |
| %47 = spirv.ISub %cst-1_i32, %2 : i32 | |
| %48 = spirv.Select %46, %47, %2 : i1, i32 | |
| %49 = spirv.SDiv %48, %cst4_i32 : i32 | |
| %50 = spirv.ISub %cst-1_i32, %49 : i32 | |
| %51 = spirv.Select %46, %50, %49 : i1, i32 | |
| %52 = spirv.IMul %51, %cst4_i32 : i32 | |
| %53 = spirv.IAdd %45, %52 : i32 | |
// Stage 1 staging: two vectors of the first operand (%53 and %53 + 256) are
// copied into @__workgroup_mem__3 at %60 and %60 + 160, where
// %60 = lid.x + lid.y * 80 + lid.z * 160 + %51.
| %54 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %53] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %55 = spirv.Load "StorageBuffer" %54 : vector<4xf32> | |
| %56 = spirv.IMul %4, %cst80_i32 : i32 | |
| %57 = spirv.IAdd %2, %56 : i32 | |
| %58 = spirv.IMul %6, %cst160_i32 : i32 | |
| %59 = spirv.IAdd %57, %58 : i32 | |
| %60 = spirv.IAdd %59, %51 : i32 | |
| %61 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %60] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %61, %55 : vector<4xf32> | |
| %62 = spirv.IAdd %53, %cst256_i32 : i32 | |
| %63 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %62] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %64 = spirv.Load "StorageBuffer" %63 : vector<4xf32> | |
| %65 = spirv.IAdd %60, %cst160_i32 : i32 | |
| %66 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %65] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %66, %64 : vector<4xf32> | |
// %77: index into binding (0, 0) for the second operand:
// lid.x + lid.y * 192 + lid.z * 384 + wg.x * 4 + wg.z * 768 + %24 + %51 * 8.
// One vector is staged into @__workgroup_mem__4 at %60.
| %67 = spirv.IMul %4, %cst192_i32 : i32 | |
| %68 = spirv.IAdd %2, %67 : i32 | |
| %69 = spirv.IMul %6, %cst384_i32 : i32 | |
| %70 = spirv.IAdd %68, %69 : i32 | |
| %71 = spirv.IMul %36, %cst4_i32 : i32 | |
| %72 = spirv.IAdd %70, %71 : i32 | |
| %73 = spirv.IMul %32, %cst768_i32 : i32 | |
| %74 = spirv.IAdd %72, %73 : i32 | |
| %75 = spirv.IAdd %74, %24 : i32 | |
| %76 = spirv.IMul %51, %cst8_i32 : i32 | |
| %77 = spirv.IAdd %75, %76 : i32 | |
| %78 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %77] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %79 = spirv.Load "StorageBuffer" %78 : vector<4xf32> | |
| %80 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %60] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %80, %79 : vector<4xf32> | |
// Workgroup barrier between the staging stores above and the cooperative
// matrix loads below.
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
// Stage 1 loads: four 16x16 tiles from @__workgroup_mem__3 at %83 + {0, 2, 80, 82}
// with %83 = lid.y * 160 + lid.z * 320. Every load passes %cst5_i32 and %false
// as its trailing operands.
// NOTE(review): these are presumably the stride and column-major flag -- confirm
// against the op definition.
| %81 = spirv.IMul %4, %cst160_i32 : i32 | |
| %82 = spirv.IMul %6, %cst320_i32 : i32 | |
| %83 = spirv.IAdd %81, %82 : i32 | |
| %84 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %83] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %85 = spirv.NV.CooperativeMatrixLoad %84, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %86 = spirv.IAdd %83, %cst2_i32 : i32 | |
| %87 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %86] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %88 = spirv.NV.CooperativeMatrixLoad %87, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %89 = spirv.IAdd %83, %cst80_i32 : i32 | |
| %90 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %89] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %91 = spirv.NV.CooperativeMatrixLoad %90, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %92 = spirv.IAdd %83, %cst82_i32 : i32 | |
| %93 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %92] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %94 = spirv.NV.CooperativeMatrixLoad %93, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
// Two tiles from @__workgroup_mem__4 at %99 and %99 + 80, where
// %99 = lid.z * 160 + %98 and %98 = (lid.x divided by 32) * 2.
| %95 = spirv.SDiv %48, %cst32_i32 : i32 | |
| %96 = spirv.ISub %cst-1_i32, %95 : i32 | |
| %97 = spirv.Select %46, %96, %95 : i1, i32 | |
| %98 = spirv.IMul %97, %cst2_i32 : i32 | |
| %99 = spirv.IAdd %58, %98 : i32 | |
| %100 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %99] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %101 = spirv.NV.CooperativeMatrixLoad %100, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %102 = spirv.IAdd %99, %cst80_i32 : i32 | |
| %103 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %102] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %104 = spirv.NV.CooperativeMatrixLoad %103, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
// Stage 1 multiply-adds: two accumulator chains, %0 -> %105 -> %106 and
// %0 -> %107 -> %108.
| %105 = spirv.NV.CooperativeMatrixMulAdd %85, %101, %0 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %106 = spirv.NV.CooperativeMatrixMulAdd %88, %104, %105 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %107 = spirv.NV.CooperativeMatrixMulAdd %91, %101, %0 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %108 = spirv.NV.CooperativeMatrixMulAdd %94, %104, %107 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
// Stage 2 staging: source indices %53 + 4 and %53 + 260 go to
// @__workgroup_mem__3 at %60 + 320 and %60 + 480; source index %77 + 384 goes
// to @__workgroup_mem__4 at %65 (= %60 + 160).
| %109 = spirv.IAdd %53, %cst4_i32 : i32 | |
| %110 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %109] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %111 = spirv.Load "StorageBuffer" %110 : vector<4xf32> | |
| %112 = spirv.IAdd %60, %cst320_i32 : i32 | |
| %113 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %112] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %113, %111 : vector<4xf32> | |
| %114 = spirv.IAdd %53, %cst260_i32 : i32 | |
| %115 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %114] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %116 = spirv.Load "StorageBuffer" %115 : vector<4xf32> | |
| %117 = spirv.IAdd %60, %cst480_i32 : i32 | |
| %118 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %117] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %118, %116 : vector<4xf32> | |
| %119 = spirv.IAdd %77, %cst384_i32 : i32 | |
| %120 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %119] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| %121 = spirv.Load "StorageBuffer" %120 : vector<4xf32> | |
| %122 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %65] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| spirv.Store "Workgroup" %122, %121 : vector<4xf32> | |
// Second workgroup barrier, again separating staging stores from tile loads.
| spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory> | |
// Stage 2 loads: @__workgroup_mem__3 at %83 + {320, 322, 400, 402} and
// @__workgroup_mem__4 at %99 + {160, 240}.
| %123 = spirv.IAdd %83, %cst320_i32 : i32 | |
| %124 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %123] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %125 = spirv.NV.CooperativeMatrixLoad %124, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %126 = spirv.IAdd %83, %cst322_i32 : i32 | |
| %127 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %126] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %128 = spirv.NV.CooperativeMatrixLoad %127, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %129 = spirv.IAdd %83, %cst400_i32 : i32 | |
| %130 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %129] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %131 = spirv.NV.CooperativeMatrixLoad %130, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %132 = spirv.IAdd %83, %cst402_i32 : i32 | |
| %133 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %132] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %134 = spirv.NV.CooperativeMatrixLoad %133, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %135 = spirv.IAdd %99, %cst160_i32 : i32 | |
| %136 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %135] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %137 = spirv.NV.CooperativeMatrixLoad %136, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %138 = spirv.IAdd %99, %cst240_i32 : i32 | |
| %139 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %138] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32 | |
| %140 = spirv.NV.CooperativeMatrixLoad %139, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup> | |
// Stage 2 multiply-adds continue the chains: %106 -> %141 -> %142 and
// %108 -> %143 -> %144.
| %141 = spirv.NV.CooperativeMatrixMulAdd %125, %137, %106 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %142 = spirv.NV.CooperativeMatrixMulAdd %128, %140, %141 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %143 = spirv.NV.CooperativeMatrixMulAdd %131, %137, %108 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %144 = spirv.NV.CooperativeMatrixMulAdd %134, %140, %143 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup> | |
// Output index %154 = (wg.z + lid.z) * 110592 + wg.y * 768 + lid.y * 384
// + wg.x * 4 + %30 + %98. %142 is stored at %154 and %144 at %154 + 192 in
// binding (0, 1); both stores pass %cst12_i32 and %false as trailing operands.
| %145 = spirv.IMul %32, %cst110592_i32 : i32 | |
| %146 = spirv.IMul %6, %cst110592_i32 : i32 | |
| %147 = spirv.IAdd %145, %146 : i32 | |
| %148 = spirv.IMul %34, %cst768_i32 : i32 | |
| %149 = spirv.IAdd %147, %148 : i32 | |
| %150 = spirv.IMul %4, %cst384_i32 : i32 | |
| %151 = spirv.IAdd %149, %150 : i32 | |
| %152 = spirv.IAdd %151, %71 : i32 | |
| %153 = spirv.IAdd %152, %30 : i32 | |
| %154 = spirv.IAdd %153, %98 : i32 | |
| %155 = spirv.IAdd %154, %cst192_i32 : i32 | |
| %156 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %155] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %156, %144, %cst12_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| %157 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %154] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32 | |
| spirv.NV.CooperativeMatrixStore %157, %142, %cst12_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup> | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_61_batch_matmul_10x9216x96x64, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__ | |
| spirv.ExecutionMode @forward_dispatch_61_batch_matmul_10x9216x96x64 "LocalSize", 64, 2, 1 | |
| } | |
| } | |
| } | |
| } | |
// forward_dispatch_62 (Vulkan SPIR-V variant): each invocation loads one f16
// from binding (0, 0) and stores it to binding (0, 1) at a differently strided
// index. The export declares workgroup_size = [1, 32, 1].
| hal.executable private @forward_dispatch_62 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
| hal.executable.export public @forward_dispatch_62 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation, workgroup_size = [1 : index, 32 : index, 1 : index]} { | |
// Workgroup-count region: its block arguments are unused; it returns the
// constants (77, 288, 10).
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
| %c77 = arith.constant 77 : index | |
| %c288 = arith.constant 288 : index | |
| %c10 = arith.constant 10 : index | |
| hal.return %c77, %c288, %c10 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
// Interface: two i32 push constants and two f16 runtime arrays (stride 2).
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_62() "None" { | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst2464_i32 = spirv.Constant 2464 : i32 | |
| %cst77_i32 = spirv.Constant 77 : i32 | |
| %cst709632_i32 = spirv.Constant 709632 : i32 | |
| %cst3072_i32 = spirv.Constant 3072 : i32 | |
| %cst96_i32 = spirv.Constant 96 : i32 | |
| %cst884736_i32 = spirv.Constant 884736 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
// %1 / %3: push constants 0 / 1.
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| %0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %1 = spirv.Load "PushConstant" %0 : i32 | |
| %2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %3 = spirv.Load "PushConstant" %2 : i32 | |
// %9 and %15: each push constant divided by 2. Negative inputs are mapped
// through (-1 - x) before and after the SDiv.
// NOTE(review): this looks like the usual floor-division expansion, presumably
// turning byte offsets into f16 element offsets -- confirm.
| %4 = spirv.SLessThan %1, %cst0_i32 : i32 | |
| %5 = spirv.ISub %cst-1_i32, %1 : i32 | |
| %6 = spirv.Select %4, %5, %1 : i1, i32 | |
| %7 = spirv.SDiv %6, %cst2_i32 : i32 | |
| %8 = spirv.ISub %cst-1_i32, %7 : i32 | |
| %9 = spirv.Select %4, %8, %7 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| %10 = spirv.SLessThan %3, %cst0_i32 : i32 | |
| %11 = spirv.ISub %cst-1_i32, %3 : i32 | |
| %12 = spirv.Select %10, %11, %3 : i1, i32 | |
| %13 = spirv.SDiv %12, %cst2_i32 : i32 | |
| %14 = spirv.ISub %cst-1_i32, %13 : i32 | |
| %15 = spirv.Select %10, %14, %13 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
// %17 / %19 / %21: WorkgroupId x / y / z. %23 / %25 / %27: LocalInvocationId x / y / z.
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %16 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %17 = spirv.CompositeExtract %16[0 : i32] : vector<3xi32> | |
| %18 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %19 = spirv.CompositeExtract %18[1 : i32] : vector<3xi32> | |
| %20 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %21 = spirv.CompositeExtract %20[2 : i32] : vector<3xi32> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %22 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %23 = spirv.CompositeExtract %22[0 : i32] : vector<3xi32> | |
| %24 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %25 = spirv.CompositeExtract %24[1 : i32] : vector<3xi32> | |
| %26 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %27 = spirv.CompositeExtract %26[2 : i32] : vector<3xi32> | |
// Source index %37 = (wg.z + lid.z) * 884736 + lid.y * 96 + wg.y * 3072
// + wg.x + lid.x + %9.
| %28 = spirv.IMul %21, %cst884736_i32 : i32 | |
| %29 = spirv.IMul %27, %cst884736_i32 : i32 | |
| %30 = spirv.IAdd %28, %29 : i32 | |
| %31 = spirv.IMul %25, %cst96_i32 : i32 | |
| %32 = spirv.IAdd %30, %31 : i32 | |
| %33 = spirv.IMul %19, %cst3072_i32 : i32 | |
| %34 = spirv.IAdd %32, %33 : i32 | |
| %35 = spirv.IAdd %34, %17 : i32 | |
| %36 = spirv.IAdd %35, %23 : i32 | |
| %37 = spirv.IAdd %36, %9 : i32 | |
| %38 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %37] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %39 = spirv.Load "StorageBuffer" %38 : f16 | |
// Destination index %49 = (wg.z + lid.z) * 709632 + lid.y * 77 + wg.y * 2464
// + wg.x + lid.x + %15.
| %40 = spirv.IMul %21, %cst709632_i32 : i32 | |
| %41 = spirv.IMul %27, %cst709632_i32 : i32 | |
| %42 = spirv.IAdd %40, %41 : i32 | |
| %43 = spirv.IMul %25, %cst77_i32 : i32 | |
| %44 = spirv.IAdd %42, %43 : i32 | |
| %45 = spirv.IMul %19, %cst2464_i32 : i32 | |
| %46 = spirv.IAdd %44, %45 : i32 | |
| %47 = spirv.IAdd %46, %17 : i32 | |
| %48 = spirv.IAdd %47, %23 : i32 | |
| %49 = spirv.IAdd %48, %15 : i32 | |
| %50 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %49] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| spirv.Store "StorageBuffer" %50, %39 : f16 | |
| spirv.Return | |
| } | |
| spirv.EntryPoint "GLCompute" @forward_dispatch_62, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
| spirv.ExecutionMode @forward_dispatch_62 "LocalSize", 1, 32, 1 | |
| } | |
| } | |
| } | |
| } | |
| hal.executable private @forward_dispatch_63 { | |
| hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb { | |
| hal.executable.export public @forward_dispatch_63 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation1, workgroup_size = [32 : index, 1 : index, 1 : index]} { | |
| ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| %c72 = arith.constant 72 : index | |
| %c5 = arith.constant 5 : index | |
| %c2 = arith.constant 2 : index | |
| hal.return %c72, %c5, %c2 : index, index, index | |
| } | |
| builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} { | |
| spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> { | |
| spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input> | |
| spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| spirv.func @forward_dispatch_63() "None" { | |
| %cst307_i32 = spirv.Constant 307 : i32 | |
| %cst306_i32 = spirv.Constant 306 : i32 | |
| %cst305_i32 = spirv.Constant 305 : i32 | |
| %cst304_i32 = spirv.Constant 304 : i32 | |
| %cst303_i32 = spirv.Constant 303 : i32 | |
| %cst302_i32 = spirv.Constant 302 : i32 | |
| %cst301_i32 = spirv.Constant 301 : i32 | |
| %cst300_i32 = spirv.Constant 300 : i32 | |
| %cst299_i32 = spirv.Constant 299 : i32 | |
| %cst298_i32 = spirv.Constant 298 : i32 | |
| %cst297_i32 = spirv.Constant 297 : i32 | |
| %cst296_i32 = spirv.Constant 296 : i32 | |
| %cst295_i32 = spirv.Constant 295 : i32 | |
| %cst294_i32 = spirv.Constant 294 : i32 | |
| %cst293_i32 = spirv.Constant 293 : i32 | |
| %cst292_i32 = spirv.Constant 292 : i32 | |
| %cst291_i32 = spirv.Constant 291 : i32 | |
| %cst290_i32 = spirv.Constant 290 : i32 | |
| %cst289_i32 = spirv.Constant 289 : i32 | |
| %cst288_i32 = spirv.Constant 288 : i32 | |
| %cst287_i32 = spirv.Constant 287 : i32 | |
| %cst286_i32 = spirv.Constant 286 : i32 | |
| %cst285_i32 = spirv.Constant 285 : i32 | |
| %cst284_i32 = spirv.Constant 284 : i32 | |
| %cst283_i32 = spirv.Constant 283 : i32 | |
| %cst282_i32 = spirv.Constant 282 : i32 | |
| %cst281_i32 = spirv.Constant 281 : i32 | |
| %cst280_i32 = spirv.Constant 280 : i32 | |
| %cst279_i32 = spirv.Constant 279 : i32 | |
| %cst278_i32 = spirv.Constant 278 : i32 | |
| %cst277_i32 = spirv.Constant 277 : i32 | |
| %cst276_i32 = spirv.Constant 276 : i32 | |
| %cst275_i32 = spirv.Constant 275 : i32 | |
| %cst274_i32 = spirv.Constant 274 : i32 | |
| %cst273_i32 = spirv.Constant 273 : i32 | |
| %cst272_i32 = spirv.Constant 272 : i32 | |
| %cst271_i32 = spirv.Constant 271 : i32 | |
| %cst270_i32 = spirv.Constant 270 : i32 | |
| %cst269_i32 = spirv.Constant 269 : i32 | |
| %cst268_i32 = spirv.Constant 268 : i32 | |
| %cst267_i32 = spirv.Constant 267 : i32 | |
| %cst266_i32 = spirv.Constant 266 : i32 | |
| %cst265_i32 = spirv.Constant 265 : i32 | |
| %cst264_i32 = spirv.Constant 264 : i32 | |
| %cst263_i32 = spirv.Constant 263 : i32 | |
| %cst262_i32 = spirv.Constant 262 : i32 | |
| %cst261_i32 = spirv.Constant 261 : i32 | |
| %cst260_i32 = spirv.Constant 260 : i32 | |
| %cst259_i32 = spirv.Constant 259 : i32 | |
| %cst258_i32 = spirv.Constant 258 : i32 | |
| %cst257_i32 = spirv.Constant 257 : i32 | |
| %cst256_i32 = spirv.Constant 256 : i32 | |
| %cst255_i32 = spirv.Constant 255 : i32 | |
| %cst254_i32 = spirv.Constant 254 : i32 | |
| %cst253_i32 = spirv.Constant 253 : i32 | |
| %cst252_i32 = spirv.Constant 252 : i32 | |
| %cst251_i32 = spirv.Constant 251 : i32 | |
| %cst250_i32 = spirv.Constant 250 : i32 | |
| %cst249_i32 = spirv.Constant 249 : i32 | |
| %cst248_i32 = spirv.Constant 248 : i32 | |
| %cst247_i32 = spirv.Constant 247 : i32 | |
| %cst246_i32 = spirv.Constant 246 : i32 | |
| %cst245_i32 = spirv.Constant 245 : i32 | |
| %cst244_i32 = spirv.Constant 244 : i32 | |
| %cst243_i32 = spirv.Constant 243 : i32 | |
| %cst242_i32 = spirv.Constant 242 : i32 | |
| %cst241_i32 = spirv.Constant 241 : i32 | |
| %cst240_i32 = spirv.Constant 240 : i32 | |
| %cst239_i32 = spirv.Constant 239 : i32 | |
| %cst238_i32 = spirv.Constant 238 : i32 | |
| %cst237_i32 = spirv.Constant 237 : i32 | |
| %cst236_i32 = spirv.Constant 236 : i32 | |
| %cst235_i32 = spirv.Constant 235 : i32 | |
| %cst234_i32 = spirv.Constant 234 : i32 | |
| %cst233_i32 = spirv.Constant 233 : i32 | |
| %cst232_i32 = spirv.Constant 232 : i32 | |
| %cst231_i32 = spirv.Constant 231 : i32 | |
| %cst230_i32 = spirv.Constant 230 : i32 | |
| %cst229_i32 = spirv.Constant 229 : i32 | |
| %cst228_i32 = spirv.Constant 228 : i32 | |
| %cst227_i32 = spirv.Constant 227 : i32 | |
| %cst226_i32 = spirv.Constant 226 : i32 | |
| %cst225_i32 = spirv.Constant 225 : i32 | |
| %cst224_i32 = spirv.Constant 224 : i32 | |
| %cst223_i32 = spirv.Constant 223 : i32 | |
| %cst222_i32 = spirv.Constant 222 : i32 | |
| %cst221_i32 = spirv.Constant 221 : i32 | |
| %cst220_i32 = spirv.Constant 220 : i32 | |
| %cst219_i32 = spirv.Constant 219 : i32 | |
| %cst218_i32 = spirv.Constant 218 : i32 | |
| %cst217_i32 = spirv.Constant 217 : i32 | |
| %cst216_i32 = spirv.Constant 216 : i32 | |
| %cst215_i32 = spirv.Constant 215 : i32 | |
| %cst214_i32 = spirv.Constant 214 : i32 | |
| %cst213_i32 = spirv.Constant 213 : i32 | |
| %cst212_i32 = spirv.Constant 212 : i32 | |
| %cst211_i32 = spirv.Constant 211 : i32 | |
| %cst210_i32 = spirv.Constant 210 : i32 | |
| %cst209_i32 = spirv.Constant 209 : i32 | |
| %cst208_i32 = spirv.Constant 208 : i32 | |
| %cst207_i32 = spirv.Constant 207 : i32 | |
| %cst206_i32 = spirv.Constant 206 : i32 | |
| %cst205_i32 = spirv.Constant 205 : i32 | |
| %cst204_i32 = spirv.Constant 204 : i32 | |
| %cst203_i32 = spirv.Constant 203 : i32 | |
| %cst202_i32 = spirv.Constant 202 : i32 | |
| %cst201_i32 = spirv.Constant 201 : i32 | |
| %cst200_i32 = spirv.Constant 200 : i32 | |
| %cst199_i32 = spirv.Constant 199 : i32 | |
| %cst198_i32 = spirv.Constant 198 : i32 | |
| %cst197_i32 = spirv.Constant 197 : i32 | |
| %cst196_i32 = spirv.Constant 196 : i32 | |
| %cst195_i32 = spirv.Constant 195 : i32 | |
| %cst194_i32 = spirv.Constant 194 : i32 | |
| %cst193_i32 = spirv.Constant 193 : i32 | |
| %cst192_i32 = spirv.Constant 192 : i32 | |
| %cst191_i32 = spirv.Constant 191 : i32 | |
| %cst190_i32 = spirv.Constant 190 : i32 | |
| %cst189_i32 = spirv.Constant 189 : i32 | |
| %cst188_i32 = spirv.Constant 188 : i32 | |
| %cst187_i32 = spirv.Constant 187 : i32 | |
| %cst186_i32 = spirv.Constant 186 : i32 | |
| %cst185_i32 = spirv.Constant 185 : i32 | |
| %cst184_i32 = spirv.Constant 184 : i32 | |
| %cst183_i32 = spirv.Constant 183 : i32 | |
| %cst182_i32 = spirv.Constant 182 : i32 | |
| %cst181_i32 = spirv.Constant 181 : i32 | |
| %cst180_i32 = spirv.Constant 180 : i32 | |
| %cst179_i32 = spirv.Constant 179 : i32 | |
| %cst178_i32 = spirv.Constant 178 : i32 | |
| %cst177_i32 = spirv.Constant 177 : i32 | |
| %cst176_i32 = spirv.Constant 176 : i32 | |
| %cst175_i32 = spirv.Constant 175 : i32 | |
| %cst174_i32 = spirv.Constant 174 : i32 | |
| %cst173_i32 = spirv.Constant 173 : i32 | |
| %cst172_i32 = spirv.Constant 172 : i32 | |
| %cst171_i32 = spirv.Constant 171 : i32 | |
| %cst170_i32 = spirv.Constant 170 : i32 | |
| %cst169_i32 = spirv.Constant 169 : i32 | |
| %cst168_i32 = spirv.Constant 168 : i32 | |
| %cst167_i32 = spirv.Constant 167 : i32 | |
| %cst166_i32 = spirv.Constant 166 : i32 | |
| %cst165_i32 = spirv.Constant 165 : i32 | |
| %cst164_i32 = spirv.Constant 164 : i32 | |
| %cst163_i32 = spirv.Constant 163 : i32 | |
| %cst162_i32 = spirv.Constant 162 : i32 | |
| %cst161_i32 = spirv.Constant 161 : i32 | |
| %cst160_i32 = spirv.Constant 160 : i32 | |
| %cst159_i32 = spirv.Constant 159 : i32 | |
| %cst158_i32 = spirv.Constant 158 : i32 | |
| %cst157_i32 = spirv.Constant 157 : i32 | |
| %cst156_i32 = spirv.Constant 156 : i32 | |
| %cst155_i32 = spirv.Constant 155 : i32 | |
| %cst154_i32 = spirv.Constant 154 : i32 | |
| %cst153_i32 = spirv.Constant 153 : i32 | |
| %cst152_i32 = spirv.Constant 152 : i32 | |
| %cst151_i32 = spirv.Constant 151 : i32 | |
| %cst150_i32 = spirv.Constant 150 : i32 | |
| %cst149_i32 = spirv.Constant 149 : i32 | |
| %cst148_i32 = spirv.Constant 148 : i32 | |
| %cst147_i32 = spirv.Constant 147 : i32 | |
| %cst146_i32 = spirv.Constant 146 : i32 | |
| %cst145_i32 = spirv.Constant 145 : i32 | |
| %cst144_i32 = spirv.Constant 144 : i32 | |
| %cst143_i32 = spirv.Constant 143 : i32 | |
| %cst142_i32 = spirv.Constant 142 : i32 | |
| %cst141_i32 = spirv.Constant 141 : i32 | |
| %cst140_i32 = spirv.Constant 140 : i32 | |
| %cst139_i32 = spirv.Constant 139 : i32 | |
| %cst138_i32 = spirv.Constant 138 : i32 | |
| %cst137_i32 = spirv.Constant 137 : i32 | |
| %cst136_i32 = spirv.Constant 136 : i32 | |
| %cst135_i32 = spirv.Constant 135 : i32 | |
| %cst134_i32 = spirv.Constant 134 : i32 | |
| %cst133_i32 = spirv.Constant 133 : i32 | |
| %cst132_i32 = spirv.Constant 132 : i32 | |
| %cst131_i32 = spirv.Constant 131 : i32 | |
| %cst130_i32 = spirv.Constant 130 : i32 | |
| %cst129_i32 = spirv.Constant 129 : i32 | |
| %cst128_i32 = spirv.Constant 128 : i32 | |
| %cst127_i32 = spirv.Constant 127 : i32 | |
| %cst126_i32 = spirv.Constant 126 : i32 | |
| %cst125_i32 = spirv.Constant 125 : i32 | |
| %cst124_i32 = spirv.Constant 124 : i32 | |
| %cst123_i32 = spirv.Constant 123 : i32 | |
| %cst122_i32 = spirv.Constant 122 : i32 | |
| %cst121_i32 = spirv.Constant 121 : i32 | |
| %cst120_i32 = spirv.Constant 120 : i32 | |
| %cst119_i32 = spirv.Constant 119 : i32 | |
| %cst118_i32 = spirv.Constant 118 : i32 | |
| %cst117_i32 = spirv.Constant 117 : i32 | |
| %cst116_i32 = spirv.Constant 116 : i32 | |
| %cst115_i32 = spirv.Constant 115 : i32 | |
| %cst114_i32 = spirv.Constant 114 : i32 | |
| %cst113_i32 = spirv.Constant 113 : i32 | |
| %cst112_i32 = spirv.Constant 112 : i32 | |
| %cst111_i32 = spirv.Constant 111 : i32 | |
| %cst110_i32 = spirv.Constant 110 : i32 | |
| %cst109_i32 = spirv.Constant 109 : i32 | |
| %cst108_i32 = spirv.Constant 108 : i32 | |
| %cst107_i32 = spirv.Constant 107 : i32 | |
| %cst106_i32 = spirv.Constant 106 : i32 | |
| %cst105_i32 = spirv.Constant 105 : i32 | |
| %cst104_i32 = spirv.Constant 104 : i32 | |
| %cst103_i32 = spirv.Constant 103 : i32 | |
| %cst102_i32 = spirv.Constant 102 : i32 | |
| %cst101_i32 = spirv.Constant 101 : i32 | |
| %cst100_i32 = spirv.Constant 100 : i32 | |
| %cst99_i32 = spirv.Constant 99 : i32 | |
| %cst98_i32 = spirv.Constant 98 : i32 | |
| %cst97_i32 = spirv.Constant 97 : i32 | |
| %cst96_i32 = spirv.Constant 96 : i32 | |
| %cst95_i32 = spirv.Constant 95 : i32 | |
| %cst94_i32 = spirv.Constant 94 : i32 | |
| %cst93_i32 = spirv.Constant 93 : i32 | |
| %cst92_i32 = spirv.Constant 92 : i32 | |
| %cst91_i32 = spirv.Constant 91 : i32 | |
| %cst90_i32 = spirv.Constant 90 : i32 | |
| %cst89_i32 = spirv.Constant 89 : i32 | |
| %cst88_i32 = spirv.Constant 88 : i32 | |
| %cst87_i32 = spirv.Constant 87 : i32 | |
| %cst86_i32 = spirv.Constant 86 : i32 | |
| %cst85_i32 = spirv.Constant 85 : i32 | |
| %cst84_i32 = spirv.Constant 84 : i32 | |
| %cst83_i32 = spirv.Constant 83 : i32 | |
| %cst82_i32 = spirv.Constant 82 : i32 | |
| %cst81_i32 = spirv.Constant 81 : i32 | |
| %cst80_i32 = spirv.Constant 80 : i32 | |
| %cst79_i32 = spirv.Constant 79 : i32 | |
| %cst78_i32 = spirv.Constant 78 : i32 | |
| %cst77_i32 = spirv.Constant 77 : i32 | |
| %cst76_i32 = spirv.Constant 76 : i32 | |
| %cst75_i32 = spirv.Constant 75 : i32 | |
| %cst74_i32 = spirv.Constant 74 : i32 | |
| %cst73_i32 = spirv.Constant 73 : i32 | |
| %cst72_i32 = spirv.Constant 72 : i32 | |
| %cst71_i32 = spirv.Constant 71 : i32 | |
| %cst70_i32 = spirv.Constant 70 : i32 | |
| %cst69_i32 = spirv.Constant 69 : i32 | |
| %cst68_i32 = spirv.Constant 68 : i32 | |
| %cst67_i32 = spirv.Constant 67 : i32 | |
| %cst66_i32 = spirv.Constant 66 : i32 | |
| %cst65_i32 = spirv.Constant 65 : i32 | |
| %cst64_i32 = spirv.Constant 64 : i32 | |
| %cst63_i32 = spirv.Constant 63 : i32 | |
| %cst62_i32 = spirv.Constant 62 : i32 | |
| %cst61_i32 = spirv.Constant 61 : i32 | |
| %cst60_i32 = spirv.Constant 60 : i32 | |
| %cst59_i32 = spirv.Constant 59 : i32 | |
| %cst58_i32 = spirv.Constant 58 : i32 | |
| %cst57_i32 = spirv.Constant 57 : i32 | |
| %cst56_i32 = spirv.Constant 56 : i32 | |
| %cst55_i32 = spirv.Constant 55 : i32 | |
| %cst54_i32 = spirv.Constant 54 : i32 | |
| %cst53_i32 = spirv.Constant 53 : i32 | |
| %cst52_i32 = spirv.Constant 52 : i32 | |
| %cst51_i32 = spirv.Constant 51 : i32 | |
| %cst50_i32 = spirv.Constant 50 : i32 | |
| %cst49_i32 = spirv.Constant 49 : i32 | |
| %cst48_i32 = spirv.Constant 48 : i32 | |
| %cst47_i32 = spirv.Constant 47 : i32 | |
| %cst46_i32 = spirv.Constant 46 : i32 | |
| %cst45_i32 = spirv.Constant 45 : i32 | |
| %cst44_i32 = spirv.Constant 44 : i32 | |
| %cst43_i32 = spirv.Constant 43 : i32 | |
| %cst42_i32 = spirv.Constant 42 : i32 | |
| %cst41_i32 = spirv.Constant 41 : i32 | |
| %cst40_i32 = spirv.Constant 40 : i32 | |
| %cst39_i32 = spirv.Constant 39 : i32 | |
| %cst38_i32 = spirv.Constant 38 : i32 | |
| %cst37_i32 = spirv.Constant 37 : i32 | |
| %cst36_i32 = spirv.Constant 36 : i32 | |
| %cst35_i32 = spirv.Constant 35 : i32 | |
| %cst34_i32 = spirv.Constant 34 : i32 | |
| %cst33_i32 = spirv.Constant 33 : i32 | |
| %cst32_i32 = spirv.Constant 32 : i32 | |
| %cst31_i32 = spirv.Constant 31 : i32 | |
| %cst30_i32 = spirv.Constant 30 : i32 | |
| %cst29_i32 = spirv.Constant 29 : i32 | |
| %cst28_i32 = spirv.Constant 28 : i32 | |
| %cst27_i32 = spirv.Constant 27 : i32 | |
| %cst26_i32 = spirv.Constant 26 : i32 | |
| %cst25_i32 = spirv.Constant 25 : i32 | |
| %cst24_i32 = spirv.Constant 24 : i32 | |
| %cst23_i32 = spirv.Constant 23 : i32 | |
| %cst22_i32 = spirv.Constant 22 : i32 | |
| %cst21_i32 = spirv.Constant 21 : i32 | |
| %cst20_i32 = spirv.Constant 20 : i32 | |
| %cst19_i32 = spirv.Constant 19 : i32 | |
| %cst18_i32 = spirv.Constant 18 : i32 | |
| %cst17_i32 = spirv.Constant 17 : i32 | |
| %cst16_i32 = spirv.Constant 16 : i32 | |
| %cst15_i32 = spirv.Constant 15 : i32 | |
| %cst14_i32 = spirv.Constant 14 : i32 | |
| %cst13_i32 = spirv.Constant 13 : i32 | |
| %cst12_i32 = spirv.Constant 12 : i32 | |
| %cst11_i32 = spirv.Constant 11 : i32 | |
| %cst10_i32 = spirv.Constant 10 : i32 | |
| %cst9_i32 = spirv.Constant 9 : i32 | |
| %cst8_i32 = spirv.Constant 8 : i32 | |
| %cst7_i32 = spirv.Constant 7 : i32 | |
| %cst6_i32 = spirv.Constant 6 : i32 | |
| %cst5_i32 = spirv.Constant 5 : i32 | |
| %cst4_i32 = spirv.Constant 4 : i32 | |
| %cst3_i32 = spirv.Constant 3 : i32 | |
| %cst1_i32 = spirv.Constant 1 : i32 | |
| %cst709632_i32 = spirv.Constant 709632 : i32 | |
| %cst3548160_i32 = spirv.Constant 3548160 : i32 | |
| %cst308_i32 = spirv.Constant 308 : i32 | |
| %cst9856_i32 = spirv.Constant 9856 : i32 | |
| %cst-1_i32 = spirv.Constant -1 : i32 | |
| %cst2_i32 = spirv.Constant 2 : i32 | |
| %cst0_i32 = spirv.Constant 0 : i32 | |
| %cst_f32 = spirv.Constant 0.693147182 : f32 | |
| %cst_f32_0 = spirv.Constant 1.44269502 : f32 | |
| %cst_f32_1 = spirv.Constant 1.000000e+00 : f32 | |
| %cst_f32_2 = spirv.Constant 0.499705136 : f32 | |
| %cst_f32_3 = spirv.Constant 0.168738902 : f32 | |
| %cst_f32_4 = spirv.Constant 0.0366896503 : f32 | |
| %cst_f32_5 = spirv.Constant 1.314350e-02 : f32 | |
| %cst_f32_6 = spirv.Constant 0.000000e+00 : f32 | |
| %cst_f32_7 = spirv.Constant 0x7F800000 : f32 | |
| %cst_f32_8 = spirv.Constant 0xFF800000 : f32 | |
| %cst_f32_9 = spirv.Constant 1.17549435E-38 : f32 | |
| %cst-127_i32 = spirv.Constant -127 : i32 | |
| %cst_vec_4xf16 = spirv.Constant dense<0.000000e+00> : vector<4xf16> | |
| %__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant> | |
| %0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %1 = spirv.Load "PushConstant" %0 : i32 | |
| %2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32 | |
| %3 = spirv.Load "PushConstant" %2 : i32 | |
| %4 = spirv.SLessThan %1, %cst0_i32 : i32 | |
| %5 = spirv.ISub %cst-1_i32, %1 : i32 | |
| %6 = spirv.Select %4, %5, %1 : i1, i32 | |
| %7 = spirv.SDiv %6, %cst2_i32 : i32 | |
| %8 = spirv.ISub %cst-1_i32, %7 : i32 | |
| %9 = spirv.Select %4, %8, %7 : i1, i32 | |
| %__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| %10 = spirv.SLessThan %3, %cst0_i32 : i32 | |
| %11 = spirv.ISub %cst-1_i32, %3 : i32 | |
| %12 = spirv.Select %10, %11, %3 : i1, i32 | |
| %13 = spirv.SDiv %12, %cst2_i32 : i32 | |
| %14 = spirv.ISub %cst-1_i32, %13 : i32 | |
| %15 = spirv.Select %10, %14, %13 : i1, i32 | |
| %__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer> | |
| %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %16 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %17 = spirv.CompositeExtract %16[2 : i32] : vector<3xi32> | |
| %18 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %19 = spirv.CompositeExtract %18[1 : i32] : vector<3xi32> | |
| %20 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32> | |
| %21 = spirv.CompositeExtract %20[0 : i32] : vector<3xi32> | |
| %__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input> | |
| %22 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32> | |
| %23 = spirv.CompositeExtract %22[0 : i32] : vector<3xi32> | |
| %24 = spirv.IMul %21, %cst9856_i32 : i32 | |
| %25 = spirv.IMul %23, %cst308_i32 : i32 | |
| %26 = spirv.IAdd %24, %25 : i32 | |
| %27 = spirv.IMul %17, %cst3548160_i32 : i32 | |
| %28 = spirv.IAdd %26, %27 : i32 | |
| %29 = spirv.IMul %19, %cst709632_i32 : i32 | |
| %30 = spirv.IAdd %28, %29 : i32 | |
| %31 = spirv.IAdd %30, %9 : i32 | |
| %32 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %31] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %33 = spirv.Load "StorageBuffer" %32 : f16 | |
| %34 = spirv.IAdd %31, %cst1_i32 : i32 | |
| %35 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %34] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %36 = spirv.Load "StorageBuffer" %35 : f16 | |
| %37 = spirv.IAdd %31, %cst2_i32 : i32 | |
| %38 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %37] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %39 = spirv.Load "StorageBuffer" %38 : f16 | |
| %40 = spirv.IAdd %31, %cst3_i32 : i32 | |
| %41 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %40] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %42 = spirv.Load "StorageBuffer" %41 : f16 | |
| %43 = spirv.IAdd %31, %cst4_i32 : i32 | |
| %44 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %43] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %45 = spirv.Load "StorageBuffer" %44 : f16 | |
| %46 = spirv.IAdd %31, %cst5_i32 : i32 | |
| %47 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %46] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %48 = spirv.Load "StorageBuffer" %47 : f16 | |
| %49 = spirv.IAdd %31, %cst6_i32 : i32 | |
| %50 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %49] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %51 = spirv.Load "StorageBuffer" %50 : f16 | |
| %52 = spirv.IAdd %31, %cst7_i32 : i32 | |
| %53 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %52] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %54 = spirv.Load "StorageBuffer" %53 : f16 | |
| %55 = spirv.IAdd %31, %cst8_i32 : i32 | |
| %56 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %55] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %57 = spirv.Load "StorageBuffer" %56 : f16 | |
| %58 = spirv.IAdd %31, %cst9_i32 : i32 | |
| %59 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %58] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %60 = spirv.Load "StorageBuffer" %59 : f16 | |
| %61 = spirv.IAdd %31, %cst10_i32 : i32 | |
| %62 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %61] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %63 = spirv.Load "StorageBuffer" %62 : f16 | |
| %64 = spirv.IAdd %31, %cst11_i32 : i32 | |
| %65 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %64] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %66 = spirv.Load "StorageBuffer" %65 : f16 | |
| %67 = spirv.IAdd %31, %cst12_i32 : i32 | |
| %68 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %67] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %69 = spirv.Load "StorageBuffer" %68 : f16 | |
| %70 = spirv.IAdd %31, %cst13_i32 : i32 | |
| %71 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %70] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %72 = spirv.Load "StorageBuffer" %71 : f16 | |
| %73 = spirv.IAdd %31, %cst14_i32 : i32 | |
| %74 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %73] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %75 = spirv.Load "StorageBuffer" %74 : f16 | |
| %76 = spirv.IAdd %31, %cst15_i32 : i32 | |
| %77 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %76] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %78 = spirv.Load "StorageBuffer" %77 : f16 | |
| %79 = spirv.IAdd %31, %cst16_i32 : i32 | |
| %80 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %79] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %81 = spirv.Load "StorageBuffer" %80 : f16 | |
| %82 = spirv.IAdd %31, %cst17_i32 : i32 | |
| %83 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %82] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %84 = spirv.Load "StorageBuffer" %83 : f16 | |
| %85 = spirv.IAdd %31, %cst18_i32 : i32 | |
| %86 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %85] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %87 = spirv.Load "StorageBuffer" %86 : f16 | |
| %88 = spirv.IAdd %31, %cst19_i32 : i32 | |
| %89 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %88] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %90 = spirv.Load "StorageBuffer" %89 : f16 | |
| %91 = spirv.IAdd %31, %cst20_i32 : i32 | |
| %92 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %91] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %93 = spirv.Load "StorageBuffer" %92 : f16 | |
| %94 = spirv.IAdd %31, %cst21_i32 : i32 | |
| %95 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %94] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %96 = spirv.Load "StorageBuffer" %95 : f16 | |
| %97 = spirv.IAdd %31, %cst22_i32 : i32 | |
| %98 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %97] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %99 = spirv.Load "StorageBuffer" %98 : f16 | |
| %100 = spirv.IAdd %31, %cst23_i32 : i32 | |
| %101 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %100] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %102 = spirv.Load "StorageBuffer" %101 : f16 | |
| %103 = spirv.IAdd %31, %cst24_i32 : i32 | |
| %104 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %103] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %105 = spirv.Load "StorageBuffer" %104 : f16 | |
| %106 = spirv.IAdd %31, %cst25_i32 : i32 | |
| %107 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %106] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %108 = spirv.Load "StorageBuffer" %107 : f16 | |
| %109 = spirv.IAdd %31, %cst26_i32 : i32 | |
| %110 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %109] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %111 = spirv.Load "StorageBuffer" %110 : f16 | |
| %112 = spirv.IAdd %31, %cst27_i32 : i32 | |
| %113 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %112] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %114 = spirv.Load "StorageBuffer" %113 : f16 | |
| %115 = spirv.IAdd %31, %cst28_i32 : i32 | |
| %116 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %115] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %117 = spirv.Load "StorageBuffer" %116 : f16 | |
| %118 = spirv.IAdd %31, %cst29_i32 : i32 | |
| %119 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %118] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %120 = spirv.Load "StorageBuffer" %119 : f16 | |
| %121 = spirv.IAdd %31, %cst30_i32 : i32 | |
| %122 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %121] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %123 = spirv.Load "StorageBuffer" %122 : f16 | |
| %124 = spirv.IAdd %31, %cst31_i32 : i32 | |
| %125 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %124] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %126 = spirv.Load "StorageBuffer" %125 : f16 | |
| %127 = spirv.IAdd %31, %cst32_i32 : i32 | |
| %128 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %127] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %129 = spirv.Load "StorageBuffer" %128 : f16 | |
| %130 = spirv.IAdd %31, %cst33_i32 : i32 | |
| %131 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %130] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %132 = spirv.Load "StorageBuffer" %131 : f16 | |
| %133 = spirv.IAdd %31, %cst34_i32 : i32 | |
| %134 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %133] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %135 = spirv.Load "StorageBuffer" %134 : f16 | |
| %136 = spirv.IAdd %31, %cst35_i32 : i32 | |
| %137 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %136] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %138 = spirv.Load "StorageBuffer" %137 : f16 | |
| %139 = spirv.IAdd %31, %cst36_i32 : i32 | |
| %140 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %139] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %141 = spirv.Load "StorageBuffer" %140 : f16 | |
| %142 = spirv.IAdd %31, %cst37_i32 : i32 | |
| %143 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %142] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %144 = spirv.Load "StorageBuffer" %143 : f16 | |
| %145 = spirv.IAdd %31, %cst38_i32 : i32 | |
| %146 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %145] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %147 = spirv.Load "StorageBuffer" %146 : f16 | |
| %148 = spirv.IAdd %31, %cst39_i32 : i32 | |
| %149 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %148] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %150 = spirv.Load "StorageBuffer" %149 : f16 | |
| %151 = spirv.IAdd %31, %cst40_i32 : i32 | |
| %152 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %151] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %153 = spirv.Load "StorageBuffer" %152 : f16 | |
| %154 = spirv.IAdd %31, %cst41_i32 : i32 | |
| %155 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %154] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %156 = spirv.Load "StorageBuffer" %155 : f16 | |
| %157 = spirv.IAdd %31, %cst42_i32 : i32 | |
| %158 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %157] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %159 = spirv.Load "StorageBuffer" %158 : f16 | |
| %160 = spirv.IAdd %31, %cst43_i32 : i32 | |
| %161 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %160] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %162 = spirv.Load "StorageBuffer" %161 : f16 | |
| %163 = spirv.IAdd %31, %cst44_i32 : i32 | |
| %164 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %163] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %165 = spirv.Load "StorageBuffer" %164 : f16 | |
| %166 = spirv.IAdd %31, %cst45_i32 : i32 | |
| %167 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %166] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %168 = spirv.Load "StorageBuffer" %167 : f16 | |
| %169 = spirv.IAdd %31, %cst46_i32 : i32 | |
| %170 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %169] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %171 = spirv.Load "StorageBuffer" %170 : f16 | |
| %172 = spirv.IAdd %31, %cst47_i32 : i32 | |
| %173 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %172] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %174 = spirv.Load "StorageBuffer" %173 : f16 | |
| %175 = spirv.IAdd %31, %cst48_i32 : i32 | |
| %176 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %175] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %177 = spirv.Load "StorageBuffer" %176 : f16 | |
| %178 = spirv.IAdd %31, %cst49_i32 : i32 | |
| %179 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %178] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %180 = spirv.Load "StorageBuffer" %179 : f16 | |
| %181 = spirv.IAdd %31, %cst50_i32 : i32 | |
| %182 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %181] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %183 = spirv.Load "StorageBuffer" %182 : f16 | |
| %184 = spirv.IAdd %31, %cst51_i32 : i32 | |
| %185 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %184] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %186 = spirv.Load "StorageBuffer" %185 : f16 | |
| %187 = spirv.IAdd %31, %cst52_i32 : i32 | |
| %188 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %187] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %189 = spirv.Load "StorageBuffer" %188 : f16 | |
| %190 = spirv.IAdd %31, %cst53_i32 : i32 | |
| %191 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %190] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %192 = spirv.Load "StorageBuffer" %191 : f16 | |
| %193 = spirv.IAdd %31, %cst54_i32 : i32 | |
| %194 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %193] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %195 = spirv.Load "StorageBuffer" %194 : f16 | |
| %196 = spirv.IAdd %31, %cst55_i32 : i32 | |
| %197 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %196] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %198 = spirv.Load "StorageBuffer" %197 : f16 | |
| %199 = spirv.IAdd %31, %cst56_i32 : i32 | |
| %200 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %199] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %201 = spirv.Load "StorageBuffer" %200 : f16 | |
| %202 = spirv.IAdd %31, %cst57_i32 : i32 | |
| %203 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %202] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %204 = spirv.Load "StorageBuffer" %203 : f16 | |
| %205 = spirv.IAdd %31, %cst58_i32 : i32 | |
| %206 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %205] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %207 = spirv.Load "StorageBuffer" %206 : f16 | |
| %208 = spirv.IAdd %31, %cst59_i32 : i32 | |
| %209 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %208] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %210 = spirv.Load "StorageBuffer" %209 : f16 | |
| %211 = spirv.IAdd %31, %cst60_i32 : i32 | |
| %212 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %211] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %213 = spirv.Load "StorageBuffer" %212 : f16 | |
| %214 = spirv.IAdd %31, %cst61_i32 : i32 | |
| %215 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %214] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %216 = spirv.Load "StorageBuffer" %215 : f16 | |
| %217 = spirv.IAdd %31, %cst62_i32 : i32 | |
| %218 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %217] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %219 = spirv.Load "StorageBuffer" %218 : f16 | |
| %220 = spirv.IAdd %31, %cst63_i32 : i32 | |
| %221 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %220] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %222 = spirv.Load "StorageBuffer" %221 : f16 | |
| %223 = spirv.IAdd %31, %cst64_i32 : i32 | |
| %224 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %223] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %225 = spirv.Load "StorageBuffer" %224 : f16 | |
| %226 = spirv.IAdd %31, %cst65_i32 : i32 | |
| %227 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %226] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %228 = spirv.Load "StorageBuffer" %227 : f16 | |
| %229 = spirv.IAdd %31, %cst66_i32 : i32 | |
| %230 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %229] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %231 = spirv.Load "StorageBuffer" %230 : f16 | |
| %232 = spirv.IAdd %31, %cst67_i32 : i32 | |
| %233 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %232] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %234 = spirv.Load "StorageBuffer" %233 : f16 | |
| %235 = spirv.IAdd %31, %cst68_i32 : i32 | |
| %236 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %235] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %237 = spirv.Load "StorageBuffer" %236 : f16 | |
| %238 = spirv.IAdd %31, %cst69_i32 : i32 | |
| %239 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %238] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %240 = spirv.Load "StorageBuffer" %239 : f16 | |
| %241 = spirv.IAdd %31, %cst70_i32 : i32 | |
| %242 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %241] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %243 = spirv.Load "StorageBuffer" %242 : f16 | |
| %244 = spirv.IAdd %31, %cst71_i32 : i32 | |
| %245 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %244] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %246 = spirv.Load "StorageBuffer" %245 : f16 | |
| %247 = spirv.IAdd %31, %cst72_i32 : i32 | |
| %248 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %247] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %249 = spirv.Load "StorageBuffer" %248 : f16 | |
| %250 = spirv.IAdd %31, %cst73_i32 : i32 | |
| %251 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %250] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %252 = spirv.Load "StorageBuffer" %251 : f16 | |
| %253 = spirv.IAdd %31, %cst74_i32 : i32 | |
| %254 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %253] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %255 = spirv.Load "StorageBuffer" %254 : f16 | |
| %256 = spirv.IAdd %31, %cst75_i32 : i32 | |
| %257 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %256] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %258 = spirv.Load "StorageBuffer" %257 : f16 | |
| %259 = spirv.IAdd %31, %cst76_i32 : i32 | |
| %260 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %259] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %261 = spirv.Load "StorageBuffer" %260 : f16 | |
| %262 = spirv.IAdd %31, %cst77_i32 : i32 | |
| %263 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %262] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %264 = spirv.Load "StorageBuffer" %263 : f16 | |
| %265 = spirv.IAdd %31, %cst78_i32 : i32 | |
| %266 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %265] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %267 = spirv.Load "StorageBuffer" %266 : f16 | |
| %268 = spirv.IAdd %31, %cst79_i32 : i32 | |
| %269 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %268] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %270 = spirv.Load "StorageBuffer" %269 : f16 | |
| %271 = spirv.IAdd %31, %cst80_i32 : i32 | |
| %272 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %271] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %273 = spirv.Load "StorageBuffer" %272 : f16 | |
| %274 = spirv.IAdd %31, %cst81_i32 : i32 | |
| %275 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %274] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %276 = spirv.Load "StorageBuffer" %275 : f16 | |
| %277 = spirv.IAdd %31, %cst82_i32 : i32 | |
| %278 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %277] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %279 = spirv.Load "StorageBuffer" %278 : f16 | |
| %280 = spirv.IAdd %31, %cst83_i32 : i32 | |
| %281 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %280] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %282 = spirv.Load "StorageBuffer" %281 : f16 | |
| %283 = spirv.IAdd %31, %cst84_i32 : i32 | |
| %284 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %283] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %285 = spirv.Load "StorageBuffer" %284 : f16 | |
| %286 = spirv.IAdd %31, %cst85_i32 : i32 | |
| %287 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %286] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %288 = spirv.Load "StorageBuffer" %287 : f16 | |
| %289 = spirv.IAdd %31, %cst86_i32 : i32 | |
| %290 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %289] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %291 = spirv.Load "StorageBuffer" %290 : f16 | |
| %292 = spirv.IAdd %31, %cst87_i32 : i32 | |
| %293 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %292] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %294 = spirv.Load "StorageBuffer" %293 : f16 | |
| %295 = spirv.IAdd %31, %cst88_i32 : i32 | |
| %296 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %295] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %297 = spirv.Load "StorageBuffer" %296 : f16 | |
| %298 = spirv.IAdd %31, %cst89_i32 : i32 | |
| %299 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %298] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %300 = spirv.Load "StorageBuffer" %299 : f16 | |
| %301 = spirv.IAdd %31, %cst90_i32 : i32 | |
| %302 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %301] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %303 = spirv.Load "StorageBuffer" %302 : f16 | |
| %304 = spirv.IAdd %31, %cst91_i32 : i32 | |
| %305 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %304] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %306 = spirv.Load "StorageBuffer" %305 : f16 | |
| %307 = spirv.IAdd %31, %cst92_i32 : i32 | |
| %308 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %307] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %309 = spirv.Load "StorageBuffer" %308 : f16 | |
| %310 = spirv.IAdd %31, %cst93_i32 : i32 | |
| %311 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %310] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %312 = spirv.Load "StorageBuffer" %311 : f16 | |
| %313 = spirv.IAdd %31, %cst94_i32 : i32 | |
| %314 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %313] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %315 = spirv.Load "StorageBuffer" %314 : f16 | |
| %316 = spirv.IAdd %31, %cst95_i32 : i32 | |
| %317 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %316] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %318 = spirv.Load "StorageBuffer" %317 : f16 | |
| %319 = spirv.IAdd %31, %cst96_i32 : i32 | |
| %320 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %319] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %321 = spirv.Load "StorageBuffer" %320 : f16 | |
| %322 = spirv.IAdd %31, %cst97_i32 : i32 | |
| %323 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %322] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %324 = spirv.Load "StorageBuffer" %323 : f16 | |
| %325 = spirv.IAdd %31, %cst98_i32 : i32 | |
| %326 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %325] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %327 = spirv.Load "StorageBuffer" %326 : f16 | |
| %328 = spirv.IAdd %31, %cst99_i32 : i32 | |
| %329 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %328] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %330 = spirv.Load "StorageBuffer" %329 : f16 | |
| %331 = spirv.IAdd %31, %cst100_i32 : i32 | |
| %332 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %331] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %333 = spirv.Load "StorageBuffer" %332 : f16 | |
| %334 = spirv.IAdd %31, %cst101_i32 : i32 | |
| %335 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %334] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %336 = spirv.Load "StorageBuffer" %335 : f16 | |
| %337 = spirv.IAdd %31, %cst102_i32 : i32 | |
| %338 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %337] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %339 = spirv.Load "StorageBuffer" %338 : f16 | |
| %340 = spirv.IAdd %31, %cst103_i32 : i32 | |
| %341 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %340] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %342 = spirv.Load "StorageBuffer" %341 : f16 | |
| %343 = spirv.IAdd %31, %cst104_i32 : i32 | |
| %344 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %343] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %345 = spirv.Load "StorageBuffer" %344 : f16 | |
| %346 = spirv.IAdd %31, %cst105_i32 : i32 | |
| %347 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %346] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %348 = spirv.Load "StorageBuffer" %347 : f16 | |
| %349 = spirv.IAdd %31, %cst106_i32 : i32 | |
| %350 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %349] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %351 = spirv.Load "StorageBuffer" %350 : f16 | |
| %352 = spirv.IAdd %31, %cst107_i32 : i32 | |
| %353 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %352] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %354 = spirv.Load "StorageBuffer" %353 : f16 | |
| %355 = spirv.IAdd %31, %cst108_i32 : i32 | |
| %356 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %355] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %357 = spirv.Load "StorageBuffer" %356 : f16 | |
| %358 = spirv.IAdd %31, %cst109_i32 : i32 | |
| %359 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %358] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %360 = spirv.Load "StorageBuffer" %359 : f16 | |
| %361 = spirv.IAdd %31, %cst110_i32 : i32 | |
| %362 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %361] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %363 = spirv.Load "StorageBuffer" %362 : f16 | |
| %364 = spirv.IAdd %31, %cst111_i32 : i32 | |
| %365 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %364] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %366 = spirv.Load "StorageBuffer" %365 : f16 | |
| %367 = spirv.IAdd %31, %cst112_i32 : i32 | |
| %368 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %367] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %369 = spirv.Load "StorageBuffer" %368 : f16 | |
| %370 = spirv.IAdd %31, %cst113_i32 : i32 | |
| %371 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %370] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %372 = spirv.Load "StorageBuffer" %371 : f16 | |
| %373 = spirv.IAdd %31, %cst114_i32 : i32 | |
| %374 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %373] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %375 = spirv.Load "StorageBuffer" %374 : f16 | |
| %376 = spirv.IAdd %31, %cst115_i32 : i32 | |
| %377 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %376] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %378 = spirv.Load "StorageBuffer" %377 : f16 | |
| %379 = spirv.IAdd %31, %cst116_i32 : i32 | |
| %380 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %379] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %381 = spirv.Load "StorageBuffer" %380 : f16 | |
| %382 = spirv.IAdd %31, %cst117_i32 : i32 | |
| %383 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %382] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %384 = spirv.Load "StorageBuffer" %383 : f16 | |
| %385 = spirv.IAdd %31, %cst118_i32 : i32 | |
| %386 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %385] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %387 = spirv.Load "StorageBuffer" %386 : f16 | |
| %388 = spirv.IAdd %31, %cst119_i32 : i32 | |
| %389 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %388] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %390 = spirv.Load "StorageBuffer" %389 : f16 | |
| %391 = spirv.IAdd %31, %cst120_i32 : i32 | |
| %392 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %391] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %393 = spirv.Load "StorageBuffer" %392 : f16 | |
| %394 = spirv.IAdd %31, %cst121_i32 : i32 | |
| %395 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %394] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %396 = spirv.Load "StorageBuffer" %395 : f16 | |
| %397 = spirv.IAdd %31, %cst122_i32 : i32 | |
| %398 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %397] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %399 = spirv.Load "StorageBuffer" %398 : f16 | |
| %400 = spirv.IAdd %31, %cst123_i32 : i32 | |
| %401 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %400] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %402 = spirv.Load "StorageBuffer" %401 : f16 | |
| %403 = spirv.IAdd %31, %cst124_i32 : i32 | |
| %404 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %403] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %405 = spirv.Load "StorageBuffer" %404 : f16 | |
| %406 = spirv.IAdd %31, %cst125_i32 : i32 | |
| %407 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %406] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %408 = spirv.Load "StorageBuffer" %407 : f16 | |
| %409 = spirv.IAdd %31, %cst126_i32 : i32 | |
| %410 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %409] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %411 = spirv.Load "StorageBuffer" %410 : f16 | |
| %412 = spirv.IAdd %31, %cst127_i32 : i32 | |
| %413 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %412] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %414 = spirv.Load "StorageBuffer" %413 : f16 | |
| %415 = spirv.IAdd %31, %cst128_i32 : i32 | |
| %416 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %415] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %417 = spirv.Load "StorageBuffer" %416 : f16 | |
| %418 = spirv.IAdd %31, %cst129_i32 : i32 | |
| %419 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %418] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %420 = spirv.Load "StorageBuffer" %419 : f16 | |
| %421 = spirv.IAdd %31, %cst130_i32 : i32 | |
| %422 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %421] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %423 = spirv.Load "StorageBuffer" %422 : f16 | |
| %424 = spirv.IAdd %31, %cst131_i32 : i32 | |
| %425 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %424] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %426 = spirv.Load "StorageBuffer" %425 : f16 | |
| %427 = spirv.IAdd %31, %cst132_i32 : i32 | |
| %428 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %427] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %429 = spirv.Load "StorageBuffer" %428 : f16 | |
| %430 = spirv.IAdd %31, %cst133_i32 : i32 | |
| %431 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %430] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %432 = spirv.Load "StorageBuffer" %431 : f16 | |
| %433 = spirv.IAdd %31, %cst134_i32 : i32 | |
| %434 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %433] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %435 = spirv.Load "StorageBuffer" %434 : f16 | |
| %436 = spirv.IAdd %31, %cst135_i32 : i32 | |
| %437 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %436] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %438 = spirv.Load "StorageBuffer" %437 : f16 | |
| %439 = spirv.IAdd %31, %cst136_i32 : i32 | |
| %440 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %439] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %441 = spirv.Load "StorageBuffer" %440 : f16 | |
| %442 = spirv.IAdd %31, %cst137_i32 : i32 | |
| %443 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %442] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %444 = spirv.Load "StorageBuffer" %443 : f16 | |
| %445 = spirv.IAdd %31, %cst138_i32 : i32 | |
| %446 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %445] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %447 = spirv.Load "StorageBuffer" %446 : f16 | |
| %448 = spirv.IAdd %31, %cst139_i32 : i32 | |
| %449 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %448] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %450 = spirv.Load "StorageBuffer" %449 : f16 | |
| %451 = spirv.IAdd %31, %cst140_i32 : i32 | |
| %452 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %451] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %453 = spirv.Load "StorageBuffer" %452 : f16 | |
| %454 = spirv.IAdd %31, %cst141_i32 : i32 | |
| %455 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %454] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %456 = spirv.Load "StorageBuffer" %455 : f16 | |
| %457 = spirv.IAdd %31, %cst142_i32 : i32 | |
| %458 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %457] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %459 = spirv.Load "StorageBuffer" %458 : f16 | |
| %460 = spirv.IAdd %31, %cst143_i32 : i32 | |
| %461 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %460] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %462 = spirv.Load "StorageBuffer" %461 : f16 | |
| %463 = spirv.IAdd %31, %cst144_i32 : i32 | |
| %464 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %463] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %465 = spirv.Load "StorageBuffer" %464 : f16 | |
| %466 = spirv.IAdd %31, %cst145_i32 : i32 | |
| %467 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %466] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %468 = spirv.Load "StorageBuffer" %467 : f16 | |
| %469 = spirv.IAdd %31, %cst146_i32 : i32 | |
| %470 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %469] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %471 = spirv.Load "StorageBuffer" %470 : f16 | |
| %472 = spirv.IAdd %31, %cst147_i32 : i32 | |
| %473 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %472] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %474 = spirv.Load "StorageBuffer" %473 : f16 | |
| %475 = spirv.IAdd %31, %cst148_i32 : i32 | |
| %476 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %475] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %477 = spirv.Load "StorageBuffer" %476 : f16 | |
| %478 = spirv.IAdd %31, %cst149_i32 : i32 | |
| %479 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %478] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %480 = spirv.Load "StorageBuffer" %479 : f16 | |
| %481 = spirv.IAdd %31, %cst150_i32 : i32 | |
| %482 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %481] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %483 = spirv.Load "StorageBuffer" %482 : f16 | |
| %484 = spirv.IAdd %31, %cst151_i32 : i32 | |
| %485 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %484] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %486 = spirv.Load "StorageBuffer" %485 : f16 | |
| %487 = spirv.IAdd %31, %cst152_i32 : i32 | |
| %488 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %487] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %489 = spirv.Load "StorageBuffer" %488 : f16 | |
| %490 = spirv.IAdd %31, %cst153_i32 : i32 | |
| %491 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %490] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %492 = spirv.Load "StorageBuffer" %491 : f16 | |
| %493 = spirv.IAdd %31, %cst154_i32 : i32 | |
| %494 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %493] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %495 = spirv.Load "StorageBuffer" %494 : f16 | |
| %496 = spirv.IAdd %31, %cst155_i32 : i32 | |
| %497 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %496] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %498 = spirv.Load "StorageBuffer" %497 : f16 | |
| %499 = spirv.IAdd %31, %cst156_i32 : i32 | |
| %500 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %499] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %501 = spirv.Load "StorageBuffer" %500 : f16 | |
| %502 = spirv.IAdd %31, %cst157_i32 : i32 | |
| %503 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %502] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %504 = spirv.Load "StorageBuffer" %503 : f16 | |
| %505 = spirv.IAdd %31, %cst158_i32 : i32 | |
| %506 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %505] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %507 = spirv.Load "StorageBuffer" %506 : f16 | |
| %508 = spirv.IAdd %31, %cst159_i32 : i32 | |
| %509 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %508] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %510 = spirv.Load "StorageBuffer" %509 : f16 | |
| %511 = spirv.IAdd %31, %cst160_i32 : i32 | |
| %512 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %511] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %513 = spirv.Load "StorageBuffer" %512 : f16 | |
| %514 = spirv.IAdd %31, %cst161_i32 : i32 | |
| %515 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %514] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %516 = spirv.Load "StorageBuffer" %515 : f16 | |
| %517 = spirv.IAdd %31, %cst162_i32 : i32 | |
| %518 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %517] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %519 = spirv.Load "StorageBuffer" %518 : f16 | |
| %520 = spirv.IAdd %31, %cst163_i32 : i32 | |
| %521 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %520] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %522 = spirv.Load "StorageBuffer" %521 : f16 | |
| %523 = spirv.IAdd %31, %cst164_i32 : i32 | |
| %524 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %523] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %525 = spirv.Load "StorageBuffer" %524 : f16 | |
| %526 = spirv.IAdd %31, %cst165_i32 : i32 | |
| %527 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %526] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %528 = spirv.Load "StorageBuffer" %527 : f16 | |
| %529 = spirv.IAdd %31, %cst166_i32 : i32 | |
| %530 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %529] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %531 = spirv.Load "StorageBuffer" %530 : f16 | |
| %532 = spirv.IAdd %31, %cst167_i32 : i32 | |
| %533 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %532] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %534 = spirv.Load "StorageBuffer" %533 : f16 | |
| %535 = spirv.IAdd %31, %cst168_i32 : i32 | |
| %536 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %535] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %537 = spirv.Load "StorageBuffer" %536 : f16 | |
| %538 = spirv.IAdd %31, %cst169_i32 : i32 | |
| %539 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %538] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %540 = spirv.Load "StorageBuffer" %539 : f16 | |
| %541 = spirv.IAdd %31, %cst170_i32 : i32 | |
| %542 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %541] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %543 = spirv.Load "StorageBuffer" %542 : f16 | |
| %544 = spirv.IAdd %31, %cst171_i32 : i32 | |
| %545 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %544] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %546 = spirv.Load "StorageBuffer" %545 : f16 | |
| %547 = spirv.IAdd %31, %cst172_i32 : i32 | |
| %548 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %547] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %549 = spirv.Load "StorageBuffer" %548 : f16 | |
| %550 = spirv.IAdd %31, %cst173_i32 : i32 | |
| %551 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %550] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %552 = spirv.Load "StorageBuffer" %551 : f16 | |
| %553 = spirv.IAdd %31, %cst174_i32 : i32 | |
| %554 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %553] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %555 = spirv.Load "StorageBuffer" %554 : f16 | |
| %556 = spirv.IAdd %31, %cst175_i32 : i32 | |
| %557 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %556] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %558 = spirv.Load "StorageBuffer" %557 : f16 | |
| %559 = spirv.IAdd %31, %cst176_i32 : i32 | |
| %560 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %559] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %561 = spirv.Load "StorageBuffer" %560 : f16 | |
| %562 = spirv.IAdd %31, %cst177_i32 : i32 | |
| %563 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %562] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %564 = spirv.Load "StorageBuffer" %563 : f16 | |
| %565 = spirv.IAdd %31, %cst178_i32 : i32 | |
| %566 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %565] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %567 = spirv.Load "StorageBuffer" %566 : f16 | |
| %568 = spirv.IAdd %31, %cst179_i32 : i32 | |
| %569 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %568] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %570 = spirv.Load "StorageBuffer" %569 : f16 | |
| %571 = spirv.IAdd %31, %cst180_i32 : i32 | |
| %572 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %571] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %573 = spirv.Load "StorageBuffer" %572 : f16 | |
| %574 = spirv.IAdd %31, %cst181_i32 : i32 | |
| %575 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %574] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %576 = spirv.Load "StorageBuffer" %575 : f16 | |
| %577 = spirv.IAdd %31, %cst182_i32 : i32 | |
| %578 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %577] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %579 = spirv.Load "StorageBuffer" %578 : f16 | |
| %580 = spirv.IAdd %31, %cst183_i32 : i32 | |
| %581 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %580] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %582 = spirv.Load "StorageBuffer" %581 : f16 | |
| %583 = spirv.IAdd %31, %cst184_i32 : i32 | |
| %584 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %583] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %585 = spirv.Load "StorageBuffer" %584 : f16 | |
| %586 = spirv.IAdd %31, %cst185_i32 : i32 | |
| %587 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %586] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %588 = spirv.Load "StorageBuffer" %587 : f16 | |
| %589 = spirv.IAdd %31, %cst186_i32 : i32 | |
| %590 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %589] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %591 = spirv.Load "StorageBuffer" %590 : f16 | |
| %592 = spirv.IAdd %31, %cst187_i32 : i32 | |
| %593 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %592] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %594 = spirv.Load "StorageBuffer" %593 : f16 | |
| %595 = spirv.IAdd %31, %cst188_i32 : i32 | |
| %596 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %595] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %597 = spirv.Load "StorageBuffer" %596 : f16 | |
| %598 = spirv.IAdd %31, %cst189_i32 : i32 | |
| %599 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %598] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %600 = spirv.Load "StorageBuffer" %599 : f16 | |
| %601 = spirv.IAdd %31, %cst190_i32 : i32 | |
| %602 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %601] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %603 = spirv.Load "StorageBuffer" %602 : f16 | |
| %604 = spirv.IAdd %31, %cst191_i32 : i32 | |
| %605 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %604] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %606 = spirv.Load "StorageBuffer" %605 : f16 | |
| %607 = spirv.IAdd %31, %cst192_i32 : i32 | |
| %608 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %607] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %609 = spirv.Load "StorageBuffer" %608 : f16 | |
| %610 = spirv.IAdd %31, %cst193_i32 : i32 | |
| %611 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %610] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %612 = spirv.Load "StorageBuffer" %611 : f16 | |
| %613 = spirv.IAdd %31, %cst194_i32 : i32 | |
| %614 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %613] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %615 = spirv.Load "StorageBuffer" %614 : f16 | |
| %616 = spirv.IAdd %31, %cst195_i32 : i32 | |
| %617 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %616] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %618 = spirv.Load "StorageBuffer" %617 : f16 | |
| %619 = spirv.IAdd %31, %cst196_i32 : i32 | |
| %620 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %619] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %621 = spirv.Load "StorageBuffer" %620 : f16 | |
| %622 = spirv.IAdd %31, %cst197_i32 : i32 | |
| %623 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %622] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %624 = spirv.Load "StorageBuffer" %623 : f16 | |
| %625 = spirv.IAdd %31, %cst198_i32 : i32 | |
| %626 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %625] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %627 = spirv.Load "StorageBuffer" %626 : f16 | |
| %628 = spirv.IAdd %31, %cst199_i32 : i32 | |
| %629 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %628] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %630 = spirv.Load "StorageBuffer" %629 : f16 | |
| %631 = spirv.IAdd %31, %cst200_i32 : i32 | |
| %632 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %631] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %633 = spirv.Load "StorageBuffer" %632 : f16 | |
| %634 = spirv.IAdd %31, %cst201_i32 : i32 | |
| %635 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %634] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %636 = spirv.Load "StorageBuffer" %635 : f16 | |
| %637 = spirv.IAdd %31, %cst202_i32 : i32 | |
| %638 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %637] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %639 = spirv.Load "StorageBuffer" %638 : f16 | |
| %640 = spirv.IAdd %31, %cst203_i32 : i32 | |
| %641 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %640] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %642 = spirv.Load "StorageBuffer" %641 : f16 | |
| %643 = spirv.IAdd %31, %cst204_i32 : i32 | |
| %644 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %643] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %645 = spirv.Load "StorageBuffer" %644 : f16 | |
| %646 = spirv.IAdd %31, %cst205_i32 : i32 | |
| %647 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %646] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %648 = spirv.Load "StorageBuffer" %647 : f16 | |
| %649 = spirv.IAdd %31, %cst206_i32 : i32 | |
| %650 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %649] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %651 = spirv.Load "StorageBuffer" %650 : f16 | |
| %652 = spirv.IAdd %31, %cst207_i32 : i32 | |
| %653 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %652] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %654 = spirv.Load "StorageBuffer" %653 : f16 | |
| %655 = spirv.IAdd %31, %cst208_i32 : i32 | |
| %656 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %655] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %657 = spirv.Load "StorageBuffer" %656 : f16 | |
| %658 = spirv.IAdd %31, %cst209_i32 : i32 | |
| %659 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %658] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %660 = spirv.Load "StorageBuffer" %659 : f16 | |
| %661 = spirv.IAdd %31, %cst210_i32 : i32 | |
| %662 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %661] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %663 = spirv.Load "StorageBuffer" %662 : f16 | |
| %664 = spirv.IAdd %31, %cst211_i32 : i32 | |
| %665 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %664] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %666 = spirv.Load "StorageBuffer" %665 : f16 | |
| %667 = spirv.IAdd %31, %cst212_i32 : i32 | |
| %668 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %667] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %669 = spirv.Load "StorageBuffer" %668 : f16 | |
| %670 = spirv.IAdd %31, %cst213_i32 : i32 | |
| %671 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %670] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %672 = spirv.Load "StorageBuffer" %671 : f16 | |
| %673 = spirv.IAdd %31, %cst214_i32 : i32 | |
| %674 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %673] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %675 = spirv.Load "StorageBuffer" %674 : f16 | |
| %676 = spirv.IAdd %31, %cst215_i32 : i32 | |
| %677 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %676] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %678 = spirv.Load "StorageBuffer" %677 : f16 | |
| %679 = spirv.IAdd %31, %cst216_i32 : i32 | |
| %680 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %679] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %681 = spirv.Load "StorageBuffer" %680 : f16 | |
| %682 = spirv.IAdd %31, %cst217_i32 : i32 | |
| %683 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %682] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %684 = spirv.Load "StorageBuffer" %683 : f16 | |
| %685 = spirv.IAdd %31, %cst218_i32 : i32 | |
| %686 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %685] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %687 = spirv.Load "StorageBuffer" %686 : f16 | |
| %688 = spirv.IAdd %31, %cst219_i32 : i32 | |
| %689 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %688] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %690 = spirv.Load "StorageBuffer" %689 : f16 | |
| %691 = spirv.IAdd %31, %cst220_i32 : i32 | |
| %692 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %691] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %693 = spirv.Load "StorageBuffer" %692 : f16 | |
| %694 = spirv.IAdd %31, %cst221_i32 : i32 | |
| %695 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %694] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %696 = spirv.Load "StorageBuffer" %695 : f16 | |
| %697 = spirv.IAdd %31, %cst222_i32 : i32 | |
| %698 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %697] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %699 = spirv.Load "StorageBuffer" %698 : f16 | |
| %700 = spirv.IAdd %31, %cst223_i32 : i32 | |
| %701 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %700] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %702 = spirv.Load "StorageBuffer" %701 : f16 | |
| %703 = spirv.IAdd %31, %cst224_i32 : i32 | |
| %704 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %703] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %705 = spirv.Load "StorageBuffer" %704 : f16 | |
| %706 = spirv.IAdd %31, %cst225_i32 : i32 | |
| %707 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %706] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %708 = spirv.Load "StorageBuffer" %707 : f16 | |
| %709 = spirv.IAdd %31, %cst226_i32 : i32 | |
| %710 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %709] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %711 = spirv.Load "StorageBuffer" %710 : f16 | |
| %712 = spirv.IAdd %31, %cst227_i32 : i32 | |
| %713 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %712] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %714 = spirv.Load "StorageBuffer" %713 : f16 | |
| %715 = spirv.IAdd %31, %cst228_i32 : i32 | |
| %716 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %715] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %717 = spirv.Load "StorageBuffer" %716 : f16 | |
| %718 = spirv.IAdd %31, %cst229_i32 : i32 | |
| %719 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %718] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %720 = spirv.Load "StorageBuffer" %719 : f16 | |
| %721 = spirv.IAdd %31, %cst230_i32 : i32 | |
| %722 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %721] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %723 = spirv.Load "StorageBuffer" %722 : f16 | |
| %724 = spirv.IAdd %31, %cst231_i32 : i32 | |
| %725 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %724] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %726 = spirv.Load "StorageBuffer" %725 : f16 | |
| %727 = spirv.IAdd %31, %cst232_i32 : i32 | |
| %728 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %727] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %729 = spirv.Load "StorageBuffer" %728 : f16 | |
| %730 = spirv.IAdd %31, %cst233_i32 : i32 | |
| %731 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %730] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %732 = spirv.Load "StorageBuffer" %731 : f16 | |
| %733 = spirv.IAdd %31, %cst234_i32 : i32 | |
| %734 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %733] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %735 = spirv.Load "StorageBuffer" %734 : f16 | |
| %736 = spirv.IAdd %31, %cst235_i32 : i32 | |
| %737 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %736] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %738 = spirv.Load "StorageBuffer" %737 : f16 | |
| %739 = spirv.IAdd %31, %cst236_i32 : i32 | |
| %740 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %739] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %741 = spirv.Load "StorageBuffer" %740 : f16 | |
| %742 = spirv.IAdd %31, %cst237_i32 : i32 | |
| %743 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %742] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %744 = spirv.Load "StorageBuffer" %743 : f16 | |
| %745 = spirv.IAdd %31, %cst238_i32 : i32 | |
| %746 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %745] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %747 = spirv.Load "StorageBuffer" %746 : f16 | |
| %748 = spirv.IAdd %31, %cst239_i32 : i32 | |
| %749 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %748] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %750 = spirv.Load "StorageBuffer" %749 : f16 | |
| %751 = spirv.IAdd %31, %cst240_i32 : i32 | |
| %752 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %751] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %753 = spirv.Load "StorageBuffer" %752 : f16 | |
| %754 = spirv.IAdd %31, %cst241_i32 : i32 | |
| %755 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %754] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %756 = spirv.Load "StorageBuffer" %755 : f16 | |
| %757 = spirv.IAdd %31, %cst242_i32 : i32 | |
| %758 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %757] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %759 = spirv.Load "StorageBuffer" %758 : f16 | |
| %760 = spirv.IAdd %31, %cst243_i32 : i32 | |
| %761 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %760] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %762 = spirv.Load "StorageBuffer" %761 : f16 | |
| %763 = spirv.IAdd %31, %cst244_i32 : i32 | |
| %764 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %763] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %765 = spirv.Load "StorageBuffer" %764 : f16 | |
| %766 = spirv.IAdd %31, %cst245_i32 : i32 | |
| %767 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %766] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %768 = spirv.Load "StorageBuffer" %767 : f16 | |
| %769 = spirv.IAdd %31, %cst246_i32 : i32 | |
| %770 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %769] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %771 = spirv.Load "StorageBuffer" %770 : f16 | |
| %772 = spirv.IAdd %31, %cst247_i32 : i32 | |
| %773 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %772] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %774 = spirv.Load "StorageBuffer" %773 : f16 | |
| %775 = spirv.IAdd %31, %cst248_i32 : i32 | |
| %776 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %775] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %777 = spirv.Load "StorageBuffer" %776 : f16 | |
| %778 = spirv.IAdd %31, %cst249_i32 : i32 | |
| %779 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %778] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %780 = spirv.Load "StorageBuffer" %779 : f16 | |
| %781 = spirv.IAdd %31, %cst250_i32 : i32 | |
| %782 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %781] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %783 = spirv.Load "StorageBuffer" %782 : f16 | |
| %784 = spirv.IAdd %31, %cst251_i32 : i32 | |
| %785 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %784] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %786 = spirv.Load "StorageBuffer" %785 : f16 | |
| %787 = spirv.IAdd %31, %cst252_i32 : i32 | |
| %788 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %787] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %789 = spirv.Load "StorageBuffer" %788 : f16 | |
| %790 = spirv.IAdd %31, %cst253_i32 : i32 | |
| %791 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %790] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %792 = spirv.Load "StorageBuffer" %791 : f16 | |
| %793 = spirv.IAdd %31, %cst254_i32 : i32 | |
| %794 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %793] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %795 = spirv.Load "StorageBuffer" %794 : f16 | |
| %796 = spirv.IAdd %31, %cst255_i32 : i32 | |
| %797 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %796] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %798 = spirv.Load "StorageBuffer" %797 : f16 | |
| %799 = spirv.IAdd %31, %cst256_i32 : i32 | |
| %800 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %799] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %801 = spirv.Load "StorageBuffer" %800 : f16 | |
| %802 = spirv.IAdd %31, %cst257_i32 : i32 | |
| %803 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %802] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %804 = spirv.Load "StorageBuffer" %803 : f16 | |
| %805 = spirv.IAdd %31, %cst258_i32 : i32 | |
| %806 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %805] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %807 = spirv.Load "StorageBuffer" %806 : f16 | |
| %808 = spirv.IAdd %31, %cst259_i32 : i32 | |
| %809 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %808] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %810 = spirv.Load "StorageBuffer" %809 : f16 | |
| %811 = spirv.IAdd %31, %cst260_i32 : i32 | |
| %812 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %811] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %813 = spirv.Load "StorageBuffer" %812 : f16 | |
| %814 = spirv.IAdd %31, %cst261_i32 : i32 | |
| %815 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %814] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %816 = spirv.Load "StorageBuffer" %815 : f16 | |
| %817 = spirv.IAdd %31, %cst262_i32 : i32 | |
| %818 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %817] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %819 = spirv.Load "StorageBuffer" %818 : f16 | |
| %820 = spirv.IAdd %31, %cst263_i32 : i32 | |
| %821 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %820] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %822 = spirv.Load "StorageBuffer" %821 : f16 | |
| %823 = spirv.IAdd %31, %cst264_i32 : i32 | |
| %824 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %823] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %825 = spirv.Load "StorageBuffer" %824 : f16 | |
| %826 = spirv.IAdd %31, %cst265_i32 : i32 | |
| %827 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %826] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %828 = spirv.Load "StorageBuffer" %827 : f16 | |
| %829 = spirv.IAdd %31, %cst266_i32 : i32 | |
| %830 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %829] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %831 = spirv.Load "StorageBuffer" %830 : f16 | |
| %832 = spirv.IAdd %31, %cst267_i32 : i32 | |
| %833 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %832] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %834 = spirv.Load "StorageBuffer" %833 : f16 | |
| %835 = spirv.IAdd %31, %cst268_i32 : i32 | |
| %836 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %835] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %837 = spirv.Load "StorageBuffer" %836 : f16 | |
| %838 = spirv.IAdd %31, %cst269_i32 : i32 | |
| %839 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %838] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %840 = spirv.Load "StorageBuffer" %839 : f16 | |
| %841 = spirv.IAdd %31, %cst270_i32 : i32 | |
| %842 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %841] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %843 = spirv.Load "StorageBuffer" %842 : f16 | |
| %844 = spirv.IAdd %31, %cst271_i32 : i32 | |
| %845 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %844] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %846 = spirv.Load "StorageBuffer" %845 : f16 | |
| %847 = spirv.IAdd %31, %cst272_i32 : i32 | |
| %848 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %847] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %849 = spirv.Load "StorageBuffer" %848 : f16 | |
| %850 = spirv.IAdd %31, %cst273_i32 : i32 | |
| %851 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %850] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %852 = spirv.Load "StorageBuffer" %851 : f16 | |
| %853 = spirv.IAdd %31, %cst274_i32 : i32 | |
| %854 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %853] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %855 = spirv.Load "StorageBuffer" %854 : f16 | |
| %856 = spirv.IAdd %31, %cst275_i32 : i32 | |
| %857 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %856] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %858 = spirv.Load "StorageBuffer" %857 : f16 | |
| %859 = spirv.IAdd %31, %cst276_i32 : i32 | |
| %860 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %859] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %861 = spirv.Load "StorageBuffer" %860 : f16 | |
| %862 = spirv.IAdd %31, %cst277_i32 : i32 | |
| %863 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %862] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %864 = spirv.Load "StorageBuffer" %863 : f16 | |
| %865 = spirv.IAdd %31, %cst278_i32 : i32 | |
| %866 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %865] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %867 = spirv.Load "StorageBuffer" %866 : f16 | |
| %868 = spirv.IAdd %31, %cst279_i32 : i32 | |
| %869 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %868] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %870 = spirv.Load "StorageBuffer" %869 : f16 | |
| %871 = spirv.IAdd %31, %cst280_i32 : i32 | |
| %872 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %871] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %873 = spirv.Load "StorageBuffer" %872 : f16 | |
| %874 = spirv.IAdd %31, %cst281_i32 : i32 | |
| %875 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %874] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %876 = spirv.Load "StorageBuffer" %875 : f16 | |
| %877 = spirv.IAdd %31, %cst282_i32 : i32 | |
| %878 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %877] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %879 = spirv.Load "StorageBuffer" %878 : f16 | |
| %880 = spirv.IAdd %31, %cst283_i32 : i32 | |
| %881 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %880] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %882 = spirv.Load "StorageBuffer" %881 : f16 | |
| %883 = spirv.IAdd %31, %cst284_i32 : i32 | |
| %884 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %883] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %885 = spirv.Load "StorageBuffer" %884 : f16 | |
| %886 = spirv.IAdd %31, %cst285_i32 : i32 | |
| %887 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %886] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %888 = spirv.Load "StorageBuffer" %887 : f16 | |
| %889 = spirv.IAdd %31, %cst286_i32 : i32 | |
| %890 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %889] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %891 = spirv.Load "StorageBuffer" %890 : f16 | |
| %892 = spirv.IAdd %31, %cst287_i32 : i32 | |
| %893 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %892] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %894 = spirv.Load "StorageBuffer" %893 : f16 | |
| %895 = spirv.IAdd %31, %cst288_i32 : i32 | |
| %896 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %895] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %897 = spirv.Load "StorageBuffer" %896 : f16 | |
| %898 = spirv.IAdd %31, %cst289_i32 : i32 | |
| %899 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %898] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %900 = spirv.Load "StorageBuffer" %899 : f16 | |
| %901 = spirv.IAdd %31, %cst290_i32 : i32 | |
| %902 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %901] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %903 = spirv.Load "StorageBuffer" %902 : f16 | |
| %904 = spirv.IAdd %31, %cst291_i32 : i32 | |
| %905 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %904] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %906 = spirv.Load "StorageBuffer" %905 : f16 | |
| %907 = spirv.IAdd %31, %cst292_i32 : i32 | |
| %908 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %907] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %909 = spirv.Load "StorageBuffer" %908 : f16 | |
| %910 = spirv.IAdd %31, %cst293_i32 : i32 | |
| %911 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %910] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %912 = spirv.Load "StorageBuffer" %911 : f16 | |
| %913 = spirv.IAdd %31, %cst294_i32 : i32 | |
| %914 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %913] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %915 = spirv.Load "StorageBuffer" %914 : f16 | |
| %916 = spirv.IAdd %31, %cst295_i32 : i32 | |
| %917 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %916] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %918 = spirv.Load "StorageBuffer" %917 : f16 | |
| %919 = spirv.IAdd %31, %cst296_i32 : i32 | |
| %920 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %919] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %921 = spirv.Load "StorageBuffer" %920 : f16 | |
| %922 = spirv.IAdd %31, %cst297_i32 : i32 | |
| %923 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %922] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %924 = spirv.Load "StorageBuffer" %923 : f16 | |
| %925 = spirv.IAdd %31, %cst298_i32 : i32 | |
| %926 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %925] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %927 = spirv.Load "StorageBuffer" %926 : f16 | |
| %928 = spirv.IAdd %31, %cst299_i32 : i32 | |
| %929 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %928] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %930 = spirv.Load "StorageBuffer" %929 : f16 | |
| %931 = spirv.IAdd %31, %cst300_i32 : i32 | |
| %932 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %931] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %933 = spirv.Load "StorageBuffer" %932 : f16 | |
| %934 = spirv.IAdd %31, %cst301_i32 : i32 | |
| %935 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %934] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %936 = spirv.Load "StorageBuffer" %935 : f16 | |
| %937 = spirv.IAdd %31, %cst302_i32 : i32 | |
| %938 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %937] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %939 = spirv.Load "StorageBuffer" %938 : f16 | |
| %940 = spirv.IAdd %31, %cst303_i32 : i32 | |
| %941 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %940] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %942 = spirv.Load "StorageBuffer" %941 : f16 | |
| %943 = spirv.IAdd %31, %cst304_i32 : i32 | |
| %944 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %943] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %945 = spirv.Load "StorageBuffer" %944 : f16 | |
| %946 = spirv.IAdd %31, %cst305_i32 : i32 | |
| %947 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %946] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %948 = spirv.Load "StorageBuffer" %947 : f16 | |
| %949 = spirv.IAdd %31, %cst306_i32 : i32 | |
| %950 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %949] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %951 = spirv.Load "StorageBuffer" %950 : f16 | |
| %952 = spirv.IAdd %31, %cst307_i32 : i32 | |
| %953 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %952] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32 | |
| %954 = spirv.Load "StorageBuffer" %953 : f16 | |
| %955 = spirv.CompositeConstruct %33, %264, %495, %726 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %956 = spirv.CompositeConstruct %36, %267, %498, %729 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %957 = spirv.GL.FMax %956, %955 : vector<4xf16> | |
| %958 = spirv.IsNan %956 : vector<4xf16> | |
| %959 = spirv.IsNan %955 : vector<4xf16> | |
| %960 = spirv.Select %958, %956, %957 : vector<4xi1>, vector<4xf16> | |
| %961 = spirv.Select %959, %955, %960 : vector<4xi1>, vector<4xf16> | |
| %962 = spirv.CompositeConstruct %39, %270, %501, %732 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %963 = spirv.GL.FMax %962, %961 : vector<4xf16> | |
| %964 = spirv.IsNan %962 : vector<4xf16> | |
| %965 = spirv.IsNan %961 : vector<4xf16> | |
| %966 = spirv.Select %964, %962, %963 : vector<4xi1>, vector<4xf16> | |
| %967 = spirv.Select %965, %961, %966 : vector<4xi1>, vector<4xf16> | |
| %968 = spirv.CompositeConstruct %42, %273, %504, %735 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %969 = spirv.GL.FMax %968, %967 : vector<4xf16> | |
| %970 = spirv.IsNan %968 : vector<4xf16> | |
| %971 = spirv.IsNan %967 : vector<4xf16> | |
| %972 = spirv.Select %970, %968, %969 : vector<4xi1>, vector<4xf16> | |
| %973 = spirv.Select %971, %967, %972 : vector<4xi1>, vector<4xf16> | |
| %974 = spirv.CompositeConstruct %45, %276, %507, %738 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %975 = spirv.GL.FMax %974, %973 : vector<4xf16> | |
| %976 = spirv.IsNan %974 : vector<4xf16> | |
| %977 = spirv.IsNan %973 : vector<4xf16> | |
| %978 = spirv.Select %976, %974, %975 : vector<4xi1>, vector<4xf16> | |
| %979 = spirv.Select %977, %973, %978 : vector<4xi1>, vector<4xf16> | |
| %980 = spirv.CompositeConstruct %48, %279, %510, %741 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %981 = spirv.GL.FMax %980, %979 : vector<4xf16> | |
| %982 = spirv.IsNan %980 : vector<4xf16> | |
| %983 = spirv.IsNan %979 : vector<4xf16> | |
| %984 = spirv.Select %982, %980, %981 : vector<4xi1>, vector<4xf16> | |
| %985 = spirv.Select %983, %979, %984 : vector<4xi1>, vector<4xf16> | |
| %986 = spirv.CompositeConstruct %51, %282, %513, %744 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %987 = spirv.GL.FMax %986, %985 : vector<4xf16> | |
| %988 = spirv.IsNan %986 : vector<4xf16> | |
| %989 = spirv.IsNan %985 : vector<4xf16> | |
| %990 = spirv.Select %988, %986, %987 : vector<4xi1>, vector<4xf16> | |
| %991 = spirv.Select %989, %985, %990 : vector<4xi1>, vector<4xf16> | |
| %992 = spirv.CompositeConstruct %54, %285, %516, %747 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %993 = spirv.GL.FMax %992, %991 : vector<4xf16> | |
| %994 = spirv.IsNan %992 : vector<4xf16> | |
| %995 = spirv.IsNan %991 : vector<4xf16> | |
| %996 = spirv.Select %994, %992, %993 : vector<4xi1>, vector<4xf16> | |
| %997 = spirv.Select %995, %991, %996 : vector<4xi1>, vector<4xf16> | |
| %998 = spirv.CompositeConstruct %57, %288, %519, %750 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %999 = spirv.GL.FMax %998, %997 : vector<4xf16> | |
| %1000 = spirv.IsNan %998 : vector<4xf16> | |
| %1001 = spirv.IsNan %997 : vector<4xf16> | |
| %1002 = spirv.Select %1000, %998, %999 : vector<4xi1>, vector<4xf16> | |
| %1003 = spirv.Select %1001, %997, %1002 : vector<4xi1>, vector<4xf16> | |
| %1004 = spirv.CompositeConstruct %60, %291, %522, %753 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1005 = spirv.GL.FMax %1004, %1003 : vector<4xf16> | |
| %1006 = spirv.IsNan %1004 : vector<4xf16> | |
| %1007 = spirv.IsNan %1003 : vector<4xf16> | |
| %1008 = spirv.Select %1006, %1004, %1005 : vector<4xi1>, vector<4xf16> | |
| %1009 = spirv.Select %1007, %1003, %1008 : vector<4xi1>, vector<4xf16> | |
| %1010 = spirv.CompositeConstruct %63, %294, %525, %756 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1011 = spirv.GL.FMax %1010, %1009 : vector<4xf16> | |
| %1012 = spirv.IsNan %1010 : vector<4xf16> | |
| %1013 = spirv.IsNan %1009 : vector<4xf16> | |
| %1014 = spirv.Select %1012, %1010, %1011 : vector<4xi1>, vector<4xf16> | |
| %1015 = spirv.Select %1013, %1009, %1014 : vector<4xi1>, vector<4xf16> | |
| %1016 = spirv.CompositeConstruct %66, %297, %528, %759 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1017 = spirv.GL.FMax %1016, %1015 : vector<4xf16> | |
| %1018 = spirv.IsNan %1016 : vector<4xf16> | |
| %1019 = spirv.IsNan %1015 : vector<4xf16> | |
| %1020 = spirv.Select %1018, %1016, %1017 : vector<4xi1>, vector<4xf16> | |
| %1021 = spirv.Select %1019, %1015, %1020 : vector<4xi1>, vector<4xf16> | |
| %1022 = spirv.CompositeConstruct %69, %300, %531, %762 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1023 = spirv.GL.FMax %1022, %1021 : vector<4xf16> | |
| %1024 = spirv.IsNan %1022 : vector<4xf16> | |
| %1025 = spirv.IsNan %1021 : vector<4xf16> | |
| %1026 = spirv.Select %1024, %1022, %1023 : vector<4xi1>, vector<4xf16> | |
| %1027 = spirv.Select %1025, %1021, %1026 : vector<4xi1>, vector<4xf16> | |
| %1028 = spirv.CompositeConstruct %72, %303, %534, %765 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1029 = spirv.GL.FMax %1028, %1027 : vector<4xf16> | |
| %1030 = spirv.IsNan %1028 : vector<4xf16> | |
| %1031 = spirv.IsNan %1027 : vector<4xf16> | |
| %1032 = spirv.Select %1030, %1028, %1029 : vector<4xi1>, vector<4xf16> | |
| %1033 = spirv.Select %1031, %1027, %1032 : vector<4xi1>, vector<4xf16> | |
| %1034 = spirv.CompositeConstruct %75, %306, %537, %768 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1035 = spirv.GL.FMax %1034, %1033 : vector<4xf16> | |
| %1036 = spirv.IsNan %1034 : vector<4xf16> | |
| %1037 = spirv.IsNan %1033 : vector<4xf16> | |
| %1038 = spirv.Select %1036, %1034, %1035 : vector<4xi1>, vector<4xf16> | |
| %1039 = spirv.Select %1037, %1033, %1038 : vector<4xi1>, vector<4xf16> | |
| %1040 = spirv.CompositeConstruct %78, %309, %540, %771 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1041 = spirv.GL.FMax %1040, %1039 : vector<4xf16> | |
| %1042 = spirv.IsNan %1040 : vector<4xf16> | |
| %1043 = spirv.IsNan %1039 : vector<4xf16> | |
| %1044 = spirv.Select %1042, %1040, %1041 : vector<4xi1>, vector<4xf16> | |
| %1045 = spirv.Select %1043, %1039, %1044 : vector<4xi1>, vector<4xf16> | |
| %1046 = spirv.CompositeConstruct %81, %312, %543, %774 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1047 = spirv.GL.FMax %1046, %1045 : vector<4xf16> | |
| %1048 = spirv.IsNan %1046 : vector<4xf16> | |
| %1049 = spirv.IsNan %1045 : vector<4xf16> | |
| %1050 = spirv.Select %1048, %1046, %1047 : vector<4xi1>, vector<4xf16> | |
| %1051 = spirv.Select %1049, %1045, %1050 : vector<4xi1>, vector<4xf16> | |
| %1052 = spirv.CompositeConstruct %84, %315, %546, %777 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1053 = spirv.GL.FMax %1052, %1051 : vector<4xf16> | |
| %1054 = spirv.IsNan %1052 : vector<4xf16> | |
| %1055 = spirv.IsNan %1051 : vector<4xf16> | |
| %1056 = spirv.Select %1054, %1052, %1053 : vector<4xi1>, vector<4xf16> | |
| %1057 = spirv.Select %1055, %1051, %1056 : vector<4xi1>, vector<4xf16> | |
| %1058 = spirv.CompositeConstruct %87, %318, %549, %780 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1059 = spirv.GL.FMax %1058, %1057 : vector<4xf16> | |
| %1060 = spirv.IsNan %1058 : vector<4xf16> | |
| %1061 = spirv.IsNan %1057 : vector<4xf16> | |
| %1062 = spirv.Select %1060, %1058, %1059 : vector<4xi1>, vector<4xf16> | |
| %1063 = spirv.Select %1061, %1057, %1062 : vector<4xi1>, vector<4xf16> | |
| %1064 = spirv.CompositeConstruct %90, %321, %552, %783 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1065 = spirv.GL.FMax %1064, %1063 : vector<4xf16> | |
| %1066 = spirv.IsNan %1064 : vector<4xf16> | |
| %1067 = spirv.IsNan %1063 : vector<4xf16> | |
| %1068 = spirv.Select %1066, %1064, %1065 : vector<4xi1>, vector<4xf16> | |
| %1069 = spirv.Select %1067, %1063, %1068 : vector<4xi1>, vector<4xf16> | |
| %1070 = spirv.CompositeConstruct %93, %324, %555, %786 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1071 = spirv.GL.FMax %1070, %1069 : vector<4xf16> | |
| %1072 = spirv.IsNan %1070 : vector<4xf16> | |
| %1073 = spirv.IsNan %1069 : vector<4xf16> | |
| %1074 = spirv.Select %1072, %1070, %1071 : vector<4xi1>, vector<4xf16> | |
| %1075 = spirv.Select %1073, %1069, %1074 : vector<4xi1>, vector<4xf16> | |
| %1076 = spirv.CompositeConstruct %96, %327, %558, %789 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1077 = spirv.GL.FMax %1076, %1075 : vector<4xf16> | |
| %1078 = spirv.IsNan %1076 : vector<4xf16> | |
| %1079 = spirv.IsNan %1075 : vector<4xf16> | |
| %1080 = spirv.Select %1078, %1076, %1077 : vector<4xi1>, vector<4xf16> | |
| %1081 = spirv.Select %1079, %1075, %1080 : vector<4xi1>, vector<4xf16> | |
| %1082 = spirv.CompositeConstruct %99, %330, %561, %792 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1083 = spirv.GL.FMax %1082, %1081 : vector<4xf16> | |
| %1084 = spirv.IsNan %1082 : vector<4xf16> | |
| %1085 = spirv.IsNan %1081 : vector<4xf16> | |
| %1086 = spirv.Select %1084, %1082, %1083 : vector<4xi1>, vector<4xf16> | |
| %1087 = spirv.Select %1085, %1081, %1086 : vector<4xi1>, vector<4xf16> | |
| %1088 = spirv.CompositeConstruct %102, %333, %564, %795 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1089 = spirv.GL.FMax %1088, %1087 : vector<4xf16> | |
| %1090 = spirv.IsNan %1088 : vector<4xf16> | |
| %1091 = spirv.IsNan %1087 : vector<4xf16> | |
| %1092 = spirv.Select %1090, %1088, %1089 : vector<4xi1>, vector<4xf16> | |
| %1093 = spirv.Select %1091, %1087, %1092 : vector<4xi1>, vector<4xf16> | |
| %1094 = spirv.CompositeConstruct %105, %336, %567, %798 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1095 = spirv.GL.FMax %1094, %1093 : vector<4xf16> | |
| %1096 = spirv.IsNan %1094 : vector<4xf16> | |
| %1097 = spirv.IsNan %1093 : vector<4xf16> | |
| %1098 = spirv.Select %1096, %1094, %1095 : vector<4xi1>, vector<4xf16> | |
| %1099 = spirv.Select %1097, %1093, %1098 : vector<4xi1>, vector<4xf16> | |
| %1100 = spirv.CompositeConstruct %108, %339, %570, %801 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1101 = spirv.GL.FMax %1100, %1099 : vector<4xf16> | |
| %1102 = spirv.IsNan %1100 : vector<4xf16> | |
| %1103 = spirv.IsNan %1099 : vector<4xf16> | |
| %1104 = spirv.Select %1102, %1100, %1101 : vector<4xi1>, vector<4xf16> | |
| %1105 = spirv.Select %1103, %1099, %1104 : vector<4xi1>, vector<4xf16> | |
| %1106 = spirv.CompositeConstruct %111, %342, %573, %804 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1107 = spirv.GL.FMax %1106, %1105 : vector<4xf16> | |
| %1108 = spirv.IsNan %1106 : vector<4xf16> | |
| %1109 = spirv.IsNan %1105 : vector<4xf16> | |
| %1110 = spirv.Select %1108, %1106, %1107 : vector<4xi1>, vector<4xf16> | |
| %1111 = spirv.Select %1109, %1105, %1110 : vector<4xi1>, vector<4xf16> | |
| %1112 = spirv.CompositeConstruct %114, %345, %576, %807 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1113 = spirv.GL.FMax %1112, %1111 : vector<4xf16> | |
| %1114 = spirv.IsNan %1112 : vector<4xf16> | |
| %1115 = spirv.IsNan %1111 : vector<4xf16> | |
| %1116 = spirv.Select %1114, %1112, %1113 : vector<4xi1>, vector<4xf16> | |
| %1117 = spirv.Select %1115, %1111, %1116 : vector<4xi1>, vector<4xf16> | |
| %1118 = spirv.CompositeConstruct %117, %348, %579, %810 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1119 = spirv.GL.FMax %1118, %1117 : vector<4xf16> | |
| %1120 = spirv.IsNan %1118 : vector<4xf16> | |
| %1121 = spirv.IsNan %1117 : vector<4xf16> | |
| %1122 = spirv.Select %1120, %1118, %1119 : vector<4xi1>, vector<4xf16> | |
| %1123 = spirv.Select %1121, %1117, %1122 : vector<4xi1>, vector<4xf16> | |
| %1124 = spirv.CompositeConstruct %120, %351, %582, %813 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1125 = spirv.GL.FMax %1124, %1123 : vector<4xf16> | |
| %1126 = spirv.IsNan %1124 : vector<4xf16> | |
| %1127 = spirv.IsNan %1123 : vector<4xf16> | |
| %1128 = spirv.Select %1126, %1124, %1125 : vector<4xi1>, vector<4xf16> | |
| %1129 = spirv.Select %1127, %1123, %1128 : vector<4xi1>, vector<4xf16> | |
| %1130 = spirv.CompositeConstruct %123, %354, %585, %816 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1131 = spirv.GL.FMax %1130, %1129 : vector<4xf16> | |
| %1132 = spirv.IsNan %1130 : vector<4xf16> | |
| %1133 = spirv.IsNan %1129 : vector<4xf16> | |
| %1134 = spirv.Select %1132, %1130, %1131 : vector<4xi1>, vector<4xf16> | |
| %1135 = spirv.Select %1133, %1129, %1134 : vector<4xi1>, vector<4xf16> | |
| %1136 = spirv.CompositeConstruct %126, %357, %588, %819 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1137 = spirv.GL.FMax %1136, %1135 : vector<4xf16> | |
| %1138 = spirv.IsNan %1136 : vector<4xf16> | |
| %1139 = spirv.IsNan %1135 : vector<4xf16> | |
| %1140 = spirv.Select %1138, %1136, %1137 : vector<4xi1>, vector<4xf16> | |
| %1141 = spirv.Select %1139, %1135, %1140 : vector<4xi1>, vector<4xf16> | |
| %1142 = spirv.CompositeConstruct %129, %360, %591, %822 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1143 = spirv.GL.FMax %1142, %1141 : vector<4xf16> | |
| %1144 = spirv.IsNan %1142 : vector<4xf16> | |
| %1145 = spirv.IsNan %1141 : vector<4xf16> | |
| %1146 = spirv.Select %1144, %1142, %1143 : vector<4xi1>, vector<4xf16> | |
| %1147 = spirv.Select %1145, %1141, %1146 : vector<4xi1>, vector<4xf16> | |
| %1148 = spirv.CompositeConstruct %132, %363, %594, %825 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1149 = spirv.GL.FMax %1148, %1147 : vector<4xf16> | |
| %1150 = spirv.IsNan %1148 : vector<4xf16> | |
| %1151 = spirv.IsNan %1147 : vector<4xf16> | |
| %1152 = spirv.Select %1150, %1148, %1149 : vector<4xi1>, vector<4xf16> | |
| %1153 = spirv.Select %1151, %1147, %1152 : vector<4xi1>, vector<4xf16> | |
| %1154 = spirv.CompositeConstruct %135, %366, %597, %828 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1155 = spirv.GL.FMax %1154, %1153 : vector<4xf16> | |
| %1156 = spirv.IsNan %1154 : vector<4xf16> | |
| %1157 = spirv.IsNan %1153 : vector<4xf16> | |
| %1158 = spirv.Select %1156, %1154, %1155 : vector<4xi1>, vector<4xf16> | |
| %1159 = spirv.Select %1157, %1153, %1158 : vector<4xi1>, vector<4xf16> | |
| %1160 = spirv.CompositeConstruct %138, %369, %600, %831 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1161 = spirv.GL.FMax %1160, %1159 : vector<4xf16> | |
| %1162 = spirv.IsNan %1160 : vector<4xf16> | |
| %1163 = spirv.IsNan %1159 : vector<4xf16> | |
| %1164 = spirv.Select %1162, %1160, %1161 : vector<4xi1>, vector<4xf16> | |
| %1165 = spirv.Select %1163, %1159, %1164 : vector<4xi1>, vector<4xf16> | |
| %1166 = spirv.CompositeConstruct %141, %372, %603, %834 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1167 = spirv.GL.FMax %1166, %1165 : vector<4xf16> | |
| %1168 = spirv.IsNan %1166 : vector<4xf16> | |
| %1169 = spirv.IsNan %1165 : vector<4xf16> | |
| %1170 = spirv.Select %1168, %1166, %1167 : vector<4xi1>, vector<4xf16> | |
| %1171 = spirv.Select %1169, %1165, %1170 : vector<4xi1>, vector<4xf16> | |
| %1172 = spirv.CompositeConstruct %144, %375, %606, %837 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1173 = spirv.GL.FMax %1172, %1171 : vector<4xf16> | |
| %1174 = spirv.IsNan %1172 : vector<4xf16> | |
| %1175 = spirv.IsNan %1171 : vector<4xf16> | |
| %1176 = spirv.Select %1174, %1172, %1173 : vector<4xi1>, vector<4xf16> | |
| %1177 = spirv.Select %1175, %1171, %1176 : vector<4xi1>, vector<4xf16> | |
| %1178 = spirv.CompositeConstruct %147, %378, %609, %840 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1179 = spirv.GL.FMax %1178, %1177 : vector<4xf16> | |
| %1180 = spirv.IsNan %1178 : vector<4xf16> | |
| %1181 = spirv.IsNan %1177 : vector<4xf16> | |
| %1182 = spirv.Select %1180, %1178, %1179 : vector<4xi1>, vector<4xf16> | |
| %1183 = spirv.Select %1181, %1177, %1182 : vector<4xi1>, vector<4xf16> | |
| %1184 = spirv.CompositeConstruct %150, %381, %612, %843 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1185 = spirv.GL.FMax %1184, %1183 : vector<4xf16> | |
| %1186 = spirv.IsNan %1184 : vector<4xf16> | |
| %1187 = spirv.IsNan %1183 : vector<4xf16> | |
| %1188 = spirv.Select %1186, %1184, %1185 : vector<4xi1>, vector<4xf16> | |
| %1189 = spirv.Select %1187, %1183, %1188 : vector<4xi1>, vector<4xf16> | |
| %1190 = spirv.CompositeConstruct %153, %384, %615, %846 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1191 = spirv.GL.FMax %1190, %1189 : vector<4xf16> | |
| %1192 = spirv.IsNan %1190 : vector<4xf16> | |
| %1193 = spirv.IsNan %1189 : vector<4xf16> | |
| %1194 = spirv.Select %1192, %1190, %1191 : vector<4xi1>, vector<4xf16> | |
| %1195 = spirv.Select %1193, %1189, %1194 : vector<4xi1>, vector<4xf16> | |
| %1196 = spirv.CompositeConstruct %156, %387, %618, %849 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1197 = spirv.GL.FMax %1196, %1195 : vector<4xf16> | |
| %1198 = spirv.IsNan %1196 : vector<4xf16> | |
| %1199 = spirv.IsNan %1195 : vector<4xf16> | |
| %1200 = spirv.Select %1198, %1196, %1197 : vector<4xi1>, vector<4xf16> | |
| %1201 = spirv.Select %1199, %1195, %1200 : vector<4xi1>, vector<4xf16> | |
| %1202 = spirv.CompositeConstruct %159, %390, %621, %852 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1203 = spirv.GL.FMax %1202, %1201 : vector<4xf16> | |
| %1204 = spirv.IsNan %1202 : vector<4xf16> | |
| %1205 = spirv.IsNan %1201 : vector<4xf16> | |
| %1206 = spirv.Select %1204, %1202, %1203 : vector<4xi1>, vector<4xf16> | |
| %1207 = spirv.Select %1205, %1201, %1206 : vector<4xi1>, vector<4xf16> | |
| %1208 = spirv.CompositeConstruct %162, %393, %624, %855 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1209 = spirv.GL.FMax %1208, %1207 : vector<4xf16> | |
| %1210 = spirv.IsNan %1208 : vector<4xf16> | |
| %1211 = spirv.IsNan %1207 : vector<4xf16> | |
| %1212 = spirv.Select %1210, %1208, %1209 : vector<4xi1>, vector<4xf16> | |
| %1213 = spirv.Select %1211, %1207, %1212 : vector<4xi1>, vector<4xf16> | |
| %1214 = spirv.CompositeConstruct %165, %396, %627, %858 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1215 = spirv.GL.FMax %1214, %1213 : vector<4xf16> | |
| %1216 = spirv.IsNan %1214 : vector<4xf16> | |
| %1217 = spirv.IsNan %1213 : vector<4xf16> | |
| %1218 = spirv.Select %1216, %1214, %1215 : vector<4xi1>, vector<4xf16> | |
| %1219 = spirv.Select %1217, %1213, %1218 : vector<4xi1>, vector<4xf16> | |
| %1220 = spirv.CompositeConstruct %168, %399, %630, %861 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1221 = spirv.GL.FMax %1220, %1219 : vector<4xf16> | |
| %1222 = spirv.IsNan %1220 : vector<4xf16> | |
| %1223 = spirv.IsNan %1219 : vector<4xf16> | |
| %1224 = spirv.Select %1222, %1220, %1221 : vector<4xi1>, vector<4xf16> | |
| %1225 = spirv.Select %1223, %1219, %1224 : vector<4xi1>, vector<4xf16> | |
| %1226 = spirv.CompositeConstruct %171, %402, %633, %864 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1227 = spirv.GL.FMax %1226, %1225 : vector<4xf16> | |
| %1228 = spirv.IsNan %1226 : vector<4xf16> | |
| %1229 = spirv.IsNan %1225 : vector<4xf16> | |
| %1230 = spirv.Select %1228, %1226, %1227 : vector<4xi1>, vector<4xf16> | |
| %1231 = spirv.Select %1229, %1225, %1230 : vector<4xi1>, vector<4xf16> | |
| %1232 = spirv.CompositeConstruct %174, %405, %636, %867 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1233 = spirv.GL.FMax %1232, %1231 : vector<4xf16> | |
| %1234 = spirv.IsNan %1232 : vector<4xf16> | |
| %1235 = spirv.IsNan %1231 : vector<4xf16> | |
| %1236 = spirv.Select %1234, %1232, %1233 : vector<4xi1>, vector<4xf16> | |
| %1237 = spirv.Select %1235, %1231, %1236 : vector<4xi1>, vector<4xf16> | |
| %1238 = spirv.CompositeConstruct %177, %408, %639, %870 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1239 = spirv.GL.FMax %1238, %1237 : vector<4xf16> | |
| %1240 = spirv.IsNan %1238 : vector<4xf16> | |
| %1241 = spirv.IsNan %1237 : vector<4xf16> | |
| %1242 = spirv.Select %1240, %1238, %1239 : vector<4xi1>, vector<4xf16> | |
| %1243 = spirv.Select %1241, %1237, %1242 : vector<4xi1>, vector<4xf16> | |
| %1244 = spirv.CompositeConstruct %180, %411, %642, %873 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1245 = spirv.GL.FMax %1244, %1243 : vector<4xf16> | |
| %1246 = spirv.IsNan %1244 : vector<4xf16> | |
| %1247 = spirv.IsNan %1243 : vector<4xf16> | |
| %1248 = spirv.Select %1246, %1244, %1245 : vector<4xi1>, vector<4xf16> | |
| %1249 = spirv.Select %1247, %1243, %1248 : vector<4xi1>, vector<4xf16> | |
| %1250 = spirv.CompositeConstruct %183, %414, %645, %876 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1251 = spirv.GL.FMax %1250, %1249 : vector<4xf16> | |
| %1252 = spirv.IsNan %1250 : vector<4xf16> | |
| %1253 = spirv.IsNan %1249 : vector<4xf16> | |
| %1254 = spirv.Select %1252, %1250, %1251 : vector<4xi1>, vector<4xf16> | |
| %1255 = spirv.Select %1253, %1249, %1254 : vector<4xi1>, vector<4xf16> | |
| %1256 = spirv.CompositeConstruct %186, %417, %648, %879 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1257 = spirv.GL.FMax %1256, %1255 : vector<4xf16> | |
| %1258 = spirv.IsNan %1256 : vector<4xf16> | |
| %1259 = spirv.IsNan %1255 : vector<4xf16> | |
| %1260 = spirv.Select %1258, %1256, %1257 : vector<4xi1>, vector<4xf16> | |
| %1261 = spirv.Select %1259, %1255, %1260 : vector<4xi1>, vector<4xf16> | |
| %1262 = spirv.CompositeConstruct %189, %420, %651, %882 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1263 = spirv.GL.FMax %1262, %1261 : vector<4xf16> | |
| %1264 = spirv.IsNan %1262 : vector<4xf16> | |
| %1265 = spirv.IsNan %1261 : vector<4xf16> | |
| %1266 = spirv.Select %1264, %1262, %1263 : vector<4xi1>, vector<4xf16> | |
| %1267 = spirv.Select %1265, %1261, %1266 : vector<4xi1>, vector<4xf16> | |
| %1268 = spirv.CompositeConstruct %192, %423, %654, %885 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1269 = spirv.GL.FMax %1268, %1267 : vector<4xf16> | |
| %1270 = spirv.IsNan %1268 : vector<4xf16> | |
| %1271 = spirv.IsNan %1267 : vector<4xf16> | |
| %1272 = spirv.Select %1270, %1268, %1269 : vector<4xi1>, vector<4xf16> | |
| %1273 = spirv.Select %1271, %1267, %1272 : vector<4xi1>, vector<4xf16> | |
| %1274 = spirv.CompositeConstruct %195, %426, %657, %888 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1275 = spirv.GL.FMax %1274, %1273 : vector<4xf16> | |
| %1276 = spirv.IsNan %1274 : vector<4xf16> | |
| %1277 = spirv.IsNan %1273 : vector<4xf16> | |
| %1278 = spirv.Select %1276, %1274, %1275 : vector<4xi1>, vector<4xf16> | |
| %1279 = spirv.Select %1277, %1273, %1278 : vector<4xi1>, vector<4xf16> | |
| %1280 = spirv.CompositeConstruct %198, %429, %660, %891 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1281 = spirv.GL.FMax %1280, %1279 : vector<4xf16> | |
| %1282 = spirv.IsNan %1280 : vector<4xf16> | |
| %1283 = spirv.IsNan %1279 : vector<4xf16> | |
| %1284 = spirv.Select %1282, %1280, %1281 : vector<4xi1>, vector<4xf16> | |
| %1285 = spirv.Select %1283, %1279, %1284 : vector<4xi1>, vector<4xf16> | |
| %1286 = spirv.CompositeConstruct %201, %432, %663, %894 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1287 = spirv.GL.FMax %1286, %1285 : vector<4xf16> | |
| %1288 = spirv.IsNan %1286 : vector<4xf16> | |
| %1289 = spirv.IsNan %1285 : vector<4xf16> | |
| %1290 = spirv.Select %1288, %1286, %1287 : vector<4xi1>, vector<4xf16> | |
| %1291 = spirv.Select %1289, %1285, %1290 : vector<4xi1>, vector<4xf16> | |
| %1292 = spirv.CompositeConstruct %204, %435, %666, %897 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1293 = spirv.GL.FMax %1292, %1291 : vector<4xf16> | |
| %1294 = spirv.IsNan %1292 : vector<4xf16> | |
| %1295 = spirv.IsNan %1291 : vector<4xf16> | |
| %1296 = spirv.Select %1294, %1292, %1293 : vector<4xi1>, vector<4xf16> | |
| %1297 = spirv.Select %1295, %1291, %1296 : vector<4xi1>, vector<4xf16> | |
| %1298 = spirv.CompositeConstruct %207, %438, %669, %900 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1299 = spirv.GL.FMax %1298, %1297 : vector<4xf16> | |
| %1300 = spirv.IsNan %1298 : vector<4xf16> | |
| %1301 = spirv.IsNan %1297 : vector<4xf16> | |
| %1302 = spirv.Select %1300, %1298, %1299 : vector<4xi1>, vector<4xf16> | |
| %1303 = spirv.Select %1301, %1297, %1302 : vector<4xi1>, vector<4xf16> | |
| %1304 = spirv.CompositeConstruct %210, %441, %672, %903 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1305 = spirv.GL.FMax %1304, %1303 : vector<4xf16> | |
| %1306 = spirv.IsNan %1304 : vector<4xf16> | |
| %1307 = spirv.IsNan %1303 : vector<4xf16> | |
| %1308 = spirv.Select %1306, %1304, %1305 : vector<4xi1>, vector<4xf16> | |
| %1309 = spirv.Select %1307, %1303, %1308 : vector<4xi1>, vector<4xf16> | |
| %1310 = spirv.CompositeConstruct %213, %444, %675, %906 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1311 = spirv.GL.FMax %1310, %1309 : vector<4xf16> | |
| %1312 = spirv.IsNan %1310 : vector<4xf16> | |
| %1313 = spirv.IsNan %1309 : vector<4xf16> | |
| %1314 = spirv.Select %1312, %1310, %1311 : vector<4xi1>, vector<4xf16> | |
| %1315 = spirv.Select %1313, %1309, %1314 : vector<4xi1>, vector<4xf16> | |
| %1316 = spirv.CompositeConstruct %216, %447, %678, %909 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1317 = spirv.GL.FMax %1316, %1315 : vector<4xf16> | |
| %1318 = spirv.IsNan %1316 : vector<4xf16> | |
| %1319 = spirv.IsNan %1315 : vector<4xf16> | |
| %1320 = spirv.Select %1318, %1316, %1317 : vector<4xi1>, vector<4xf16> | |
| %1321 = spirv.Select %1319, %1315, %1320 : vector<4xi1>, vector<4xf16> | |
| %1322 = spirv.CompositeConstruct %219, %450, %681, %912 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1323 = spirv.GL.FMax %1322, %1321 : vector<4xf16> | |
| %1324 = spirv.IsNan %1322 : vector<4xf16> | |
| %1325 = spirv.IsNan %1321 : vector<4xf16> | |
| %1326 = spirv.Select %1324, %1322, %1323 : vector<4xi1>, vector<4xf16> | |
| %1327 = spirv.Select %1325, %1321, %1326 : vector<4xi1>, vector<4xf16> | |
| %1328 = spirv.CompositeConstruct %222, %453, %684, %915 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1329 = spirv.GL.FMax %1328, %1327 : vector<4xf16> | |
| %1330 = spirv.IsNan %1328 : vector<4xf16> | |
| %1331 = spirv.IsNan %1327 : vector<4xf16> | |
| %1332 = spirv.Select %1330, %1328, %1329 : vector<4xi1>, vector<4xf16> | |
| %1333 = spirv.Select %1331, %1327, %1332 : vector<4xi1>, vector<4xf16> | |
| %1334 = spirv.CompositeConstruct %225, %456, %687, %918 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1335 = spirv.GL.FMax %1334, %1333 : vector<4xf16> | |
| %1336 = spirv.IsNan %1334 : vector<4xf16> | |
| %1337 = spirv.IsNan %1333 : vector<4xf16> | |
| %1338 = spirv.Select %1336, %1334, %1335 : vector<4xi1>, vector<4xf16> | |
| %1339 = spirv.Select %1337, %1333, %1338 : vector<4xi1>, vector<4xf16> | |
| %1340 = spirv.CompositeConstruct %228, %459, %690, %921 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1341 = spirv.GL.FMax %1340, %1339 : vector<4xf16> | |
| %1342 = spirv.IsNan %1340 : vector<4xf16> | |
| %1343 = spirv.IsNan %1339 : vector<4xf16> | |
| %1344 = spirv.Select %1342, %1340, %1341 : vector<4xi1>, vector<4xf16> | |
| %1345 = spirv.Select %1343, %1339, %1344 : vector<4xi1>, vector<4xf16> | |
| %1346 = spirv.CompositeConstruct %231, %462, %693, %924 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1347 = spirv.GL.FMax %1346, %1345 : vector<4xf16> | |
| %1348 = spirv.IsNan %1346 : vector<4xf16> | |
| %1349 = spirv.IsNan %1345 : vector<4xf16> | |
| %1350 = spirv.Select %1348, %1346, %1347 : vector<4xi1>, vector<4xf16> | |
| %1351 = spirv.Select %1349, %1345, %1350 : vector<4xi1>, vector<4xf16> | |
| %1352 = spirv.CompositeConstruct %234, %465, %696, %927 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1353 = spirv.GL.FMax %1352, %1351 : vector<4xf16> | |
| %1354 = spirv.IsNan %1352 : vector<4xf16> | |
| %1355 = spirv.IsNan %1351 : vector<4xf16> | |
| %1356 = spirv.Select %1354, %1352, %1353 : vector<4xi1>, vector<4xf16> | |
| %1357 = spirv.Select %1355, %1351, %1356 : vector<4xi1>, vector<4xf16> | |
| %1358 = spirv.CompositeConstruct %237, %468, %699, %930 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1359 = spirv.GL.FMax %1358, %1357 : vector<4xf16> | |
| %1360 = spirv.IsNan %1358 : vector<4xf16> | |
| %1361 = spirv.IsNan %1357 : vector<4xf16> | |
| %1362 = spirv.Select %1360, %1358, %1359 : vector<4xi1>, vector<4xf16> | |
| %1363 = spirv.Select %1361, %1357, %1362 : vector<4xi1>, vector<4xf16> | |
| %1364 = spirv.CompositeConstruct %240, %471, %702, %933 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1365 = spirv.GL.FMax %1364, %1363 : vector<4xf16> | |
| %1366 = spirv.IsNan %1364 : vector<4xf16> | |
| %1367 = spirv.IsNan %1363 : vector<4xf16> | |
| %1368 = spirv.Select %1366, %1364, %1365 : vector<4xi1>, vector<4xf16> | |
| %1369 = spirv.Select %1367, %1363, %1368 : vector<4xi1>, vector<4xf16> | |
| %1370 = spirv.CompositeConstruct %243, %474, %705, %936 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1371 = spirv.GL.FMax %1370, %1369 : vector<4xf16> | |
| %1372 = spirv.IsNan %1370 : vector<4xf16> | |
| %1373 = spirv.IsNan %1369 : vector<4xf16> | |
| %1374 = spirv.Select %1372, %1370, %1371 : vector<4xi1>, vector<4xf16> | |
| %1375 = spirv.Select %1373, %1369, %1374 : vector<4xi1>, vector<4xf16> | |
| %1376 = spirv.CompositeConstruct %246, %477, %708, %939 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1377 = spirv.GL.FMax %1376, %1375 : vector<4xf16> | |
| %1378 = spirv.IsNan %1376 : vector<4xf16> | |
| %1379 = spirv.IsNan %1375 : vector<4xf16> | |
| %1380 = spirv.Select %1378, %1376, %1377 : vector<4xi1>, vector<4xf16> | |
| %1381 = spirv.Select %1379, %1375, %1380 : vector<4xi1>, vector<4xf16> | |
| %1382 = spirv.CompositeConstruct %249, %480, %711, %942 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1383 = spirv.GL.FMax %1382, %1381 : vector<4xf16> | |
| %1384 = spirv.IsNan %1382 : vector<4xf16> | |
| %1385 = spirv.IsNan %1381 : vector<4xf16> | |
| %1386 = spirv.Select %1384, %1382, %1383 : vector<4xi1>, vector<4xf16> | |
| %1387 = spirv.Select %1385, %1381, %1386 : vector<4xi1>, vector<4xf16> | |
| %1388 = spirv.CompositeConstruct %252, %483, %714, %945 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1389 = spirv.GL.FMax %1388, %1387 : vector<4xf16> | |
| %1390 = spirv.IsNan %1388 : vector<4xf16> | |
| %1391 = spirv.IsNan %1387 : vector<4xf16> | |
| %1392 = spirv.Select %1390, %1388, %1389 : vector<4xi1>, vector<4xf16> | |
| %1393 = spirv.Select %1391, %1387, %1392 : vector<4xi1>, vector<4xf16> | |
| %1394 = spirv.CompositeConstruct %255, %486, %717, %948 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1395 = spirv.GL.FMax %1394, %1393 : vector<4xf16> | |
| %1396 = spirv.IsNan %1394 : vector<4xf16> | |
| %1397 = spirv.IsNan %1393 : vector<4xf16> | |
| %1398 = spirv.Select %1396, %1394, %1395 : vector<4xi1>, vector<4xf16> | |
| %1399 = spirv.Select %1397, %1393, %1398 : vector<4xi1>, vector<4xf16> | |
| %1400 = spirv.CompositeConstruct %258, %489, %720, %951 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1401 = spirv.GL.FMax %1400, %1399 : vector<4xf16> | |
| %1402 = spirv.IsNan %1400 : vector<4xf16> | |
| %1403 = spirv.IsNan %1399 : vector<4xf16> | |
| %1404 = spirv.Select %1402, %1400, %1401 : vector<4xi1>, vector<4xf16> | |
| %1405 = spirv.Select %1403, %1399, %1404 : vector<4xi1>, vector<4xf16> | |
| %1406 = spirv.CompositeConstruct %261, %492, %723, %954 : (f16, f16, f16, f16) -> vector<4xf16> | |
| %1407 = spirv.GL.FMax %1406, %1405 : vector<4xf16> | |
| %1408 = spirv.IsNan %1406 : vector<4xf16> | |
| %1409 = spirv.IsNan %1405 : vector<4xf16> | |
| %1410 = spirv.Select %1408, %1406, %1407 : vector<4xi1>, vector<4xf16> | |
| %1411 = spirv.Select %1409, %1405, %1410 : vector<4xi1>, vector<4xf16> | |
| %1412 = spirv.CompositeExtract %1411[0 : i32] : vector<4xf16> | |
| %1413 = spirv.CompositeExtract %1411[1 : i32] : vector<4xf16> | |
| %1414 = spirv.CompositeExtract %1411[2 : i32] : vector<4xf16> | |
| %1415 = spirv.CompositeExtract %1411[3 : i32] : vector<4xf16> | |
| %1416 = spirv.FSub %33, %1412 : f16 | |
| %1417 = spirv.FSub %36, %1412 : f16 | |
| %1418 = spirv.FSub %39, %1412 : f16 | |
| %1419 = spirv.FSub %42, %1412 : f16 | |
| %1420 = spirv.FSub %45, %1412 : f16 | |
| %1421 = spirv.FSub %48, %1412 : f16 | |
| %1422 = spirv.FSub %51, %1412 : f16 | |
| %1423 = spirv.FSub %54, %1412 : f16 | |
| %1424 = spirv.FSub %57, %1412 : f16 | |
| %1425 = spirv.FSub %60, %1412 : f16 | |
| %1426 = spirv.FSub %63, %1412 : f16 | |
| %1427 = spirv.FSub %66, %1412 : f16 | |
| %1428 = spirv.FSub %69, %1412 : f16 | |
| %1429 = spirv.FSub %72, %1412 : f16 | |
| %1430 = spirv.FSub %75, %1412 : f16 | |
| %1431 = spirv.FSub %78, %1412 : f16 | |
| %1432 = spirv.FSub %81, %1412 : f16 | |
| %1433 = spirv.FSub %84, %1412 : f16 | |
| %1434 = spirv.FSub %87, %1412 : f16 | |
| %1435 = spirv.FSub %90, %1412 : f16 | |
| %1436 = spirv.FSub %93, %1412 : f16 | |
| %1437 = spirv.FSub %96, %1412 : f16 | |
| %1438 = spirv.FSub %99, %1412 : f16 | |
| %1439 = spirv.FSub %102, %1412 : f16 | |
| %1440 = spirv.FSub %105, %1412 : f16 | |
| %1441 = spirv.FSub %108, %1412 : f16 | |
| %1442 = spirv.FSub %111, %1412 : f16 | |
| %1443 = spirv.FSub %114, %1412 : f16 | |
| %1444 = spirv.FSub %117, %1412 : f16 | |
| %1445 = spirv.FSub %120, %1412 : f16 | |
| %1446 = spirv.FSub %123, %1412 : f16 | |
| %1447 = spirv.FSub %126, %1412 : f16 | |
| %1448 = spirv.FSub %129, %1412 : f16 | |
| %1449 = spirv.FSub %132, %1412 : f16 | |
| %1450 = spirv.FSub %135, %1412 : f16 | |
| %1451 = spirv.FSub %138, %1412 : f16 | |
| %1452 = spirv.FSub %141, %1412 : f16 | |
| %1453 = spirv.FSub %144, %1412 : f16 | |
| %1454 = spirv.FSub %147, %1412 : f16 | |
| %1455 = spirv.FSub %150, %1412 : f16 | |
| %1456 = spirv.FSub %153, %1412 : f16 | |
| %1457 = spirv.FSub %156, %1412 : f16 | |
| %1458 = spirv.FSub %159, %1412 : f16 | |
| %1459 = spirv.FSub %162, %1412 : f16 | |
| %1460 = spirv.FSub %165, %1412 : f16 | |
| %1461 = spirv.FSub %168, %1412 : f16 | |
| %1462 = spirv.FSub %171, %1412 : f16 | |
| %1463 = spirv.FSub %174, %1412 : f16 | |
| %1464 = spirv.FSub %177, %1412 : f16 | |
| %1465 = spirv.FSub %180, %1412 : f16 | |
| %1466 = spirv.FSub %183, %1412 : f16 | |
| %1467 = spirv.FSub %186, %1412 : f16 | |
| %1468 = spirv.FSub %189, %1412 : f16 | |
| %1469 = spirv.FSub %192, %1412 : f16 | |
| %1470 = spirv.FSub %195, %1412 : f16 | |
| %1471 = spirv.FSub %198, %1412 : f16 | |
| %1472 = spirv.FSub %201, %1412 : f16 | |
| %1473 = spirv.FSub %204, %1412 : f16 | |
| %1474 = spirv.FSub %207, %1412 : f16 | |
| %1475 = spirv.FSub %210, %1412 : f16 | |
| %1476 = spirv.FSub %213, %1412 : f16 | |
| %1477 = spirv.FSub %216, %1412 : f16 | |
| %1478 = spirv.FSub %219, %1412 : f16 | |
| %1479 = spirv.FSub %222, %1412 : f16 | |
| %1480 = spirv.FSub %225, %1412 : f16 | |
| %1481 = spirv.FSub %228, %1412 : f16 | |
| %1482 = spirv.FSub %231, %1412 : f16 | |
| %1483 = spirv.FSub %234, %1412 : f16 | |
| %1484 = spirv.FSub %237, %1412 : f16 | |
| %1485 = spirv.FSub %240, %1412 : f16 | |
| %1486 = spirv.FSub %243, %1412 : f16 | |
| %1487 = spirv.FSub %246, %1412 : f16 | |
| %1488 = spirv.FSub %249, %1412 : f16 | |
| %1489 = spirv.FSub %252, %1412 : f16 | |
| %1490 = spirv.FSub %255, %1412 : f16 | |
| %1491 = spirv.FSub %258, %1412 : f16 | |
| %1492 = spirv.FSub %261, %1412 : f16 | |
| %1493 = spirv.FSub %264, %1413 : f16 | |
| %1494 = spirv.FSub %267, %1413 : f16 | |
| %1495 = spirv.FSub %270, %1413 : f16 | |
| %1496 = spirv.FSub %273, %1413 : f16 | |
| %1497 = spirv.FSub %276, %1413 : f16 | |
| %1498 = spirv.FSub %279, %1413 : f16 | |
| %1499 = spirv.FSub %282, %1413 : f16 | |
| %1500 = spirv.FSub %285, %1413 : f16 | |
| %1501 = spirv.FSub %288, %1413 : f16 | |
| %1502 = spirv.FSub %291, %1413 : f16 | |
| %1503 = spirv.FSub %294, %1413 : f16 | |
| %1504 = spirv.FSub %297, %1413 : f16 | |
| %1505 = spirv.FSub %300, %1413 : f16 | |
| %1506 = spirv.FSub %303, %1413 : f16 | |
| %1507 = spirv.FSub %306, %1413 : f16 | |
| %1508 = spirv.FSub %309, %1413 : f16 | |
| %1509 = spirv.FSub %312, %1413 : f16 | |
| %1510 = spirv.FSub %315, %1413 : f16 | |
| %1511 = spirv.FSub %318, %1413 : f16 | |
| %1512 = spirv.FSub %321, %1413 : f16 | |
| %1513 = spirv.FSub %324, %1413 : f16 | |
| %1514 = spirv.FSub %327, %1413 : f16 | |
| %1515 = spirv.FSub %330, %1413 : f16 | |
| %1516 = spirv.FSub %333, %1413 : f16 | |
| %1517 = spirv.FSub %336, %1413 : f16 | |
| %1518 = spirv.FSub %339, %1413 : f16 | |
| %1519 = spirv.FSub %342, %1413 : f16 | |
| %1520 = spirv.FSub %345, %1413 : f16 | |
| %1521 = spirv.FSub %348, %1413 : f16 | |
| %1522 = spirv.FSub %351, %1413 : f16 | |
| %1523 = spirv.FSub %354, %1413 : f16 | |
| %1524 = spirv.FSub %357, %1413 : f16 | |
| %1525 = spirv.FSub %360, %1413 : f16 | |
| %1526 = spirv.FSub %363, %1413 : f16 | |
| %1527 = spirv.FSub %366, %1413 : f16 | |
| %1528 = spirv.FSub %369, %1413 : f16 | |
| %1529 = spirv.FSub %372, %1413 : f16 | |
| %1530 = spirv.FSub %375, %1413 : f16 | |
| %1531 = spirv.FSub %378, %1413 : f16 | |
| %1532 = spirv.FSub %381, %1413 : f16 | |
| %1533 = spirv.FSub %384, %1413 : f16 | |
| %1534 = spirv.FSub %387, %1413 : f16 | |
| %1535 = spirv.FSub %390, %1413 : f16 | |
| %1536 = spirv.FSub %393, %1413 : f16 | |
| %1537 = spirv.FSub %396, %1413 : f16 | |
| %1538 = spirv.FSub %399, %1413 : f16 | |
| %1539 = spirv.FSub %402, %1413 : f16 | |
| %1540 = spirv.FSub %405, %1413 : f16 | |
| %1541 = spirv.FSub %408, %1413 : f16 | |
| %1542 = spirv.FSub %411, %1413 : f16 | |
| %1543 = spirv.FSub %414, %1413 : f16 | |
| %1544 = spirv.FSub %417, %1413 : f16 | |
| %1545 = spirv.FSub %420, %1413 : f16 | |
| %1546 = spirv.FSub %423, %1413 : f16 | |
| %1547 = spirv.FSub %426, %1413 : f16 | |
| %1548 = spirv.FSub %429, %1413 : f16 | |
| %1549 = spirv.FSub %432, %1413 : f16 | |
| %1550 = spirv.FSub %435, %1413 : f16 | |
| %1551 = spirv.FSub %438, %1413 : f16 | |
| %1552 = spirv.FSub %441, %1413 : f16 | |
| %1553 = spirv.FSub %444, %1413 : f16 | |
| %1554 = spirv.FSub %447, %1413 : f16 | |
| %1555 = spirv.FSub %450, %1413 : f16 | |
| %1556 = spirv.FSub %453, %1413 : f16 | |
| %1557 = spirv.FSub %456, %1413 : f16 | |
| %1558 = spirv.FSub %459, %1413 : f16 | |
| %1559 = spirv.FSub %462, %1413 : f16 | |
| %1560 = spirv.FSub %465, %1413 : f16 | |
| %1561 = spirv.FSub %468, %1413 : f16 | |
| %1562 = spirv.FSub %471, %1413 : f16 | |
| %1563 = spirv.FSub %474, %1413 : f16 | |
| %1564 = spirv.FSub %477, %1413 : f16 | |
| %1565 = spirv.FSub %480, %1413 : f16 | |
| %1566 = spirv.FSub %483, %1413 : f16 | |
| %1567 = spirv.FSub %486, %1413 : f16 | |
| %1568 = spirv.FSub %489, %1413 : f16 | |
| %1569 = spirv.FSub %492, %1413 : f16 | |
| %1570 = spirv.FSub %495, %1414 : f16 | |
| %1571 = spirv.FSub %498, %1414 : f16 | |
| %1572 = spirv.FSub %501, %1414 : f16 | |
| %1573 = spirv.FSub %504, %1414 : f16 | |
| %1574 = spirv.FSub %507, %1414 : f16 | |
| %1575 = spirv.FSub %510, %1414 : f16 | |
| %1576 = spirv.FSub %513, %1414 : f16 | |
| %1577 = spirv.FSub %516, %1414 : f16 | |
| %1578 = spirv.FSub %519, %1414 : f16 | |
| %1579 = spirv.FSub %522, %1414 : f16 | |
| %1580 = spirv.FSub %525, %1414 : f16 | |
| %1581 = spirv.FSub %528, %1414 : f16 | |
| %1582 = spirv.FSub %531, %1414 : f16 | |
| %1583 = spirv.FSub %534, %1414 : f16 | |
| %1584 = spirv.FSub %537, %1414 : f16 | |
| %1585 = spirv.FSub %540, %1414 : f16 | |
| %1586 = spirv.FSub %543, %1414 : f16 | |
| %1587 = spirv.FSub %546, %1414 : f16 | |
| %1588 = spirv.FSub %549, %1414 : f16 | |
| %1589 = spirv.FSub %552, %1414 : f16 | |
| %1590 = spirv.FSub %555, %1414 : f16 | |
| %1591 = spirv.FSub %558, %1414 : f16 | |
| %1592 = spirv.FSub %561, %1414 : f16 | |
| %1593 = spirv.FSub %564, %1414 : f16 | |
| %1594 = spirv.FSub %567, %1414 : f16 | |
| %1595 = spirv.FSub %570, %1414 : f16 | |
| %1596 = spirv.FSub %573, %1414 : f16 | |
| %1597 = spirv.FSub %576, %1414 : f16 | |
| %1598 = spirv.FSub %579, %1414 : f16 | |
| %1599 = spirv.FSub %582, %1414 : f16 | |
| %1600 = spirv.FSub %585, %1414 : f16 | |
| %1601 = spirv.FSub %588, %1414 : f16 | |
| %1602 = spirv.FSub %591, %1414 : f16 | |
| %1603 = spirv.FSub %594, %1414 : f16 | |
| %1604 = spirv.FSub %597, %1414 : f16 | |
| %1605 = spirv.FSub %600, %1414 : f16 | |
| %1606 = spirv.FSub %603, %1414 : f16 | |
| %1607 = spirv.FSub %606, %1414 : f16 | |
| %1608 = spirv.FSub %609, %1414 : f16 | |
| %1609 = spirv.FSub %612, %1414 : f16 | |
| %1610 = spirv.FSub %615, %1414 : f16 | |
| %1611 = spirv.FSub %618, %1414 : f16 | |
| %1612 = spirv.FSub %621, %1414 : f16 | |
| %1613 = spirv.FSub %624, %1414 : f16 | |
| %1614 = spirv.FSub %627, %1414 : f16 | |
| %1615 = spirv.FSub %630, %1414 : f16 | |
| %1616 = spirv.FSub %633, %1414 : f16 | |
| %1617 = spirv.FSub %636, %1414 : f16 | |
| %1618 = spirv.FSub %639, %1414 : f16 | |
| %1619 = spirv.FSub %642, %1414 : f16 | |
| %1620 = spirv.FSub %645, %1414 : f16 | |
| %1621 = spirv.FSub %648, %1414 : f16 | |
| %1622 = spirv.FSub %651, %1414 : f16 | |
| %1623 = spirv.FSub %654, %1414 : f16 | |
| %1624 = spirv.FSub %657, %1414 : f16 | |
| %1625 = spirv.FSub %660, %1414 : f16 | |
| %1626 = spirv.FSub %663, %1414 : f16 | |
| %1627 = spirv.FSub %666, %1414 : f16 | |
| %1628 = spirv.FSub %669, %1414 : f16 | |
| %1629 = spirv.FSub %672, %1414 : f16 | |
| %1630 = spirv.FSub %675, %1414 : f16 | |
| %1631 = spirv.FSub %678, %1414 : f16 | |
| %1632 = spirv.FSub %681, %1414 : f16 | |
| %1633 = spirv.FSub %684, %1414 : f16 | |
| %1634 = spirv.FSub %687, %1414 : f16 | |
| %1635 = spirv.FSub %690, %1414 : f16 | |
| %1636 = spirv.FSub %693, %1414 : f16 | |
| %1637 = spirv.FSub %696, %1414 : f16 | |
| %1638 = spirv.FSub %699, %1414 : f16 | |
| %1639 = spirv.FSub %702, %1414 : f16 | |
| %1640 = spirv.FSub %705, %1414 : f16 | |
| %1641 = spirv.FSub %708, %1414 : f16 | |
| %1642 = spirv.FSub %711, %1414 : f16 | |
| %1643 = spirv.FSub %714, %1414 : f16 | |
| %1644 = spirv.FSub %717, %1414 : f16 | |
| %1645 = spirv.FSub %720, %1414 : f16 | |
| %1646 = spirv.FSub %723, %1414 : f16 | |
| %1647 = spirv.FSub %726, %1415 : f16 | |
| %1648 = spirv.FSub %729, %1415 : f16 | |
| %1649 = spirv.FSub %732, %1415 : f16 | |
| %1650 = spirv.FSub %735, %1415 : f16 | |
| %1651 = spirv.FSub %738, %1415 : f16 | |
| %1652 = spirv.FSub %741, %1415 : f16 | |
| %1653 = spirv.FSub %744, %1415 : f16 | |
| %1654 = spirv.FSub %747, %1415 : f16 | |
| %1655 = spirv.FSub %750, %1415 : f16 | |
| %1656 = spirv.FSub %753, %1415 : f16 | |
| %1657 = spirv.FSub %756, %1415 : f16 | |
| %1658 = spirv.FSub %759, %1415 : f16 | |
| %1659 = spirv.FSub %762, %1415 : f16 | |
| %1660 = spirv.FSub %765, %1415 : f16 | |
| %1661 = spirv.FSub %768, %1415 : f16 | |
| %1662 = spirv.FSub %771, %1415 : f16 | |
| %1663 = spirv.FSub %774, %1415 : f16 | |
| %1664 = spirv.FSub %777, %1415 : f16 | |
| %1665 = spirv.FSub %780, %1415 : f16 | |
| %1666 = spirv.FSub %783, %1415 : f16 | |
| %1667 = spirv.FSub %786, %1415 : f16 | |
| %1668 = spirv.FSub %789, %1415 : f16 | |
| %1669 = spirv.FSub %792, %1415 : f16 | |
| %1670 = spirv.FSub %795, %1415 : f16 | |
| %1671 = spirv.FSub %798, %1415 : f16 | |
| %1672 = spirv.FSub %801, %1415 : f16 | |
| %1673 = spirv.FSub %804, %1415 : f16 | |
| %1674 = spirv.FSub %807, %1415 : f16 | |
| %1675 = spirv.FSub %810, %1415 : f16 | |
| %1676 = spirv.FSub %813, %1415 : f16 | |
| %1677 = spirv.FSub %816, %1415 : f16 | |
| %1678 = spirv.FSub %819, %1415 : f16 | |
| %1679 = spirv.FSub %822, %1415 : f16 | |
| %1680 = spirv.FSub %825, %1415 : f16 | |
| %1681 = spirv.FSub %828, %1415 : f16 | |
| %1682 = spirv.FSub %831, %1415 : f16 | |
| %1683 = spirv.FSub %834, %1415 : f16 | |
| %1684 = spirv.FSub %837, %1415 : f16 | |
| %1685 = spirv.FSub %840, %1415 : f16 | |
| %1686 = spirv.FSub %843, %1415 : f16 | |
| %1687 = spirv.FSub %846, %1415 : f16 | |
| %1688 = spirv.FSub %849, %1415 : f16 | |
| %1689 = spirv.FSub %852, %1415 : f16 | |
| %1690 = spirv.FSub %855, %1415 : f16 | |
| %1691 = spirv.FSub %858, %1415 : f16 | |
| %1692 = spirv.FSub %861, %1415 : f16 | |
| %1693 = spirv.FSub %864, %1415 : f16 | |
| %1694 = spirv.FSub %867, %1415 : f16 | |
| %1695 = spirv.FSub %870, %1415 : f16 | |
| %1696 = spirv.FSub %873, %1415 : f16 | |
| %1697 = spirv.FSub %876, %1415 : f16 | |
| %1698 = spirv.FSub %879, %1415 : f16 | |
| %1699 = spirv.FSub %882, %1415 : f16 | |
| %1700 = spirv.FSub %885, %1415 : f16 | |
| %1701 = spirv.FSub %888, %1415 : f16 | |
| %1702 = spirv.FSub %891, %1415 : f16 | |
| %1703 = spirv.FSub %894, %1415 : f16 | |
| %1704 = spirv.FSub %897, %1415 : f16 | |
| %1705 = spirv.FSub %900, %1415 : f16 | |
| %1706 = spirv.FSub %903, %1415 : f16 | |
| %1707 = spirv.FSub %906, %1415 : f16 | |
| %1708 = spirv.FSub %909, %1415 : f16 | |
| %1709 = spirv.FSub %912, %1415 : f16 | |
| %1710 = spirv.FSub %915, %1415 : f16 | |
| %1711 = spirv.FSub %918, %1415 : f16 | |
| %1712 = spirv.FSub %921, %1415 : f16 | |
| %1713 = spirv.FSub %924, %1415 : f16 | |
| %1714 = spirv.FSub %927, %1415 : f16 | |
| %1715 = spirv.FSub %930, %1415 : f16 | |
| %1716 = spirv.FSub %933, %1415 : f16 | |
| %1717 = spirv.FSub %936, %1415 : f16 | |
| %1718 = spirv.FSub %939, %1415 : f16 | |
| %1719 = spirv.FSub %942, %1415 : f16 | |
| %1720 = spirv.FSub %945, %1415 : f16 | |
| %1721 = spirv.FSub %948, %1415 : f16 | |
| %1722 = spirv.FSub %951, %1415 : f16 | |
| %1723 = spirv.FSub %954, %1415 : f16 | |
| %1724 = spirv.FConvert %1416 : f16 to f32 | |
| %1725 = spirv.IsNan %1724 : f32 | |
| %1726 = spirv.LogicalOr %1725, %1725 : i1 | |
| %1727 = spirv.FMul %1724, %cst_f32_0 : f32 | |
| %1728 = spirv.GL.Floor %1727 : f32 | |
| %1729 = spirv.FMul %1728, %cst_f32 : f32 | |
| %1730 = spirv.FSub %1724, %1729 : f32 | |
| %1731 = spirv.FMul %1730, %1730 : f32 | |
| %1732 = spirv.FMul %1731, %1731 : f32 | |
| %1733 = spirv.GL.Fma %cst_f32_1, %1730, %cst_f32_1 : f32 | |
| %1734 = spirv.GL.Fma %cst_f32_3, %1730, %cst_f32_2 : f32 | |
| %1735 = spirv.GL.Fma %cst_f32_5, %1730, %cst_f32_4 : f32 | |
| %1736 = spirv.GL.Fma %1734, %1731, %1733 : f32 | |
| %1737 = spirv.GL.Fma %1735, %1732, %1736 : f32 | |
| %1738 = spirv.ConvertFToS %1728 : f32 to i32 | |
| %1739 = spirv.IAdd %1738, %cst127_i32 : i32 | |
| %1740 = spirv.ShiftLeftLogical %1739, %cst23_i32 : i32, i32 | |
| %1741 = spirv.Bitcast %1740 : i32 to f32 | |
| %1742 = spirv.FMul %1737, %1741 : f32 | |
| %1743 = spirv.SLessThanEqual %1738, %cst127_i32 : i32 | |
| %1744 = spirv.SGreaterThanEqual %1738, %cst-127_i32 : i32 | |
| %1745 = spirv.FOrdEqual %1724, %cst_f32_8 : f32 | |
| %1746 = spirv.FOrdEqual %1724, %cst_f32_7 : f32 | |
| %1747 = spirv.FOrdGreaterThan %1724, %cst_f32_6 : f32 | |
| %1748 = spirv.LogicalAnd %1743, %1744 : i1 | |
| %1749 = spirv.Select %1747, %cst_f32_7, %cst_f32_9 : i1, f32 | |
| %1750 = spirv.Select %1748, %1742, %1749 : i1, f32 | |
| %1751 = spirv.Select %1746, %cst_f32_7, %1750 : i1, f32 | |
| %1752 = spirv.Select %1745, %cst_f32_6, %1751 : i1, f32 | |
| %1753 = spirv.Select %1726, %1724, %1752 : i1, f32 | |
| %1754 = spirv.FConvert %1753 : f32 to f16 | |
| %1755 = spirv.FConvert %1417 : f16 to f32 | |
| %1756 = spirv.IsNan %1755 : f32 | |
| %1757 = spirv.LogicalOr %1756, %1756 : i1 | |
| %1758 = spirv.FMul %1755, %cst_f32_0 : f32 | |
| %1759 = spirv.GL.Floor %1758 : f32 | |
| %1760 = spirv.FMul %1759, %cst_f32 : f32 | |
| %1761 = spirv.FSub %1755, %1760 : f32 | |
| %1762 = spirv.FMul %1761, %1761 : f32 | |
| %1763 = spirv.FMul %1762, %1762 : f32 | |
| %1764 = spirv.GL.Fma %cst_f32_1, %1761, %cst_f32_1 : f32 | |
| %1765 = spirv.GL.Fma %cst_f32_3, %1761, %cst_f32_2 : f32 | |
| %1766 = spirv.GL.Fma %cst_f32_5, %1761, %cst_f32_4 : f32 | |
| %1767 = spirv.GL.Fma %1765, %1762, %1764 : f32 | |
| %1768 = spirv.GL.Fma %1766, %1763, %1767 : f32 | |
| %1769 = spirv.ConvertFToS %1759 : f32 to i32 | |
| %1770 = spirv.IAdd %1769, %cst127_i32 : i32 | |
| %1771 = spirv.ShiftLeftLogical %1770, %cst23_i32 : i32, i32 | |
| %1772 = spirv.Bitcast %1771 : i32 to f32 | |
| %1773 = spirv.FMul %1768, %1772 : f32 | |
| %1774 = spirv.SLessThanEqual %1769, %cst127_i32 : i32 | |
| %1775 = spirv.SGreaterThanEqual %1769, %cst-127_i32 : i32 | |
| %1776 = spirv.FOrdEqual %1755, %cst_f32_8 : f32 | |
| %1777 = spirv.FOrdEqual %1755, %cst_f32_7 : f32 | |
| %1778 = spirv.FOrdGreaterThan %1755, %cst_f32_6 : f32 | |
| %1779 = spirv.LogicalAnd %1774, %1775 : i1 | |
| %1780 = spirv.Select %1778, %cst_f32_7, %cst_f32_9 : i1, f32 | |
| %1781 = spirv.Select %1779, %1773, %1780 : i1, f32 | |
| %1782 = spirv.Select %1777, %cst_f32_7, %1781 : i1, f32 | |
| %1783 = spirv.Select %1776, %cst_f32_6, %1782 : i1, f32 | |
| %1784 = spirv.Select %1757, %1755, %1783 : i1, f32 | |
| %1785 = spirv.FConvert %1784 : f32 to f16 | |
| %1786 = spirv.FConvert %1418 : f16 to f32 | |
| %1787 = spirv.IsNan %1786 : f32 | |
| %1788 = spirv.LogicalOr %1787, %1787 : i1 | |
| %1789 = spirv.FMul %1786, %cst_f32_0 : f32 | |
| %1790 = spirv.GL.Floor %1789 : f32 | |
| %1791 = spirv.FMul %1790, %cst_f32 : f32 | |
| %1792 = spirv.FSub %1786, %1791 : f32 | |
| %1793 = spirv.FMul %1792, %1792 : f32 | |
| %1794 = spirv.FMul %1793, %1793 : f32 | |
| %1795 = spirv.GL.Fma %cst_f32_1, %1792, %cst_f32_1 : f32 | |
| %1796 = spirv.GL.Fma %cst_f32_3, %1792, %cst_f32_2 : f32 | |
| %1797 = spirv.GL.Fma %cst_f32_5, %1792, %cst_f32_4 : f32 | |
| %1798 = spirv.GL.Fma %1796, %1793, %1795 : f32 | |
| %1799 = spirv.GL.Fma %1797, %1794, %1798 : f32 | |
| %1800 = spirv.ConvertFToS %1790 : f32 to i32 | |
| %1801 = spirv.IAdd %1800, %cst127_i32 : i32 | |
| %1802 = spirv.ShiftLeftLogical %1801, %cst23_i32 : i32, i32 | |
| %1803 = spirv.Bitcast %1802 : i32 to f32 | |
| %1804 = spirv.FMul %1799, %1803 : f32 | |
| %1805 = spirv.SLessThanEqual %1800, %cst127_i32 : i32 | |
| %1806 = spirv.SGreaterThanEqual %1800, %cst-127_i32 : i32 | |
| %1807 = spirv.FOrdEqual %1786, %cst_f32_8 : f32 | |
| %1808 = spirv.FOrdEqual %1786, %cst_f32_7 : f32 | |
| %1809 = spirv.FOrdGreaterThan %1786, %cst_f32_6 : f32 | |
| %1810 = spirv.LogicalAnd %1805, %1806 : i1 | |
| %1811 = spirv.Select %1809, %cst_f32_7, %cst_f32_9 : i1, f32 | |
| %1812 = spirv.Select %1810, %1804, %1811 : i1, f32 | |
| %1813 = spirv.Select %1808, %cst_f32_7, %1812 : i1, f32 | |
| %1814 = spirv.Select %1807, %cst_f32_6, %1813 : i1, f32 | |
| %1815 = spirv.Select %1788, %1786, %1814 : i1, f32 | |
| %1816 = spirv.FConvert %1815 : f32 to f16 | |
| %1817 = spirv.FConvert %1419 : f16 to f32 | |
| %1818 = spirv.IsNan %1817 : f32 | |
| %1819 = spirv.LogicalOr %1818, %1818 : i1 | |
| %1820 = spirv.FMul %1817, %cst_f32_0 : f32 | |
| %1821 = spirv.GL.Floor %1820 : f32 | |
| %1822 = spirv.FMul %1821, %cst_f32 : f32 | |
| %1823 = spirv.FSub %1817, %1822 : f32 | |
| %1824 = spirv.FMul %1823, %1823 : f32 | |
| %1825 = spirv.FMul %1824, %1824 : f32 | |
| %1826 = spirv.GL.Fma %cst_f32_1, %1823, %cst_f32_1 : f32 | |
| %1827 = spirv.GL.Fma %cst_f32_3, %1823, %cst_f32_2 : f32 | |
| %1828 = spirv.GL.Fma %cst_f32_5, %1823, %cst_f32_4 : f32 | |
| %1829 = spirv.GL.Fma %1827, %1824, %1826 : f32 | |
| %1830 = spirv.GL.Fma %1828, %1825, %1829 : f32 | |
| %1831 = spirv.ConvertFToS %1821 : f32 to i32 | |
| %1832 = spirv.IAdd %1831, %cst127_i32 : i32 | |
| %1833 = spirv.ShiftLeftLogical %1832, %cst23_i32 : i32, i32 | |
| %1834 = spirv.Bitcast %1833 : i32 to f32 | |
| %1835 = spirv.FMul %1830, %1834 : f32 | |
| %1836 = spirv.SLessThanEqual %1831, %cst127_i32 : i32 | |
| %1837 = spirv.SGreaterThanEqual %1831, %cst-127_i32 : i32 | |
| %1838 = spirv.FOrdEqual %1817, %cst_f32_8 : f32 | |
| %1839 = spirv.FOrdEqual %1817, %cst_f32_7 : f32 | |
| %1840 = spirv.FOrdGreaterThan %1817, %cst_f32_6 : f32 | |
| %1841 = spirv.LogicalAnd %1836, %1837 : i1 | |
| %1842 = spirv.Select %1840, %cst_f32_7, %cst_f32_9 : i1, f32 | |
| %1843 = spirv.Select %1841, %1835, %1842 : i1, f32 | |
| %1844 = spirv.Select %1839, %cst_f32_7, %1843 : i1, f32 | |
| %1845 = spirv.Select %1838, %cst_f32_6, %1844 : i1, f32 | |
| %1846 = spirv.Select %1819, %1817, %1845 : i1, f32 | |
| %1847 = spirv.FConvert %1846 : f32 to f16 | |
| %1848 = spirv.FConvert %1420 : f16 to f32 | |
| %1849 = spirv.IsNan %1848 : f32 | |
| %1850 = spirv.LogicalOr %1849, %1849 : i1 | |
| %1851 = spirv.FMul %1848, %cst_f32_0 : f32 | |
| %1852 = spirv.GL.Floor %1851 : f32 | |
| %1853 = spirv.FMul %1852, %cst_f32 : f32 | |
| %1854 = spirv.FSub %1848, %1853 : f32 | |
| %1855 = spirv.FMul %1854, %1854 : f32 | |
| %1856 = spirv.FMul %1855, %1855 : f32 | |
| %1857 = spirv.GL.Fma %cst_f32_1, %1854, %cst_f32_1 : f32 | |
| %1858 = spirv.GL.Fma %cst_f32_3, %1854, %cst_f32_2 : f32 | |
| %1859 = spirv.GL.Fma %cst_f32_5, %1854, %cst_f32_4 : f32 | |
| %1860 = spirv.GL.Fma %1858, %1855, %1857 : f32 | |
| %1861 = spirv.GL.Fma %1859, %1856, %1860 : f32 | |
| %1862 = spirv.ConvertFToS %1852 : f32 to i32 | |
| %1863 = spirv.IAdd %1862, %cst127_i32 : i32 | |
| %1864 = spirv.ShiftLeftLogical %1863, %cst23_i32 : i32, i32 | |
| %1865 = spirv.Bitcast %1864 : i32 to f32 | |
| %1866 = spirv.FMul %1861, %1865 : f32 | |
| %1867 = spirv.SLessThanEqual %1862, %cst127_i32 : i32 | |
| %1868 = spirv.SGreaterThanEqual %1862, %cst-127_i32 : i32 | |
| %1869 = spirv.FOrdEqual %1848, %cst_f32_8 : f32 | |
| %1870 = spirv.FOrdEqual %1848, %cst_f32_7 : f32 | |
| %1871 = spirv.FOrdGreaterThan %1848, %cst_f32_6 : f32 | |
| %1872 = spirv.LogicalAnd %1867, %1868 : i1 | |
| %1873 = spirv.Select %1871, %cst_f32_7, %cst_f32_9 : i1, f32 | |
| %1874 = spirv.Select %1872, %1866, %1873 : i1, f32 | |
| %1875 = spirv.Select %1870, %cst_f32_7, %1874 : i1, f32 | |
| %1876 = spirv.Select %1869, %cst_f32_6, %1875 : i1, f32 | |
| %1877 = spirv.Select %1850, %1848, %1876 : i1, f32 | |
| %1878 = spirv.FConvert %1877 : f32 to f16 | |
| %1879 = spirv.FConvert %1421 : f16 to f32 | |
| %1880 = spirv.IsNan %1879 : f32 | |
| %1881 = spirv.LogicalOr %1880, %1880 : i1 | |
| %1882 = spirv.FMul %1879, %cst_f32_0 : f32 | |
| %1883 = spirv.GL.Floor %1882 : f32 | |
| %1884 = spirv.FMul %1883, %cst_f32 : f32 | |
| %1885 = spirv.FSub %1879, %1884 : f32 | |
| %1886 = spirv.FMul %1885, %1885 : f32 | |
| %1887 = spirv.FMul %1886, %1886 : f32 | |
| %1888 = spirv.GL.Fma %cst_f32_1, %1885, %cst_f32_1 : f32 | |
| %1889 = spirv.GL.Fma %cst_f32_3, %1885, %cst_f32_2 : f32 | |
| %1890 = spirv.GL.Fma %cst_f32_5, %1885, %cst_f32_4 : f32 | |
| %1891 = spirv.GL.Fma %1889, %1886, %1888 : f32 | |
| %1892 = spirv.GL.Fma %1890, %1887, %1891 : f32 | |
| %1893 = spirv.ConvertFToS %1883 : f32 to i32 | |
| %1894 = spirv.IAdd %1893, %cst127_i32 : i32 | |
| %1895 = spirv.ShiftLeftLogical %1894, %cst23_i32 : i32, i32 | |
| %1896 = spirv.Bitcast %1895 : i32 to f32 | |
| %1897 = spirv.FMul %1892, %1896 : f32 | |
| %1898 = spirv.SLessThanEqual %1893, %cst127_i32 : i32 | |
| %1899 = |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment