Skip to content

Instantly share code, notes, and snippets.

@pashu123
Created April 3, 2023 16:11
Show Gist options
  • Save pashu123/26b428760d94a553c69eca671a422fdb to your computer and use it in GitHub Desktop.
Save pashu123/26b428760d94a553c69eca671a422fdb to your computer and use it in GitHub Desktop.
This file has been truncated, but you can view the full file.
// Attribute alias for a `#util.composite` value typed as 1731821440xi8.
// Its element list (the lines that follow, up to the closing `]>`) holds 685
// entries, every one an f16 tensor of rank 1 or 2.
// Each entry's payload is printed as `dense_resource<__elided__>`, so only the
// tensor shapes and element types are visible in this dump; the actual values
// are not present in this file.
// NOTE(review): presumably the byte sizes of the listed tensors (plus any
// padding) add up to the 1731821440 in the type -- not verified here.
#composite_of_1731821440b = #util.composite<1731821440xi8, [
dense_resource<__elided__> : tensor<320x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<1024x320xf16>,
dense_resource<__elided__> : tensor<1024x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x2560xf16>,
dense_resource<__elided__> : tensor<1280x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<1280x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<1024x320xf16>,
dense_resource<__elided__> : tensor<1024x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x2560xf16>,
dense_resource<__elided__> : tensor<1280x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<1280x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<1024x640xf16>,
dense_resource<__elided__> : tensor<1024x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x5120xf16>,
dense_resource<__elided__> : tensor<2560x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<1280x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<1024x640xf16>,
dense_resource<__elided__> : tensor<1024x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x5120xf16>,
dense_resource<__elided__> : tensor<2560x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1024x1280xf16>,
dense_resource<__elided__> : tensor<1024x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x10240xf16>,
dense_resource<__elided__> : tensor<5120x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1024x1280xf16>,
dense_resource<__elided__> : tensor<1024x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x10240xf16>,
dense_resource<__elided__> : tensor<5120x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1024x1280xf16>,
dense_resource<__elided__> : tensor<1024x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x10240xf16>,
dense_resource<__elided__> : tensor<5120x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1024x1280xf16>,
dense_resource<__elided__> : tensor<1024x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x10240xf16>,
dense_resource<__elided__> : tensor<5120x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1024x1280xf16>,
dense_resource<__elided__> : tensor<1024x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x10240xf16>,
dense_resource<__elided__> : tensor<5120x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1024x1280xf16>,
dense_resource<__elided__> : tensor<1024x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x10240xf16>,
dense_resource<__elided__> : tensor<5120x1280xf16>,
dense_resource<__elided__> : tensor<1280x1280xf16>,
dense_resource<__elided__> : tensor<1280x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<1024x640xf16>,
dense_resource<__elided__> : tensor<1024x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x5120xf16>,
dense_resource<__elided__> : tensor<2560x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<1280x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<1024x640xf16>,
dense_resource<__elided__> : tensor<1024x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x5120xf16>,
dense_resource<__elided__> : tensor<2560x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<1280x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<1024x640xf16>,
dense_resource<__elided__> : tensor<1024x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<640x5120xf16>,
dense_resource<__elided__> : tensor<2560x640xf16>,
dense_resource<__elided__> : tensor<640x640xf16>,
dense_resource<__elided__> : tensor<1280x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<1024x320xf16>,
dense_resource<__elided__> : tensor<1024x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x2560xf16>,
dense_resource<__elided__> : tensor<1280x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<1280x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<1024x320xf16>,
dense_resource<__elided__> : tensor<1024x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x2560xf16>,
dense_resource<__elided__> : tensor<1280x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<1280x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<1024x320xf16>,
dense_resource<__elided__> : tensor<1024x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320x2560xf16>,
dense_resource<__elided__> : tensor<1280x320xf16>,
dense_resource<__elided__> : tensor<320x320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<2560xf16>,
dense_resource<__elided__> : tensor<2560xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<2560xf16>,
dense_resource<__elided__> : tensor<2560xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<2560xf16>,
dense_resource<__elided__> : tensor<2560xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<2560xf16>,
dense_resource<__elided__> : tensor<2560xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<2560xf16>,
dense_resource<__elided__> : tensor<2560xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1920xf16>,
dense_resource<__elided__> : tensor<1920xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1920xf16>,
dense_resource<__elided__> : tensor<1920xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<960xf16>,
dense_resource<__elided__> : tensor<960xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<960xf16>,
dense_resource<__elided__> : tensor<960xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320x36xf16>,
dense_resource<__elided__> : tensor<320x2880xf16>,
dense_resource<__elided__> : tensor<320x2880xf16>,
dense_resource<__elided__> : tensor<320x2880xf16>,
dense_resource<__elided__> : tensor<320x2880xf16>,
dense_resource<__elided__> : tensor<320x2880xf16>,
dense_resource<__elided__> : tensor<640x2880xf16>,
dense_resource<__elided__> : tensor<640x5760xf16>,
dense_resource<__elided__> : tensor<640x320xf16>,
dense_resource<__elided__> : tensor<640x5760xf16>,
dense_resource<__elided__> : tensor<640x5760xf16>,
dense_resource<__elided__> : tensor<640x5760xf16>,
dense_resource<__elided__> : tensor<1280x5760xf16>,
dense_resource<__elided__> : tensor<1280x11520xf16>,
dense_resource<__elided__> : tensor<1280x640xf16>,
dense_resource<__elided__> : tensor<1280x11520xf16>,
dense_resource<__elided__> : tensor<1280x11520xf16>,
dense_resource<__elided__> : tensor<1280x11520xf16>,
dense_resource<__elided__> : tensor<1280x11520xf16>,
dense_resource<__elided__> : tensor<1280x11520xf16>,
dense_resource<__elided__> : tensor<1280x11520xf16>,
dense_resource<__elided__> : tensor<1280x11520xf16>,
dense_resource<__elided__> : tensor<1280x11520xf16>,
dense_resource<__elided__> : tensor<1280x11520xf16>,
dense_resource<__elided__> : tensor<1280x11520xf16>,
dense_resource<__elided__> : tensor<1280x11520xf16>,
dense_resource<__elided__> : tensor<1280x23040xf16>,
dense_resource<__elided__> : tensor<1280x11520xf16>,
dense_resource<__elided__> : tensor<1280x2560xf16>,
dense_resource<__elided__> : tensor<1280x23040xf16>,
dense_resource<__elided__> : tensor<1280x11520xf16>,
dense_resource<__elided__> : tensor<1280x2560xf16>,
dense_resource<__elided__> : tensor<1280x23040xf16>,
dense_resource<__elided__> : tensor<1280x11520xf16>,
dense_resource<__elided__> : tensor<1280x2560xf16>,
dense_resource<__elided__> : tensor<1280x11520xf16>,
dense_resource<__elided__> : tensor<1280x23040xf16>,
dense_resource<__elided__> : tensor<1280x11520xf16>,
dense_resource<__elided__> : tensor<1280x2560xf16>,
dense_resource<__elided__> : tensor<1280x23040xf16>,
dense_resource<__elided__> : tensor<1280x11520xf16>,
dense_resource<__elided__> : tensor<1280x2560xf16>,
dense_resource<__elided__> : tensor<1280x17280xf16>,
dense_resource<__elided__> : tensor<1280x11520xf16>,
dense_resource<__elided__> : tensor<1280x1920xf16>,
dense_resource<__elided__> : tensor<1280x11520xf16>,
dense_resource<__elided__> : tensor<640x17280xf16>,
dense_resource<__elided__> : tensor<640x5760xf16>,
dense_resource<__elided__> : tensor<640x1920xf16>,
dense_resource<__elided__> : tensor<640x11520xf16>,
dense_resource<__elided__> : tensor<640x5760xf16>,
dense_resource<__elided__> : tensor<640x1280xf16>,
dense_resource<__elided__> : tensor<640x8640xf16>,
dense_resource<__elided__> : tensor<640x5760xf16>,
dense_resource<__elided__> : tensor<640x960xf16>,
dense_resource<__elided__> : tensor<640x5760xf16>,
dense_resource<__elided__> : tensor<320x8640xf16>,
dense_resource<__elided__> : tensor<320x2880xf16>,
dense_resource<__elided__> : tensor<320x960xf16>,
dense_resource<__elided__> : tensor<320x5760xf16>,
dense_resource<__elided__> : tensor<320x2880xf16>,
dense_resource<__elided__> : tensor<320x640xf16>,
dense_resource<__elided__> : tensor<320x5760xf16>,
dense_resource<__elided__> : tensor<320x2880xf16>,
dense_resource<__elided__> : tensor<320x640xf16>,
dense_resource<__elided__> : tensor<4x2880xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<2560xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<2560xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<5120xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<5120xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<10240xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<10240xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<10240xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<10240xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<10240xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<10240xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<1280xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<5120xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<5120xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<5120xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<640xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<2560xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<2560xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<2560xf16>,
dense_resource<__elided__> : tensor<320xf16>,
dense_resource<__elided__> : tensor<320xf16>,
]>
// HAL executable target for the "vulkan" backend in "vulkan-spirv-fb" format.
// The embedded SPIR-V target environment declares version v1.6, an NVIDIA
// discrete GPU, a fixed subgroup size of 32, 49152 bytes of shared memory and
// three NV cooperative-matrix configurations (8x8x32 i8->i32, 16x16x16
// f16->f16 and 16x16x16 f16->f32).
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>}>
// Pipeline layouts referenced by the executable exports. Every layout uses a
// single descriptor set (set 0) of storage buffers in which all bindings but
// the last are ReadOnly; the layouts differ only in the number of bindings
// (two or three) and in the number of push constants (0 through 6).
#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
#pipeline_layout1 = #hal.pipeline.layout<push_constants = 2, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
#pipeline_layout2 = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>
#pipeline_layout3 = #hal.pipeline.layout<push_constants = 3, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>
#pipeline_layout4 = #hal.pipeline.layout<push_constants = 3, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
#pipeline_layout5 = #hal.pipeline.layout<push_constants = 4, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
#pipeline_layout6 = #hal.pipeline.layout<push_constants = 4, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>
#pipeline_layout7 = #hal.pipeline.layout<push_constants = 1, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>
#pipeline_layout8 = #hal.pipeline.layout<push_constants = 5, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>
#pipeline_layout9 = #hal.pipeline.layout<push_constants = 6, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>
#pipeline_layout10 = #hal.pipeline.layout<push_constants = 2, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>
#pipeline_layout11 = #hal.pipeline.layout<push_constants = 1, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
// Codegen translation infos attached to the exports; each one names the
// SPIR-V lowering pipeline recorded for a dispatch, with pipeline_depth /
// store_stage parameters where given.
#translation = #iree_codegen.translation_info<SPIRVBaseDistribute>
#translation1 = #iree_codegen.translation_info<SPIRVBaseVectorize>
#translation2 = #iree_codegen.translation_info<SPIRVCooperativeMatrixVectorize store_stage = 0>
#translation3 = #iree_codegen.translation_info<SPIRVSubgroupReduce>
#translation4 = #iree_codegen.translation_info<SPIRVCooperativeMatrixVectorize pipeline_depth = 1 store_stage = 0>
#translation5 = #iree_codegen.translation_info<SPIRVMatmulPromoteVectorize pipeline_depth = 1>
// Vulkan device target whose only executable target is the one defined above;
// it also carries the legacy_sync flag.
#device_target_vulkan = #hal.device.target<"vulkan", {executable_targets = [#executable_target_vulkan_spirv_fb], legacy_sync}>
module attributes {hal.device.targets = [#device_target_vulkan], torch.debug_module_name = "_lambda"} {
// @_constant holds the device buffer containing the packed constant data.
// @_constant__timepoint holds the fence stored alongside it by the
// initializer below: a null fence when the data was mapped directly,
// otherwise the fence signaled by the upload submission.
util.global private mutable @_constant__timepoint : !hal.fence
util.global private @_constant : !hal.buffer
// Module initializer that places #composite_of_1731821440b (1731821440 bytes)
// in a device-local buffer and records the result in the two globals above.
util.initializer {
%0 = util.null : !hal.fence
%c1731821440 = arith.constant 1731821440 : index
%c0 = arith.constant 0 : index
// Host-side constant buffer backed by the composite attribute.
%buffer_cst = util.buffer.constant {alignment = 64 : index} : !util.buffer = #composite_of_1731821440b
%device = hal.ex.shared_device : !hal.device
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator
// Fast path: try to map the whole host constant as a device-local buffer.
// On success jump straight to ^bb2 with the mapped buffer and a null fence.
%did_map, %mapped = hal.allocator.try_map<%allocator : !hal.allocator> source(%buffer_cst : !util.buffer)[%c0, %c1731821440] type("DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|SharingImmutable") : i1, !hal.buffer
cf.cond_br %did_map, ^bb2(%mapped, %0 : !hal.buffer, !hal.fence), ^bb1
// Fallback path: stage the data in a host-visible buffer and copy it into a
// freshly allocated device-local buffer.
^bb1: // pred: ^bb0
%device_0 = hal.ex.shared_device : !hal.device
%allocator_1 = hal.device.allocator<%device_0 : !hal.device> : !hal.allocator
// Host-visible staging buffer initialized from the constant data.
%mapped_2 = hal.allocator.allocate.initialized<%allocator_1 : !hal.allocator> source(%buffer_cst : !util.buffer)[%c0, %c1731821440] type("HostVisible|HostCoherent|HostLocal|DeviceVisible") usage("TransferSource|TransferTarget|Transfer|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer
%device_3 = hal.ex.shared_device : !hal.device
%allocator_4 = hal.device.allocator<%device_3 : !hal.device> : !hal.allocator
// Device-local destination buffer of the same size.
%buffer = hal.allocator.allocate<%allocator_4 : !hal.allocator> type("DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|SharingImmutable") : !hal.buffer{%c1731821440}
%device_5 = hal.ex.shared_device : !hal.device
// Queue affinity passed to the execute op below (-1).
%c-1_i64 = arith.constant -1 : i64
// One-shot transfer command buffer: full-length copy staging -> device,
// followed by an execution barrier.
%cmd = hal.command_buffer.create device(%device_5 : !hal.device) mode("OneShot|AllowInlineExecution") categories(Transfer) : !hal.command_buffer
hal.command_buffer.copy_buffer<%cmd : !hal.command_buffer> source(%mapped_2 : !hal.buffer)[%c0] target(%buffer : !hal.buffer)[%c0] length(%c1731821440)
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None")
hal.command_buffer.finalize<%cmd : !hal.command_buffer>
// Submit with a null wait fence; %fence is signaled by this submission.
%1 = util.null : !hal.fence
%fence = hal.fence.create device(%device_5 : !hal.device) flags("None") : !hal.fence
hal.device.queue.execute<%device_5 : !hal.device> affinity(%c-1_i64) wait(%1) signal(%fence) commands([%cmd])
cf.br ^bb2(%buffer, %fence : !hal.buffer, !hal.fence)
// Join point: publish whichever buffer/fence pair was produced.
^bb2(%2: !hal.buffer, %3: !hal.fence): // 2 preds: ^bb0, ^bb1
util.global.store %2, @_constant : !hal.buffer
util.global.store %3, @_constant__timepoint : !hal.fence
util.initializer.return
}
// Dispatch 0: for each element i of a 2x160 output, writes
//   out[row, i] = in[0] * exp(f16(i) * f16(-9.21033954) / 160)
// where in[0] is a single f16 scalar read from binding 0 and exp is evaluated
// with an inline f32 polynomial approximation.
// NOTE(review): -9.21033954 is numerically -ln(10000); presumably this is the
// frequency term of a sinusoidal embedding - confirm against the source model.
hal.executable private @forward_dispatch_0 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
hal.executable.export public @forward_dispatch_0_generic_2x160 ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation, workgroup_size = [32 : index, 1 : index, 1 : index]} {
// Workgroup count is (5, 2, 1); with 32 invocations per workgroup in x this
// covers 160 columns by 2 rows.
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
%c5 = arith.constant 5 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
hal.return %c5, %c2, %c1 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [StorageBuffer16BitAccess, Shader, Float16, Int64], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
// Binding 0: a single f16 element (input scalar). Binding 1: runtime-sized
// f16 array (output).
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.array<1 x f16, stride=2> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.func @forward_dispatch_0_generic_2x160() "None" {
%cst160_i32 = spirv.Constant 160 : i32
%cst32_i32 = spirv.Constant 32 : i32
// 0.693147182 is ln(2) and 1.44269502 is log2(e) in f32; they drive the
// range reduction x = k*ln(2) + r used below.
%cst_f32 = spirv.Constant 0.693147182 : f32
%cst_f32_0 = spirv.Constant 1.44269502 : f32
// Polynomial coefficients for exp(r).
%cst_f32_1 = spirv.Constant 1.000000e+00 : f32
%cst_f32_2 = spirv.Constant 0.499705136 : f32
%cst_f32_3 = spirv.Constant 0.168738902 : f32
%cst_f32_4 = spirv.Constant 0.0366896503 : f32
%cst_f32_5 = spirv.Constant 1.314350e-02 : f32
// 23 = f32 mantissa width, 127 = f32 exponent bias (used to build 2^k).
%cst23_i32 = spirv.Constant 23 : i32
%cst127_i32 = spirv.Constant 127 : i32
%cst_f32_6 = spirv.Constant 0.000000e+00 : f32
// Bit patterns of +infinity and -infinity, and the smallest normal f32.
%cst_f32_7 = spirv.Constant 0x7F800000 : f32
%cst_f32_8 = spirv.Constant 0xFF800000 : f32
%cst_f32_9 = spirv.Constant 1.17549435E-38 : f32
%cst-127_i32 = spirv.Constant -127 : i32
%cst0_i32 = spirv.Constant 0 : i32
%cst_f16 = spirv.Constant 0.000000e+00 : f16
%cst_f32_10 = spirv.Constant -9.21033954 : f32
%cst_f16_11 = spirv.Constant 1.600000e+02 : f16
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.array<1 x f16, stride=2> [0])>, StorageBuffer>
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// %1 = workgroup id y, %3 = workgroup id x, %5 = local id x, %7 = local id y.
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%0 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%1 = spirv.CompositeExtract %0[1 : i32] : vector<3xi32>
%2 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%3 = spirv.CompositeExtract %2[0 : i32] : vector<3xi32>
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%4 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%5 = spirv.CompositeExtract %4[0 : i32] : vector<3xi32>
%6 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%7 = spirv.CompositeExtract %6[1 : i32] : vector<3xi32>
// %9 = the scalar input in[0].
%8 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<1 x f16, stride=2> [0])>, StorageBuffer>, i32, i32
%9 = spirv.Load "StorageBuffer" %8 : f16
// Column index i = workgroup_x * 32 + local_x, converted to f16 via i64.
%10 = spirv.IMul %3, %cst32_i32 : i32
%11 = spirv.IAdd %10, %5 : i32
%12 = spirv.SConvert %11 : i32 to i64
%13 = spirv.ConvertSToF %12 : i64 to f16
// Adds the f16 constant 0.0 to the index (kept as emitted).
%14 = spirv.FAdd %13, %cst_f16 : f16
// x = i * f16(-9.21033954) / 160, computed in f16 and then widened to f32.
%15 = spirv.FConvert %cst_f32_10 : f32 to f16
%16 = spirv.FMul %14, %15 : f16
%17 = spirv.FDiv %16, %cst_f16_11 : f16
%18 = spirv.FConvert %17 : f16 to f32
// NaN flag for x; both operands of the LogicalOr are the same value.
%19 = spirv.IsNan %18 : f32
%20 = spirv.LogicalOr %19, %19 : i1
// Range reduction: k = floor(x * log2(e)), r = x - k * ln(2).
%21 = spirv.FMul %18, %cst_f32_0 : f32
%22 = spirv.GL.Floor %21 : f32
%23 = spirv.FMul %22, %cst_f32 : f32
%24 = spirv.FSub %18, %23 : f32
// Degree-5 polynomial in r:
//   p = (1 + r) + r^2 * (c2 + c3*r) + r^4 * (c4 + c5*r)
%25 = spirv.FMul %24, %24 : f32
%26 = spirv.FMul %25, %25 : f32
%27 = spirv.GL.Fma %cst_f32_1, %24, %cst_f32_1 : f32
%28 = spirv.GL.Fma %cst_f32_3, %24, %cst_f32_2 : f32
%29 = spirv.GL.Fma %cst_f32_5, %24, %cst_f32_4 : f32
%30 = spirv.GL.Fma %28, %25, %27 : f32
%31 = spirv.GL.Fma %29, %26, %30 : f32
// Build 2^k by placing (k + 127) in the f32 exponent field, then scale p.
%32 = spirv.ConvertFToS %22 : f32 to i32
%33 = spirv.IAdd %32, %cst127_i32 : i32
%34 = spirv.ShiftLeftLogical %33, %cst23_i32 : i32, i32
%35 = spirv.Bitcast %34 : i32 to f32
%36 = spirv.FMul %31, %35 : f32
// Special-case handling, applied innermost to outermost:
//   k within [-127, 127]  -> p * 2^k
//   otherwise             -> +inf if x > 0, else the smallest normal f32
//   x == +inf             -> +inf
//   x == -inf             -> 0
//   x is NaN              -> x
%37 = spirv.SLessThanEqual %32, %cst127_i32 : i32
%38 = spirv.SGreaterThanEqual %32, %cst-127_i32 : i32
%39 = spirv.FOrdEqual %18, %cst_f32_8 : f32
%40 = spirv.FOrdEqual %18, %cst_f32_7 : f32
%41 = spirv.FOrdGreaterThan %18, %cst_f32_6 : f32
%42 = spirv.LogicalAnd %37, %38 : i1
%43 = spirv.Select %41, %cst_f32_7, %cst_f32_9 : i1, f32
%44 = spirv.Select %42, %36, %43 : i1, f32
%45 = spirv.Select %40, %cst_f32_7, %44 : i1, f32
%46 = spirv.Select %39, %cst_f32_6, %45 : i1, f32
%47 = spirv.Select %20, %18, %46 : i1, f32
// Narrow to f16 and multiply by the scalar input.
%48 = spirv.FConvert %47 : f32 to f16
%49 = spirv.FMul %9, %48 : f16
// Output index = workgroup_y*160 + local_y*160 + local_x + workgroup_x*32.
%50 = spirv.IMul %1, %cst160_i32 : i32
%51 = spirv.IMul %7, %cst160_i32 : i32
%52 = spirv.IAdd %50, %51 : i32
%53 = spirv.IAdd %52, %5 : i32
%54 = spirv.IAdd %53, %10 : i32
%55 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %54] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %55, %49 : f16
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_0_generic_2x160, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_0_generic_2x160 "LocalSize", 32, 1, 1
}
}
}
}
// Dispatch 1: elementwise over 320 f16 values. Each value is widened to f32,
// reduced to a quarter-period remainder, run through one of two polynomials
// chosen by quadrant, optionally negated, narrowed to f16 and stored at
// element offset 77152 + i of the output binding.
// The quadrant table (q=0: S(r), q=1: C(r), q=2: -S(r), q=3: -C(r)) matches
// the identities for sin(x).
hal.executable private @forward_dispatch_1 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
hal.executable.export public @forward_dispatch_1_generic_320 ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation1, workgroup_size = [32 : index, 1 : index, 1 : index]} {
// Workgroup count is (10, 1, 1); 10 workgroups of 32 invocations = 320.
^bb0(%arg0: !hal.device, %arg1: index):
%c10 = arith.constant 10 : index
%c1 = arith.constant 1 : index
hal.return %c10, %c1, %c1 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
// Binding 0: f16 input array. Binding 1: f16 output array.
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.func @forward_dispatch_1_generic_320() "None" {
// Element offset added to the output index.
%cst77152_i32 = spirv.Constant 77152 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst0_i32 = spirv.Constant 0 : i32
// 0.636619746 is 2/pi and 1.57079637 is pi/2 in f32.
%cst_f32 = spirv.Constant 0.636619746 : f32
%cst_f32_0 = spirv.Constant 1.57079637 : f32
%cst_f32_1 = spirv.Constant 1.000000e+00 : f32
%cst_f32_2 = spirv.Constant -1.000000e+00 : f32
// Coefficients of the sine-like polynomial S (multiplied by r at the end).
%cst_f32_3 = spirv.Constant -0.166666672 : f32
%cst_f32_4 = spirv.Constant 0.00833334774 : f32
%cst_f32_5 = spirv.Constant -1.98426045E-4 : f32
%cst_f32_6 = spirv.Constant 2.76001265E-6 : f32
%cst_f32_7 = spirv.Constant -2.50293279E-8 : f32
// Coefficients of the cosine-like polynomial C (multiplied by 1 at the end).
%cst_f32_8 = spirv.Constant -5.000000e-01 : f32
%cst_f32_9 = spirv.Constant 0.0416666418 : f32
%cst_f32_10 = spirv.Constant -0.00138883304 : f32
%cst_f32_11 = spirv.Constant 2.47562348E-5 : f32
%cst_f32_12 = spirv.Constant -2.59630184E-7 : f32
%cst3_i32 = spirv.Constant 3 : i32
%cst1_i32 = spirv.Constant 1 : i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// Element index i = local_x + workgroup_x * 32.
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%0 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%1 = spirv.CompositeExtract %0[0 : i32] : vector<3xi32>
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%2 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%3 = spirv.CompositeExtract %2[0 : i32] : vector<3xi32>
%4 = spirv.IMul %1, %cst32_i32 : i32
%5 = spirv.IAdd %3, %4 : i32
// Load x = in[i] and widen to f32.
%6 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %5] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%7 = spirv.Load "StorageBuffer" %6 : f16
%8 = spirv.FConvert %7 : f16 to f32
// Range reduction: k = floor(x * 2/pi), r = x - k * pi/2, q = k & 3.
%9 = spirv.FMul %8, %cst_f32 : f32
%10 = spirv.GL.Floor %9 : f32
%11 = spirv.FMul %10, %cst_f32_0 : f32
%12 = spirv.FSub %8, %11 : f32
%13 = spirv.ConvertFToS %10 : f32 to i32
%14 = spirv.BitwiseAnd %13, %cst3_i32 : i32
// %17: q is 1 or 3 -> use the cosine-like polynomial.
// %18: q > 1      -> negate the result.
%15 = spirv.IEqual %14, %cst1_i32 : i32
%16 = spirv.IEqual %14, %cst3_i32 : i32
%17 = spirv.LogicalOr %15, %16 : i1
%18 = spirv.SGreaterThan %14, %cst1_i32 : i32
%19 = spirv.FMul %12, %12 : f32
// Pick the leading factor (1 or r) and the five coefficients.
%20 = spirv.Select %17, %cst_f32_1, %12 : i1, f32
%21 = spirv.Select %17, %cst_f32_8, %cst_f32_3 : i1, f32
%22 = spirv.Select %17, %cst_f32_9, %cst_f32_4 : i1, f32
%23 = spirv.Select %17, %cst_f32_10, %cst_f32_5 : i1, f32
%24 = spirv.Select %17, %cst_f32_11, %cst_f32_6 : i1, f32
%25 = spirv.Select %17, %cst_f32_12, %cst_f32_7 : i1, f32
// Horner evaluation in r^2:
//   P = 1 + r^2*(c1 + r^2*(c2 + r^2*(c3 + r^2*(c4 + r^2*c5))))
%26 = spirv.GL.Fma %19, %25, %24 : f32
%27 = spirv.GL.Fma %19, %26, %23 : f32
%28 = spirv.GL.Fma %19, %27, %22 : f32
%29 = spirv.GL.Fma %19, %28, %21 : f32
%30 = spirv.GL.Fma %19, %29, %cst_f32_1 : f32
// Apply the leading factor, then the quadrant sign.
%31 = spirv.FMul %20, %30 : f32
%32 = spirv.FMul %31, %cst_f32_2 : f32
%33 = spirv.Select %18, %32, %31 : i1, f32
// Narrow to f16 and store at out[i + 77152].
%34 = spirv.FConvert %33 : f32 to f16
%35 = spirv.IAdd %5, %cst77152_i32 : i32
%36 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %35] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %36, %34 : f16
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_1_generic_320, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_1_generic_320 "LocalSize", 32, 1, 1
}
}
}
}
// Dispatch 2: elementwise over 320 f16 values, structured like a quarter-period
// range reduction followed by a quadrant-selected polynomial. The result is
// narrowed to f16 and stored at element offset 78112 + i of the output binding.
// The quadrant table (q=0: C(r), q=1: -S(r), q=2: -C(r), q=3: S(r)) matches
// the identities for cos(x).
hal.executable private @forward_dispatch_2 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
hal.executable.export public @forward_dispatch_2_generic_320 ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation1, workgroup_size = [32 : index, 1 : index, 1 : index]} {
// Workgroup count is (10, 1, 1); 10 workgroups of 32 invocations = 320.
^bb0(%arg0: !hal.device, %arg1: index):
%c10 = arith.constant 10 : index
%c1 = arith.constant 1 : index
hal.return %c10, %c1, %c1 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
// Binding 0: f16 input array. Binding 1: f16 output array.
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.func @forward_dispatch_2_generic_320() "None" {
// Element offset added to the output index.
%cst78112_i32 = spirv.Constant 78112 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst0_i32 = spirv.Constant 0 : i32
// 0.636619746 is 2/pi and 1.57079637 is pi/2 in f32.
%cst_f32 = spirv.Constant 0.636619746 : f32
%cst_f32_0 = spirv.Constant 1.57079637 : f32
%cst_f32_1 = spirv.Constant 1.000000e+00 : f32
%cst_f32_2 = spirv.Constant -1.000000e+00 : f32
// Coefficients of the sine-like polynomial S (multiplied by r at the end).
%cst_f32_3 = spirv.Constant -0.166666672 : f32
%cst_f32_4 = spirv.Constant 0.00833334774 : f32
%cst_f32_5 = spirv.Constant -1.98426045E-4 : f32
%cst_f32_6 = spirv.Constant 2.76001265E-6 : f32
%cst_f32_7 = spirv.Constant -2.50293279E-8 : f32
// Coefficients of the cosine-like polynomial C (multiplied by 1 at the end).
%cst_f32_8 = spirv.Constant -5.000000e-01 : f32
%cst_f32_9 = spirv.Constant 0.0416666418 : f32
%cst_f32_10 = spirv.Constant -0.00138883304 : f32
%cst_f32_11 = spirv.Constant 2.47562348E-5 : f32
%cst_f32_12 = spirv.Constant -2.59630184E-7 : f32
%cst3_i32 = spirv.Constant 3 : i32
%cst1_i32 = spirv.Constant 1 : i32
%cst2_i32 = spirv.Constant 2 : i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// Element index i = local_x + workgroup_x * 32.
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%0 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%1 = spirv.CompositeExtract %0[0 : i32] : vector<3xi32>
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%2 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%3 = spirv.CompositeExtract %2[0 : i32] : vector<3xi32>
%4 = spirv.IMul %1, %cst32_i32 : i32
%5 = spirv.IAdd %3, %4 : i32
// Load x = in[i] and widen to f32.
%6 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %5] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%7 = spirv.Load "StorageBuffer" %6 : f16
%8 = spirv.FConvert %7 : f16 to f32
// Range reduction: k = floor(x * 2/pi), r = x - k * pi/2, q = k & 3.
%9 = spirv.FMul %8, %cst_f32 : f32
%10 = spirv.GL.Floor %9 : f32
%11 = spirv.FMul %10, %cst_f32_0 : f32
%12 = spirv.FSub %8, %11 : f32
%13 = spirv.ConvertFToS %10 : f32 to i32
%14 = spirv.BitwiseAnd %13, %cst3_i32 : i32
// %18: q is 0 or 2 -> use the cosine-like polynomial.
// %19: q is 1 or 2 -> negate the result.
%15 = spirv.IEqual %14, %cst0_i32 : i32
%16 = spirv.IEqual %14, %cst1_i32 : i32
%17 = spirv.IEqual %14, %cst2_i32 : i32
%18 = spirv.LogicalOr %15, %17 : i1
%19 = spirv.LogicalOr %16, %17 : i1
%20 = spirv.FMul %12, %12 : f32
// Pick the leading factor (1 or r) and the five coefficients.
%21 = spirv.Select %18, %cst_f32_1, %12 : i1, f32
%22 = spirv.Select %18, %cst_f32_8, %cst_f32_3 : i1, f32
%23 = spirv.Select %18, %cst_f32_9, %cst_f32_4 : i1, f32
%24 = spirv.Select %18, %cst_f32_10, %cst_f32_5 : i1, f32
%25 = spirv.Select %18, %cst_f32_11, %cst_f32_6 : i1, f32
%26 = spirv.Select %18, %cst_f32_12, %cst_f32_7 : i1, f32
// Horner evaluation in r^2:
//   P = 1 + r^2*(c1 + r^2*(c2 + r^2*(c3 + r^2*(c4 + r^2*c5))))
%27 = spirv.GL.Fma %20, %26, %25 : f32
%28 = spirv.GL.Fma %20, %27, %24 : f32
%29 = spirv.GL.Fma %20, %28, %23 : f32
%30 = spirv.GL.Fma %20, %29, %22 : f32
%31 = spirv.GL.Fma %20, %30, %cst_f32_1 : f32
// Apply the leading factor, then the quadrant sign.
%32 = spirv.FMul %21, %31 : f32
%33 = spirv.FMul %32, %cst_f32_2 : f32
%34 = spirv.Select %19, %33, %32 : i1, f32
// Narrow to f16 and store at out[i + 78112].
%35 = spirv.FConvert %34 : f32 to f16
%36 = spirv.IAdd %5, %cst78112_i32 : i32
%37 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %36] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %37, %35 : f16
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_2_generic_320, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_2_generic_320 "LocalSize", 32, 1, 1
}
}
}
}
// Dispatch 3: strided f16 copy. Reads a 2x160 region from binding 0 with a row
// stride of 160 elements and writes it to binding 1 with a row stride of 320
// elements. Two i32 push constants, each floor-divided by 2, are added to the
// source and destination element indices respectively.
// NOTE(review): the divide-by-2 presumably converts byte offsets into f16
// element offsets - confirm against the host-side dispatch code.
hal.executable private @forward_dispatch_3 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
hal.executable.export public @forward_dispatch_3 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation, workgroup_size = [32 : index, 1 : index, 1 : index]} {
// Workgroup count is (5, 2, 1); with 32 invocations per workgroup in x this
// covers 160 columns by 2 rows.
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
%c5 = arith.constant 5 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
hal.return %c5, %c2, %c1 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
// Push-constant block: two i32 values.
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
// Binding 0: f16 source array. Binding 1: f16 destination array.
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.func @forward_dispatch_3() "None" {
%cst1_i32 = spirv.Constant 1 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst160_i32 = spirv.Constant 160 : i32
%cst320_i32 = spirv.Constant 320 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst0_i32 = spirv.Constant 0 : i32
// %1 = push constant 0, %3 = push constant 1.
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
%0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%1 = spirv.Load "PushConstant" %0 : i32
%2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%3 = spirv.Load "PushConstant" %2 : i32
// %9 = floor(%1 / 2). For negative inputs the value is mapped to -1 - x before
// the signed divide and the quotient is mapped back with -1 - q, which rounds
// toward negative infinity.
%4 = spirv.SLessThan %1, %cst0_i32 : i32
%5 = spirv.ISub %cst-1_i32, %1 : i32
%6 = spirv.Select %4, %5, %1 : i1, i32
%7 = spirv.SDiv %6, %cst2_i32 : i32
%8 = spirv.ISub %cst-1_i32, %7 : i32
%9 = spirv.Select %4, %8, %7 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// %15 = floor(%3 / 2), computed the same way.
%10 = spirv.SLessThan %3, %cst0_i32 : i32
%11 = spirv.ISub %cst-1_i32, %3 : i32
%12 = spirv.Select %10, %11, %3 : i1, i32
%13 = spirv.SDiv %12, %cst2_i32 : i32
%14 = spirv.ISub %cst-1_i32, %13 : i32
%15 = spirv.Select %10, %14, %13 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// %17 = workgroup id x, %19 = workgroup id y, %21 = local id x, %23 = local id y.
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%16 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%17 = spirv.CompositeExtract %16[0 : i32] : vector<3xi32>
%18 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%19 = spirv.CompositeExtract %18[1 : i32] : vector<3xi32>
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%20 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%21 = spirv.CompositeExtract %20[0 : i32] : vector<3xi32>
%22 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%23 = spirv.CompositeExtract %22[1 : i32] : vector<3xi32>
// Source index = workgroup_y*160 + local_y*160 + local_x + workgroup_x*32 + %9.
%24 = spirv.IMul %19, %cst160_i32 : i32
%25 = spirv.IMul %23, %cst160_i32 : i32
%26 = spirv.IAdd %24, %25 : i32
%27 = spirv.IAdd %26, %21 : i32
%28 = spirv.IMul %17, %cst32_i32 : i32
%29 = spirv.IAdd %27, %28 : i32
%30 = spirv.IAdd %29, %9 : i32
%31 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %30] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%32 = spirv.Load "StorageBuffer" %31 : f16
// Destination index = workgroup_y*320 + local_y*320 + local_x + workgroup_x*32 + %15.
%33 = spirv.IMul %19, %cst320_i32 : i32
%34 = spirv.IMul %23, %cst320_i32 : i32
%35 = spirv.IAdd %33, %34 : i32
%36 = spirv.IAdd %35, %21 : i32
%37 = spirv.IAdd %36, %28 : i32
%38 = spirv.IAdd %37, %15 : i32
%39 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %38] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %39, %32 : f16
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_3, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_3 "LocalSize", 32, 1, 1
}
}
}
}
// Executable @forward_dispatch_4: each invocation copies one f16 element from
// the buffer at binding (0, 0) to the buffer at binding (0, 1).
hal.executable private @forward_dispatch_4 {
  hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
    // The export region returns the constant triple (5, 2, 1).
    hal.executable.export public @forward_dispatch_4 ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation, workgroup_size = [32 : index, 1 : index, 1 : index]} {
    ^bb0(%dev: !hal.device, %w0: index, %w1: index):
      %count_z = arith.constant 1 : index
      %count_y = arith.constant 2 : index
      %count_x = arith.constant 5 : index
      hal.return %count_x, %count_y, %count_z : index, index, index
    }
    builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
      spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
        spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
        spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
        spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
        spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
        spirv.func @forward_dispatch_4() "None" {
          // Integer constants used by the index arithmetic below.
          %zero = spirv.Constant 0 : i32
          %x_scale = spirv.Constant 32 : i32
          %src_y_scale = spirv.Constant 160 : i32
          %dst_y_scale = spirv.Constant 320 : i32
          %src_offset = spirv.Constant 78112 : i32
          %dst_offset = spirv.Constant 77632 : i32

          // Pointers to the two storage buffers and the two built-in id vectors.
          %src_buf_ptr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
          %dst_buf_ptr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
          %wg_id_ptr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
          %local_id_ptr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>

          // Components 0 and 1 of the workgroup id and the local invocation id.
          %wg_vec_a = spirv.Load "Input" %wg_id_ptr : vector<3xi32>
          %wg_x = spirv.CompositeExtract %wg_vec_a[0 : i32] : vector<3xi32>
          %wg_vec_b = spirv.Load "Input" %wg_id_ptr : vector<3xi32>
          %wg_y = spirv.CompositeExtract %wg_vec_b[1 : i32] : vector<3xi32>
          %lid_vec_a = spirv.Load "Input" %local_id_ptr : vector<3xi32>
          %lid_x = spirv.CompositeExtract %lid_vec_a[0 : i32] : vector<3xi32>
          %lid_vec_b = spirv.Load "Input" %local_id_ptr : vector<3xi32>
          %lid_y = spirv.CompositeExtract %lid_vec_b[1 : i32] : vector<3xi32>

          // Source index = wg_y*160 + lid_y*160 + lid_x + wg_x*32 + 78112.
          %src_wg_part = spirv.IMul %wg_y, %src_y_scale : i32
          %src_lid_part = spirv.IMul %lid_y, %src_y_scale : i32
          %src_y_part = spirv.IAdd %src_wg_part, %src_lid_part : i32
          %src_y_plus_lid = spirv.IAdd %src_y_part, %lid_x : i32
          %wg_x_part = spirv.IMul %wg_x, %x_scale : i32
          %src_rel = spirv.IAdd %src_y_plus_lid, %wg_x_part : i32
          %src_idx = spirv.IAdd %src_rel, %src_offset : i32
          %src_elem_ptr = spirv.AccessChain %src_buf_ptr[%zero, %src_idx] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
          %elem = spirv.Load "StorageBuffer" %src_elem_ptr : f16

          // Destination index = wg_y*320 + lid_y*320 + lid_x + wg_x*32 + 77632.
          %dst_wg_part = spirv.IMul %wg_y, %dst_y_scale : i32
          %dst_lid_part = spirv.IMul %lid_y, %dst_y_scale : i32
          %dst_y_part = spirv.IAdd %dst_wg_part, %dst_lid_part : i32
          %dst_y_plus_lid = spirv.IAdd %dst_y_part, %lid_x : i32
          %dst_rel = spirv.IAdd %dst_y_plus_lid, %wg_x_part : i32
          %dst_idx = spirv.IAdd %dst_rel, %dst_offset : i32
          %dst_elem_ptr = spirv.AccessChain %dst_buf_ptr[%zero, %dst_idx] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
          spirv.Store "StorageBuffer" %dst_elem_ptr, %elem : f16
          spirv.Return
        }
        spirv.EntryPoint "GLCompute" @forward_dispatch_4, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
        spirv.ExecutionMode @forward_dispatch_4 "LocalSize", 32, 1, 1
      }
    }
  }
}
// Executable @forward_dispatch_6: each invocation copies one f16 element from
// the buffer at binding (0, 0) to the buffer at binding (0, 1). Source and
// destination share the same relative index and differ only in the constant
// added to it (77472 for the load, 160 for the store).
hal.executable private @forward_dispatch_6 {
  hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
    // The export region returns the constant triple (5, 2, 1).
    hal.executable.export public @forward_dispatch_6 ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation, workgroup_size = [32 : index, 1 : index, 1 : index]} {
    ^bb0(%dev: !hal.device, %w0: index, %w1: index):
      %count_z = arith.constant 1 : index
      %count_y = arith.constant 2 : index
      %count_x = arith.constant 5 : index
      hal.return %count_x, %count_y, %count_z : index, index, index
    }
    builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
      spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
        spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
        spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
        spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
        spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
        spirv.func @forward_dispatch_6() "None" {
          // Integer constants used by the index arithmetic below.
          %zero = spirv.Constant 0 : i32
          %x_scale = spirv.Constant 32 : i32
          %y_scale = spirv.Constant 320 : i32
          %src_offset = spirv.Constant 77472 : i32
          %dst_offset = spirv.Constant 160 : i32

          // Pointers to the two storage buffers and the two built-in id vectors.
          %src_buf_ptr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
          %dst_buf_ptr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
          %wg_id_ptr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
          %local_id_ptr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>

          // Components 0 and 1 of the workgroup id and the local invocation id.
          %wg_vec_a = spirv.Load "Input" %wg_id_ptr : vector<3xi32>
          %wg_x = spirv.CompositeExtract %wg_vec_a[0 : i32] : vector<3xi32>
          %wg_vec_b = spirv.Load "Input" %wg_id_ptr : vector<3xi32>
          %wg_y = spirv.CompositeExtract %wg_vec_b[1 : i32] : vector<3xi32>
          %lid_vec_a = spirv.Load "Input" %local_id_ptr : vector<3xi32>
          %lid_x = spirv.CompositeExtract %lid_vec_a[0 : i32] : vector<3xi32>
          %lid_vec_b = spirv.Load "Input" %local_id_ptr : vector<3xi32>
          %lid_y = spirv.CompositeExtract %lid_vec_b[1 : i32] : vector<3xi32>

          // Shared relative index = wg_y*320 + lid_y*320 + lid_x + wg_x*32.
          %wg_y_part = spirv.IMul %wg_y, %y_scale : i32
          %lid_y_part = spirv.IMul %lid_y, %y_scale : i32
          %y_part = spirv.IAdd %wg_y_part, %lid_y_part : i32
          %y_plus_lid = spirv.IAdd %y_part, %lid_x : i32
          %wg_x_part = spirv.IMul %wg_x, %x_scale : i32
          %rel_idx = spirv.IAdd %y_plus_lid, %wg_x_part : i32

          // Load from relative index + 77472.
          %src_idx = spirv.IAdd %rel_idx, %src_offset : i32
          %src_elem_ptr = spirv.AccessChain %src_buf_ptr[%zero, %src_idx] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
          %elem = spirv.Load "StorageBuffer" %src_elem_ptr : f16

          // Store to relative index + 160.
          %dst_idx = spirv.IAdd %rel_idx, %dst_offset : i32
          %dst_elem_ptr = spirv.AccessChain %dst_buf_ptr[%zero, %dst_idx] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
          spirv.Store "StorageBuffer" %dst_elem_ptr, %elem : f16
          spirv.Return
        }
        spirv.EntryPoint "GLCompute" @forward_dispatch_6, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
        spirv.ExecutionMode @forward_dispatch_6 "LocalSize", 32, 1, 1
      }
    }
  }
}
// Executable @forward_dispatch_7: a cooperative-matrix kernel (the entry point
// name suggests a 32x1280x320 matmul). It reads the buffers at bindings (0, 0)
// and (0, 1), stages data through two Workgroup-storage arrays, accumulates
// four 16x16 f16 cooperative matrices, and writes them to binding (0, 2).
hal.executable private @forward_dispatch_7 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
// The export region returns the constant triple (5, 2, 1).
hal.executable.export public @forward_dispatch_7_matmul_32x1280x320 ordinal(0) layout(#pipeline_layout2) attributes {subgroup_size = 32 : index, translation_info = #translation2, workgroup_size = [128 : index, 1 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
%c5 = arith.constant 5 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
hal.return %c5, %c2, %c1 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, Float16, CooperativeMatrixNV], [SPV_KHR_storage_buffer_storage_class, SPV_NV_cooperative_matrix]> {
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
// Two Workgroup-storage staging arrays (1056 and 80 vector<4xf32> elements).
spirv.GlobalVariable @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>
spirv.GlobalVariable @__workgroup_mem__3 : !spirv.ptr<!spirv.struct<(!spirv.array<80 x vector<4xf32>>)>, Workgroup>
// All three storage buffers are typed as runtime arrays of vector<4xf32> (stride 16).
// NOTE(review): the cooperative matrices are f16, so each element presumably
// packs 8 f16 values, which would match the divisions by 8 below; confirm.
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.func @forward_dispatch_7_matmul_32x1280x320() "None" attributes {spirv.entry_point_abi = #spirv.entry_point_abi<subgroup_size = 32>} {
%false = spirv.Constant false
%cst1392_i32 = spirv.Constant 1392 : i32
%cst1394_i32 = spirv.Constant 1394 : i32
%cst1396_i32 = spirv.Constant 1396 : i32
%cst1398_i32 = spirv.Constant 1398 : i32
%cst2560_i32 = spirv.Constant 2560 : i32
%cst534_i32 = spirv.Constant 534 : i32
%cst532_i32 = spirv.Constant 532 : i32
%cst530_i32 = spirv.Constant 530 : i32
%cst528_i32 = spirv.Constant 528 : i32
%cst6_i32 = spirv.Constant 6 : i32
%cst4_i32 = spirv.Constant 4 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst33_i32 = spirv.Constant 33 : i32
%cst160_i32 = spirv.Constant 160 : i32
%cst5_i32 = spirv.Constant 5 : i32
%cst80_i32 = spirv.Constant 80 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst640_i32 = spirv.Constant 640 : i32
%cst40_i32 = spirv.Constant 40 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst320_i32 = spirv.Constant 320 : i32
%cst1024_i32 = spirv.Constant 1024 : i32
%cst256_i32 = spirv.Constant 256 : i32
%cst1_i32 = spirv.Constant 1 : i32
%cst0_i32 = spirv.Constant 0 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst16_i32 = spirv.Constant 16 : i32
%cst_f16 = spirv.Constant 0.000000e+00 : f16
// %0 is a 16x16 f16 cooperative matrix built from the 0.0 constant; it is the
// initial value of all four accumulators.
%0 = spirv.CompositeConstruct %cst_f16 : (f16) -> !spirv.coopmatrix<16x16xf16, Subgroup>
%__workgroup_mem__3_addr = spirv.mlir.addressof @__workgroup_mem__3 : !spirv.ptr<!spirv.struct<(!spirv.array<80 x vector<4xf32>>)>, Workgroup>
%__workgroup_mem__4_addr = spirv.mlir.addressof @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
// %2 = WorkgroupId[1], %4 = WorkgroupId[0], %6 = LocalInvocationId[0],
// %8 = LocalInvocationId[1], %9 = %6 * 8.
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%1 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%2 = spirv.CompositeExtract %1[1 : i32] : vector<3xi32>
%3 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%4 = spirv.CompositeExtract %3[0 : i32] : vector<3xi32>
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%5 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%6 = spirv.CompositeExtract %5[0 : i32] : vector<3xi32>
%7 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%8 = spirv.CompositeExtract %7[1 : i32] : vector<3xi32>
%9 = spirv.IMul %6, %cst8_i32 : i32
// Function-storage slots for the four accumulators. They are written on every
// iteration of the main loop and read back after it, which is how the results
// leave the loop region.
%10 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%11 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%12 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%13 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
// Main loop: %39 runs from 0 while %39 < 320, in steps of 32. The loop carries
// the four accumulators %40..%43, all initialised to %0.
spirv.mlir.loop {
spirv.Branch ^bb1(%cst0_i32, %0, %0, %0, %0 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>)
^bb1(%39: i32, %40: !spirv.coopmatrix<16x16xf16, Subgroup>, %41: !spirv.coopmatrix<16x16xf16, Subgroup>, %42: !spirv.coopmatrix<16x16xf16, Subgroup>, %43: !spirv.coopmatrix<16x16xf16, Subgroup>): // 2 preds: ^bb0, ^bb2
%44 = spirv.SLessThan %39, %cst320_i32 : i32
spirv.BranchConditional %44, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Barrier before the Workgroup-storage arrays are overwritten for this iteration.
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
// Copy loop 1: binding (0, 0) -> @__workgroup_mem__3.
// Outer index %90 starts at %8 and runs while %90 < 16, step 1.
spirv.mlir.loop {
spirv.Branch ^bb1(%8 : i32)
^bb1(%90: i32): // 2 preds: ^bb0, ^bb2
%91 = spirv.SLessThan %90, %cst16_i32 : i32
spirv.BranchConditional %91, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner index %93 starts at %9 and runs while %93 < 32, step 1024.
spirv.mlir.loop {
spirv.Branch ^bb1(%9 : i32)
^bb1(%93: i32): // 2 preds: ^bb0, ^bb2
%94 = spirv.SLessThan %93, %cst32_i32 : i32
spirv.BranchConditional %94, ^bb2, ^bb3
^bb2: // pred: ^bb1
%95 = spirv.IMul %90, %cst40_i32 : i32
%96 = spirv.IMul %2, %cst640_i32 : i32
%97 = spirv.IAdd %95, %96 : i32
%98 = spirv.IAdd %39, %93 : i32
// %104 = (%98 < 0) ? -1 - ((-1 - %98) / 8) : %98 / 8. The same
// SLessThan/ISub/Select/SDiv sequence recurs throughout this function.
%99 = spirv.SLessThan %98, %cst0_i32 : i32
%100 = spirv.ISub %cst-1_i32, %98 : i32
%101 = spirv.Select %99, %100, %98 : i1, i32
%102 = spirv.SDiv %101, %cst8_i32 : i32
%103 = spirv.ISub %cst-1_i32, %102 : i32
%104 = spirv.Select %99, %103, %102 : i1, i32
// Source index = %90*40 + %2*640 + %104 + 80.
%105 = spirv.IAdd %97, %104 : i32
%106 = spirv.IAdd %105, %cst80_i32 : i32
%107 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %106] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%108 = spirv.Load "StorageBuffer" %107 : vector<4xf32>
// Destination index = %90*5 + %115, where %115 applies the same
// division-by-8 sequence to %93.
%109 = spirv.IMul %90, %cst5_i32 : i32
%110 = spirv.SLessThan %93, %cst0_i32 : i32
%111 = spirv.ISub %cst-1_i32, %93 : i32
%112 = spirv.Select %110, %111, %93 : i1, i32
%113 = spirv.SDiv %112, %cst8_i32 : i32
%114 = spirv.ISub %cst-1_i32, %113 : i32
%115 = spirv.Select %110, %114, %113 : i1, i32
%116 = spirv.IAdd %109, %115 : i32
%117 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %116] : !spirv.ptr<!spirv.struct<(!spirv.array<80 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %117, %108 : vector<4xf32>
%118 = spirv.IAdd %93, %cst1024_i32 : i32
spirv.Branch ^bb1(%118 : i32)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
%92 = spirv.IAdd %90, %cst1_i32 : i32
spirv.Branch ^bb1(%92 : i32)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
// Copy loop 2: binding (0, 1) -> @__workgroup_mem__4.
// Outer index %90 starts at %8 and runs while %90 < 32, step 1.
spirv.mlir.loop {
spirv.Branch ^bb1(%8 : i32)
^bb1(%90: i32): // 2 preds: ^bb0, ^bb2
%91 = spirv.SLessThan %90, %cst32_i32 : i32
spirv.BranchConditional %91, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner index %93 starts at %9 and runs while %93 < 256, step 1024.
spirv.mlir.loop {
spirv.Branch ^bb1(%9 : i32)
^bb1(%93: i32): // 2 preds: ^bb0, ^bb2
%94 = spirv.SLessThan %93, %cst256_i32 : i32
spirv.BranchConditional %94, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Source index = %39*160 + %90*160 + %4*32 + %105, where %105 is the
// division-by-8 sequence applied to %93.
%95 = spirv.IMul %39, %cst160_i32 : i32
%96 = spirv.IMul %90, %cst160_i32 : i32
%97 = spirv.IAdd %95, %96 : i32
%98 = spirv.IMul %4, %cst32_i32 : i32
%99 = spirv.IAdd %97, %98 : i32
%100 = spirv.SLessThan %93, %cst0_i32 : i32
%101 = spirv.ISub %cst-1_i32, %93 : i32
%102 = spirv.Select %100, %101, %93 : i1, i32
%103 = spirv.SDiv %102, %cst8_i32 : i32
%104 = spirv.ISub %cst-1_i32, %103 : i32
%105 = spirv.Select %100, %104, %103 : i1, i32
%106 = spirv.IAdd %99, %105 : i32
%107 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %106] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%108 = spirv.Load "StorageBuffer" %107 : vector<4xf32>
// Destination index = %90*33 + %105.
%109 = spirv.IMul %90, %cst33_i32 : i32
%110 = spirv.IAdd %109, %105 : i32
%111 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %110] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %111, %108 : vector<4xf32>
%112 = spirv.IAdd %93, %cst1024_i32 : i32
spirv.Branch ^bb1(%112 : i32)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
%92 = spirv.IAdd %90, %cst1_i32 : i32
spirv.Branch ^bb1(%92 : i32)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
// Barrier after both copy loops, before the cooperative-matrix loads below.
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
// Two cooperative-matrix loads from @__workgroup_mem__3 at indices %8*80 and
// %8*80 + 2, each with %cst5_i32 and %false as the trailing operands
// (NOTE(review): presumably stride and column-major flag; confirm against the
// SPIR-V dialect docs).
%45 = spirv.IMul %8, %cst80_i32 : i32
%46 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %45] : !spirv.ptr<!spirv.struct<(!spirv.array<80 x vector<4xf32>>)>, Workgroup>, i32, i32
%47 = spirv.NV.CooperativeMatrixLoad %46, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%48 = spirv.IAdd %45, %cst2_i32 : i32
%49 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %48] : !spirv.ptr<!spirv.struct<(!spirv.array<80 x vector<4xf32>>)>, Workgroup>, i32, i32
%50 = spirv.NV.CooperativeMatrixLoad %49, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
// %57 = (division-by-32 sequence applied to %6) * 8. It is the base index for
// the eight loads from @__workgroup_mem__4 below.
%51 = spirv.SLessThan %6, %cst0_i32 : i32
%52 = spirv.ISub %cst-1_i32, %6 : i32
%53 = spirv.Select %51, %52, %6 : i1, i32
%54 = spirv.SDiv %53, %cst32_i32 : i32
%55 = spirv.ISub %cst-1_i32, %54 : i32
%56 = spirv.Select %51, %55, %54 : i1, i32
%57 = spirv.IMul %56, %cst8_i32 : i32
// Eight cooperative-matrix loads from @__workgroup_mem__4 at %57 plus
// 0, 2, 4, 6, 528, 530, 532 and 534, each with %cst33_i32 and %false as the
// trailing operands.
%58 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %57] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32
%59 = spirv.NV.CooperativeMatrixLoad %58, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%60 = spirv.IAdd %57, %cst2_i32 : i32
%61 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %60] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32
%62 = spirv.NV.CooperativeMatrixLoad %61, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%63 = spirv.IAdd %57, %cst4_i32 : i32
%64 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %63] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32
%65 = spirv.NV.CooperativeMatrixLoad %64, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%66 = spirv.IAdd %57, %cst6_i32 : i32
%67 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %66] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32
%68 = spirv.NV.CooperativeMatrixLoad %67, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%69 = spirv.IAdd %57, %cst528_i32 : i32
%70 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %69] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32
%71 = spirv.NV.CooperativeMatrixLoad %70, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%72 = spirv.IAdd %57, %cst530_i32 : i32
%73 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %72] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32
%74 = spirv.NV.CooperativeMatrixLoad %73, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%75 = spirv.IAdd %57, %cst532_i32 : i32
%76 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %75] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32
%77 = spirv.NV.CooperativeMatrixLoad %76, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%78 = spirv.IAdd %57, %cst534_i32 : i32
%79 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %78] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32
%80 = spirv.NV.CooperativeMatrixLoad %79, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
// Each accumulator takes two chained MulAdds: first %47 with a matrix loaded
// at offset 0/2/4/6, then %50 with the matrix loaded at offset 528/530/532/534.
// Results: %82 (from %40), %84 (from %41), %86 (from %42), %88 (from %43).
%81 = spirv.NV.CooperativeMatrixMulAdd %47, %59, %40 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%82 = spirv.NV.CooperativeMatrixMulAdd %50, %71, %81 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%83 = spirv.NV.CooperativeMatrixMulAdd %47, %62, %41 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%84 = spirv.NV.CooperativeMatrixMulAdd %50, %74, %83 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%85 = spirv.NV.CooperativeMatrixMulAdd %47, %65, %42 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%86 = spirv.NV.CooperativeMatrixMulAdd %50, %77, %85 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%87 = spirv.NV.CooperativeMatrixMulAdd %47, %68, %43 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%88 = spirv.NV.CooperativeMatrixMulAdd %50, %80, %87 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
// Save the updated accumulators to the Function variables so they can be read
// after the loop, then branch back with %39 + 32 and the new accumulators.
spirv.Store "Function" %10, %82 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %11, %84 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %12, %86 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %13, %88 : !spirv.coopmatrix<16x16xf16, Subgroup>
%89 = spirv.IAdd %39, %cst32_i32 : i32
spirv.Branch ^bb1(%89, %82, %84, %86, %88 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
// Read the final accumulators back (note the reversed order: %14 <- %13,
// %15 <- %12, %16 <- %11, %17 <- %10).
%14 = spirv.Load "Function" %13 : !spirv.coopmatrix<16x16xf16, Subgroup>
%15 = spirv.Load "Function" %12 : !spirv.coopmatrix<16x16xf16, Subgroup>
%16 = spirv.Load "Function" %11 : !spirv.coopmatrix<16x16xf16, Subgroup>
%17 = spirv.Load "Function" %10 : !spirv.coopmatrix<16x16xf16, Subgroup>
// Output base index %30 = %2*2560 + %8*2560 + %4*32 + (division-by-32 sequence
// applied to %6) * 8.
%18 = spirv.IMul %2, %cst2560_i32 : i32
%19 = spirv.IMul %8, %cst2560_i32 : i32
%20 = spirv.IAdd %18, %19 : i32
%21 = spirv.IMul %4, %cst32_i32 : i32
%22 = spirv.IAdd %20, %21 : i32
%23 = spirv.SLessThan %6, %cst0_i32 : i32
%24 = spirv.ISub %cst-1_i32, %6 : i32
%25 = spirv.Select %23, %24, %6 : i1, i32
%26 = spirv.SDiv %25, %cst32_i32 : i32
%27 = spirv.ISub %cst-1_i32, %26 : i32
%28 = spirv.Select %23, %27, %26 : i1, i32
%29 = spirv.IMul %28, %cst8_i32 : i32
%30 = spirv.IAdd %22, %29 : i32
// Store the four accumulators to binding (0, 2) at %30 + 1398, 1396, 1394 and
// 1392, so the accumulator held in %10 lands at +1392 and the one in %13 at
// +1398. Each store passes %cst160_i32 and %false as trailing operands.
%31 = spirv.IAdd %30, %cst1398_i32 : i32
%32 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %31] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %32, %14, %cst160_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
%33 = spirv.IAdd %30, %cst1396_i32 : i32
%34 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %33] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %34, %15, %cst160_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
%35 = spirv.IAdd %30, %cst1394_i32 : i32
%36 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %35] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %36, %16, %cst160_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
%37 = spirv.IAdd %30, %cst1392_i32 : i32
%38 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %37] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %38, %17, %cst160_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_7_matmul_32x1280x320, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_7_matmul_32x1280x320 "LocalSize", 128, 1, 1
}
}
}
}
hal.executable private @forward_dispatch_8 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
hal.executable.export public @forward_dispatch_8_generic_2x1280 ordinal(0) layout(#pipeline_layout2) attributes {translation_info = #translation1, workgroup_size = [32 : index, 1 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
%c10 = arith.constant 10 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
hal.return %c10, %c2, %c1 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
// SPIR-V module for the forward_dispatch_8_generic_2x1280 kernel (LocalSize 32x1x1).
// Each invocation loads one vector<4xf16> from binding 1 and one from binding 0, adds them
// (call the sum v), and stores v * (1 / (1 + exp(-v))) as a vector<4xf16> to binding 2.
// exp(-v) is evaluated in f32 with an inline range-reduction + polynomial approximation.
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
// Three storage buffers, all viewed as runtime arrays of vector<4xf16> (8-byte stride),
// so every index computed below is in units of four f16 values.
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
spirv.func @forward_dispatch_8_generic_2x1280() "None" {
// Index constants.
// NOTE(review): 2784, 216398080 and 13024 look like base offsets into the bound buffers
// that were folded into the index math — confirm against the HAL binding offsets.
%cst2784_i32 = spirv.Constant 2784 : i32
%cst320_i32 = spirv.Constant 320 : i32
%cst216398080_i32 = spirv.Constant 216398080 : i32
%cst32_i32 = spirv.Constant 32 : i32
// Constants for the exp approximation: 0.693147182 ~= ln 2 and 1.44269502 ~= 1/ln 2;
// the remaining f32 splats are the polynomial coefficients used by the Fma chain below.
%cst_vec_4xf32 = spirv.Constant dense<0.693147182> : vector<4xf32>
%cst_vec_4xf32_0 = spirv.Constant dense<1.44269502> : vector<4xf32>
%cst_vec_4xf32_1 = spirv.Constant dense<1.000000e+00> : vector<4xf32>
%cst_vec_4xf32_2 = spirv.Constant dense<0.499705136> : vector<4xf32>
%cst_vec_4xf32_3 = spirv.Constant dense<0.168738902> : vector<4xf32>
%cst_vec_4xf32_4 = spirv.Constant dense<0.0366896503> : vector<4xf32>
%cst_vec_4xf32_5 = spirv.Constant dense<1.314350e-02> : vector<4xf32>
// 23 and 127 are the f32 mantissa width and exponent bias used to build 2^n from bits.
%cst_vec_4xi32 = spirv.Constant dense<23> : vector<4xi32>
%cst_vec_4xi32_6 = spirv.Constant dense<127> : vector<4xi32>
%cst_vec_4xf32_7 = spirv.Constant dense<0.000000e+00> : vector<4xf32>
// 0x7F800000 / 0xFF800000 are the f32 bit patterns of +inf / -inf;
// 1.17549435E-38 is the smallest positive normal f32.
%cst_vec_4xf32_8 = spirv.Constant dense<0x7F800000> : vector<4xf32>
%cst_vec_4xf32_9 = spirv.Constant dense<0xFF800000> : vector<4xf32>
%cst_vec_4xf32_10 = spirv.Constant dense<1.17549435E-38> : vector<4xf32>
%cst_vec_4xi32_11 = spirv.Constant dense<-127> : vector<4xi32>
%cst_vec_4xf16 = spirv.Constant dense<1.000000e+00> : vector<4xf16>
%cst0_i32 = spirv.Constant 0 : i32
%cst13024_i32 = spirv.Constant 13024 : i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
%__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
// %1 = WorkgroupId.y, %3 = WorkgroupId.x, %5 = LocalInvocationId.x.
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%0 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%1 = spirv.CompositeExtract %0[1 : i32] : vector<3xi32>
%2 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%3 = spirv.CompositeExtract %2[0 : i32] : vector<3xi32>
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%4 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%5 = spirv.CompositeExtract %4[0 : i32] : vector<3xi32>
// %7 = WorkgroupId.x * 32 + LocalInvocationId.x: the column index in vector<4xf16> units.
%6 = spirv.IMul %3, %cst32_i32 : i32
%7 = spirv.IAdd %6, %5 : i32
// First operand: binding 1 at [%7 + 216398080]; the index does not depend on WorkgroupId.y.
%8 = spirv.IAdd %7, %cst216398080_i32 : i32
%9 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %8] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
%10 = spirv.Load "StorageBuffer" %9 : vector<4xf16>
// %12 = %7 + WorkgroupId.y * 320 is shared by the second load and the final store.
%11 = spirv.IMul %1, %cst320_i32 : i32
%12 = spirv.IAdd %7, %11 : i32
// Second operand: binding 0 at [%12 + 2784].
%13 = spirv.IAdd %12, %cst2784_i32 : i32
%14 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %13] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
%15 = spirv.Load "StorageBuffer" %14 : vector<4xf16>
// v = %16 (f16 sum); x = %18 = -v widened to f32 for the exp evaluation.
%16 = spirv.FAdd %10, %15 : vector<4xf16>
%17 = spirv.FNegate %16 : vector<4xf16>
%18 = spirv.FConvert %17 : vector<4xf16> to vector<4xf32>
// NaN mask; the LogicalOr has identical operands, so %20 is equivalent to %19.
%19 = spirv.IsNan %18 : vector<4xf32>
%20 = spirv.LogicalOr %19, %19 : vector<4xi1>
// Range reduction: n = floor(x * 1.44269502) (%22), r = x - n * 0.693147182 (%24).
%21 = spirv.FMul %18, %cst_vec_4xf32_0 : vector<4xf32>
%22 = spirv.GL.Floor %21 : vector<4xf32>
%23 = spirv.FMul %22, %cst_vec_4xf32 : vector<4xf32>
%24 = spirv.FSub %18, %23 : vector<4xf32>
// Polynomial in r: %25 = r^2, %26 = r^4,
// %31 = (1 + r) + r^2 * (c2 + c3 * r) + r^4 * (c4 + c5 * r).
%25 = spirv.FMul %24, %24 : vector<4xf32>
%26 = spirv.FMul %25, %25 : vector<4xf32>
%27 = spirv.GL.Fma %cst_vec_4xf32_1, %24, %cst_vec_4xf32_1 : vector<4xf32>
%28 = spirv.GL.Fma %cst_vec_4xf32_3, %24, %cst_vec_4xf32_2 : vector<4xf32>
%29 = spirv.GL.Fma %cst_vec_4xf32_5, %24, %cst_vec_4xf32_4 : vector<4xf32>
%30 = spirv.GL.Fma %28, %25, %27 : vector<4xf32>
%31 = spirv.GL.Fma %29, %26, %30 : vector<4xf32>
// Build 2^n by placing (n + 127) in the f32 exponent field, then scale the polynomial.
%32 = spirv.ConvertFToS %22 : vector<4xf32> to vector<4xi32>
%33 = spirv.IAdd %32, %cst_vec_4xi32_6 : vector<4xi32>
%34 = spirv.ShiftLeftLogical %33, %cst_vec_4xi32 : vector<4xi32>, vector<4xi32>
%35 = spirv.Bitcast %34 : vector<4xi32> to vector<4xf32>
%36 = spirv.FMul %31, %35 : vector<4xf32>
// Special-case masks: n within [-127, 127], x == -inf, x == +inf, x > 0.
%37 = spirv.SLessThanEqual %32, %cst_vec_4xi32_6 : vector<4xi32>
%38 = spirv.SGreaterThanEqual %32, %cst_vec_4xi32_11 : vector<4xi32>
%39 = spirv.FOrdEqual %18, %cst_vec_4xf32_9 : vector<4xf32>
%40 = spirv.FOrdEqual %18, %cst_vec_4xf32_8 : vector<4xf32>
%41 = spirv.FOrdGreaterThan %18, %cst_vec_4xf32_7 : vector<4xf32>
%42 = spirv.LogicalAnd %37, %38 : vector<4xi1>
// Selection order (later selects take priority):
//   n out of range -> +inf if x > 0 else the smallest normal f32;
//   x == +inf -> +inf;  x == -inf -> 0;  NaN input -> x itself.
%43 = spirv.Select %41, %cst_vec_4xf32_8, %cst_vec_4xf32_10 : vector<4xi1>, vector<4xf32>
%44 = spirv.Select %42, %36, %43 : vector<4xi1>, vector<4xf32>
%45 = spirv.Select %40, %cst_vec_4xf32_8, %44 : vector<4xi1>, vector<4xf32>
%46 = spirv.Select %39, %cst_vec_4xf32_7, %45 : vector<4xi1>, vector<4xf32>
%47 = spirv.Select %20, %18, %46 : vector<4xi1>, vector<4xf32>
// Back to f16: result = v * (1 / (exp(-v) + 1)).
%48 = spirv.FConvert %47 : vector<4xf32> to vector<4xf16>
%49 = spirv.FAdd %48, %cst_vec_4xf16 : vector<4xf16>
%50 = spirv.FDiv %cst_vec_4xf16, %49 : vector<4xf16>
%51 = spirv.FMul %50, %16 : vector<4xf16>
// Store to binding 2 at [%12 + 13024].
%52 = spirv.IAdd %12, %cst13024_i32 : i32
%53 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %52] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %53, %51 : vector<4xf16>
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_8_generic_2x1280, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_8_generic_2x1280 "LocalSize", 32, 1, 1
}
}
}
}
hal.executable private @forward_dispatch_9 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
// Export for forward_dispatch_9_matmul_32x1280x1280: workgroup size 128x1x1, subgroup size 32.
// The region ignores its arguments and returns the constant triple (5, 2, 1)
// (presumably the workgroup counts in x, y, z order — confirm against the HAL dialect docs).
hal.executable.export public @forward_dispatch_9_matmul_32x1280x1280 ordinal(0) layout(#pipeline_layout3) attributes {subgroup_size = 32 : index, translation_info = #translation2, workgroup_size = [128 : index, 1 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
%c5 = arith.constant 5 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
hal.return %c5, %c2, %c1 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
// SPIR-V module for forward_dispatch_9_matmul_32x1280x1280 (LocalSize 128x1x1, subgroup_size = 32).
// The kernel keeps four 16x16 f16 cooperative-matrix accumulators (Subgroup scope) and
// updates them in a loop that advances %63 from 0 to 1280 in steps of 32. Each iteration
// stages operands from bindings 0 and 1 into two Workgroup arrays, loads them as
// cooperative matrices, and issues eight multiply-adds. After the loop the four
// accumulators are stored to binding 2.
// All buffers are declared as arrays of vector<4xf32> (16-byte elements) although the
// cooperative-matrix ops reinterpret the data as f16 (8 f16 values per element).
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, Float16, CooperativeMatrixNV], [SPV_KHR_storage_buffer_storage_class, SPV_NV_cooperative_matrix]> {
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
// Three i32 push constants; each is floor-divided by 16 below and added to a buffer index.
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>
// Workgroup staging arrays: 1056 = 32 * 33 elements (filled from binding 1) and
// 80 = 16 * 5 elements (filled from binding 0); 33 and 5 are the row pitches used below.
spirv.GlobalVariable @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>
spirv.GlobalVariable @__workgroup_mem__3 : !spirv.ptr<!spirv.struct<(!spirv.array<80 x vector<4xf32>>)>, Workgroup>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.func @forward_dispatch_9_matmul_32x1280x1280() "None" attributes {spirv.entry_point_abi = #spirv.entry_point_abi<subgroup_size = 32>} {
// %false is passed as the last operand of every cooperative-matrix load/store below
// (the column-major flag per the NV cooperative matrix ops — see the extension spec).
%false = spirv.Constant false
%cst534_i32 = spirv.Constant 534 : i32
%cst532_i32 = spirv.Constant 532 : i32
%cst530_i32 = spirv.Constant 530 : i32
%cst528_i32 = spirv.Constant 528 : i32
%cst6_i32 = spirv.Constant 6 : i32
%cst4_i32 = spirv.Constant 4 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst80_i32 = spirv.Constant 80 : i32
%cst33_i32 = spirv.Constant 33 : i32
%cst5_i32 = spirv.Constant 5 : i32
%cst2560_i32 = spirv.Constant 2560 : i32
%cst160_i32 = spirv.Constant 160 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst1280_i32 = spirv.Constant 1280 : i32
%cst1024_i32 = spirv.Constant 1024 : i32
%cst256_i32 = spirv.Constant 256 : i32
%cst1_i32 = spirv.Constant 1 : i32
%cst0_i32 = spirv.Constant 0 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst16_i32 = spirv.Constant 16 : i32
%cst_f16 = spirv.Constant 0.000000e+00 : f16
// %0: a 16x16 f16 cooperative matrix filled with 0.0, the initial value of all accumulators.
%0 = spirv.CompositeConstruct %cst_f16 : (f16) -> !spirv.coopmatrix<16x16xf16, Subgroup>
%__workgroup_mem__3_addr = spirv.mlir.addressof @__workgroup_mem__3 : !spirv.ptr<!spirv.struct<(!spirv.array<80 x vector<4xf32>>)>, Workgroup>
%__workgroup_mem__4_addr = spirv.mlir.addressof @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>
// Load the three push constants into %2, %4 and %6.
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>
%1 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32
%2 = spirv.Load "PushConstant" %1 : i32
%3 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32
%4 = spirv.Load "PushConstant" %3 : i32
%5 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32
%6 = spirv.Load "PushConstant" %5 : i32
// Floor division by a positive constant, expressed with signed truncating SDiv:
//   x < 0 ? -1 - ((-1 - x) / d) : x / d
// The same six-op pattern recurs throughout this function.
// %12 = floordiv(%2, 16); it is added to binding-0 indices.
// NOTE(review): presumably a byte offset converted to 16-byte element units — confirm.
%7 = spirv.SLessThan %2, %cst0_i32 : i32
%8 = spirv.ISub %cst-1_i32, %2 : i32
%9 = spirv.Select %7, %8, %2 : i1, i32
%10 = spirv.SDiv %9, %cst16_i32 : i32
%11 = spirv.ISub %cst-1_i32, %10 : i32
%12 = spirv.Select %7, %11, %10 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
// %18 = floordiv(%4, 16); it is added to binding-1 indices.
%13 = spirv.SLessThan %4, %cst0_i32 : i32
%14 = spirv.ISub %cst-1_i32, %4 : i32
%15 = spirv.Select %13, %14, %4 : i1, i32
%16 = spirv.SDiv %15, %cst16_i32 : i32
%17 = spirv.ISub %cst-1_i32, %16 : i32
%18 = spirv.Select %13, %17, %16 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
// %24 = floordiv(%6, 16); it is added to binding-2 indices.
%19 = spirv.SLessThan %6, %cst0_i32 : i32
%20 = spirv.ISub %cst-1_i32, %6 : i32
%21 = spirv.Select %19, %20, %6 : i1, i32
%22 = spirv.SDiv %21, %cst16_i32 : i32
%23 = spirv.ISub %cst-1_i32, %22 : i32
%24 = spirv.Select %19, %23, %22 : i1, i32
%__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
// %26 = WorkgroupId.y, %28 = WorkgroupId.x,
// %30 = LocalInvocationId.x, %32 = LocalInvocationId.y, %33 = LocalInvocationId.x * 8.
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%25 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%26 = spirv.CompositeExtract %25[1 : i32] : vector<3xi32>
%27 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%28 = spirv.CompositeExtract %27[0 : i32] : vector<3xi32>
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%29 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%30 = spirv.CompositeExtract %29[0 : i32] : vector<3xi32>
%31 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%32 = spirv.CompositeExtract %31[1 : i32] : vector<3xi32>
%33 = spirv.IMul %30, %cst8_i32 : i32
// Function-storage slots that carry the four accumulators out of the loop region:
// they are written at the end of every iteration and read back after the loop.
%34 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%35 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%36 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%37 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
// Main reduction loop: %63 runs 0, 32, ..., < 1280; %64..%67 are the loop-carried accumulators.
spirv.mlir.loop {
spirv.Branch ^bb1(%cst0_i32, %0, %0, %0, %0 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>)
^bb1(%63: i32, %64: !spirv.coopmatrix<16x16xf16, Subgroup>, %65: !spirv.coopmatrix<16x16xf16, Subgroup>, %66: !spirv.coopmatrix<16x16xf16, Subgroup>, %67: !spirv.coopmatrix<16x16xf16, Subgroup>): // 2 preds: ^bb0, ^bb2
%68 = spirv.SLessThan %63, %cst1280_i32 : i32
spirv.BranchConditional %68, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Barrier before the Workgroup staging arrays are overwritten for this iteration.
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
// Stage from binding 0 into @__workgroup_mem__3.
// Outer loop: %114 from LocalInvocationId.y while < 16, step 1.
spirv.mlir.loop {
spirv.Branch ^bb1(%32 : i32)
^bb1(%114: i32): // 2 preds: ^bb0, ^bb2
%115 = spirv.SLessThan %114, %cst16_i32 : i32
spirv.BranchConditional %115, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop: %117 from LocalInvocationId.x * 8 while < 32, step 1024.
spirv.mlir.loop {
spirv.Branch ^bb1(%33 : i32)
^bb1(%117: i32): // 2 preds: ^bb0, ^bb2
%118 = spirv.SLessThan %117, %cst32_i32 : i32
spirv.BranchConditional %118, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Source index = %114 * 160 + WorkgroupId.y * 2560 + %12 + floordiv(%63 + %117, 8).
%119 = spirv.IMul %114, %cst160_i32 : i32
%120 = spirv.IMul %26, %cst2560_i32 : i32
%121 = spirv.IAdd %119, %120 : i32
%122 = spirv.IAdd %121, %12 : i32
%123 = spirv.IAdd %63, %117 : i32
%124 = spirv.SLessThan %123, %cst0_i32 : i32
%125 = spirv.ISub %cst-1_i32, %123 : i32
%126 = spirv.Select %124, %125, %123 : i1, i32
%127 = spirv.SDiv %126, %cst8_i32 : i32
%128 = spirv.ISub %cst-1_i32, %127 : i32
%129 = spirv.Select %124, %128, %127 : i1, i32
%130 = spirv.IAdd %122, %129 : i32
%131 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %130] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%132 = spirv.Load "StorageBuffer" %131 : vector<4xf32>
// Destination index = %114 * 5 + floordiv(%117, 8).
%133 = spirv.IMul %114, %cst5_i32 : i32
%134 = spirv.SLessThan %117, %cst0_i32 : i32
%135 = spirv.ISub %cst-1_i32, %117 : i32
%136 = spirv.Select %134, %135, %117 : i1, i32
%137 = spirv.SDiv %136, %cst8_i32 : i32
%138 = spirv.ISub %cst-1_i32, %137 : i32
%139 = spirv.Select %134, %138, %137 : i1, i32
%140 = spirv.IAdd %133, %139 : i32
%141 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %140] : !spirv.ptr<!spirv.struct<(!spirv.array<80 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %141, %132 : vector<4xf32>
%142 = spirv.IAdd %117, %cst1024_i32 : i32
spirv.Branch ^bb1(%142 : i32)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
%116 = spirv.IAdd %114, %cst1_i32 : i32
spirv.Branch ^bb1(%116 : i32)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
// Stage from binding 1 into @__workgroup_mem__4.
// Outer loop: %114 from LocalInvocationId.y while < 32, step 1.
spirv.mlir.loop {
spirv.Branch ^bb1(%32 : i32)
^bb1(%114: i32): // 2 preds: ^bb0, ^bb2
%115 = spirv.SLessThan %114, %cst32_i32 : i32
spirv.BranchConditional %115, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop: %117 from LocalInvocationId.x * 8 while < 256, step 1024.
spirv.mlir.loop {
spirv.Branch ^bb1(%33 : i32)
^bb1(%117: i32): // 2 preds: ^bb0, ^bb2
%118 = spirv.SLessThan %117, %cst256_i32 : i32
spirv.BranchConditional %118, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Source index = %63 * 160 + %114 * 160 + WorkgroupId.x * 32 + %18 + floordiv(%117, 8).
%119 = spirv.IMul %63, %cst160_i32 : i32
%120 = spirv.IMul %114, %cst160_i32 : i32
%121 = spirv.IAdd %119, %120 : i32
%122 = spirv.IMul %28, %cst32_i32 : i32
%123 = spirv.IAdd %121, %122 : i32
%124 = spirv.IAdd %123, %18 : i32
%125 = spirv.SLessThan %117, %cst0_i32 : i32
%126 = spirv.ISub %cst-1_i32, %117 : i32
%127 = spirv.Select %125, %126, %117 : i1, i32
%128 = spirv.SDiv %127, %cst8_i32 : i32
%129 = spirv.ISub %cst-1_i32, %128 : i32
%130 = spirv.Select %125, %129, %128 : i1, i32
%131 = spirv.IAdd %124, %130 : i32
%132 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %131] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%133 = spirv.Load "StorageBuffer" %132 : vector<4xf32>
// Destination index = %114 * 33 + floordiv(%117, 8).
%134 = spirv.IMul %114, %cst33_i32 : i32
%135 = spirv.IAdd %134, %130 : i32
%136 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %135] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %136, %133 : vector<4xf32>
%137 = spirv.IAdd %117, %cst1024_i32 : i32
spirv.Branch ^bb1(%137 : i32)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
%116 = spirv.IAdd %114, %cst1_i32 : i32
spirv.Branch ^bb1(%116 : i32)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
// Barrier so the staged data is visible to the whole workgroup before it is consumed.
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
// Left-hand operands %71 and %74: loaded from @__workgroup_mem__3 at element offsets
// LocalInvocationId.y * 80 and that + 2, with stride operand 5.
%69 = spirv.IMul %32, %cst80_i32 : i32
%70 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %69] : !spirv.ptr<!spirv.struct<(!spirv.array<80 x vector<4xf32>>)>, Workgroup>, i32, i32
%71 = spirv.NV.CooperativeMatrixLoad %70, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%72 = spirv.IAdd %69, %cst2_i32 : i32
%73 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %72] : !spirv.ptr<!spirv.struct<(!spirv.array<80 x vector<4xf32>>)>, Workgroup>, i32, i32
%74 = spirv.NV.CooperativeMatrixLoad %73, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
// %81 = floordiv(LocalInvocationId.x, 32) * 8: base element offset into @__workgroup_mem__4
// (with subgroup_size = 32 this is presumably the subgroup index times 8 — confirm).
%75 = spirv.SLessThan %30, %cst0_i32 : i32
%76 = spirv.ISub %cst-1_i32, %30 : i32
%77 = spirv.Select %75, %76, %30 : i1, i32
%78 = spirv.SDiv %77, %cst32_i32 : i32
%79 = spirv.ISub %cst-1_i32, %78 : i32
%80 = spirv.Select %75, %79, %78 : i1, i32
%81 = spirv.IMul %80, %cst8_i32 : i32
// Right-hand operands: eight loads from @__workgroup_mem__4 with stride operand 33,
// at %81 + {0, 2, 4, 6} (%83, %86, %89, %92) and %81 + {528, 530, 532, 534}
// (%95, %98, %101, %104); 528 = 16 * 33.
%82 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %81] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32
%83 = spirv.NV.CooperativeMatrixLoad %82, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%84 = spirv.IAdd %81, %cst2_i32 : i32
%85 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %84] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32
%86 = spirv.NV.CooperativeMatrixLoad %85, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%87 = spirv.IAdd %81, %cst4_i32 : i32
%88 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %87] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32
%89 = spirv.NV.CooperativeMatrixLoad %88, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%90 = spirv.IAdd %81, %cst6_i32 : i32
%91 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %90] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32
%92 = spirv.NV.CooperativeMatrixLoad %91, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%93 = spirv.IAdd %81, %cst528_i32 : i32
%94 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %93] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32
%95 = spirv.NV.CooperativeMatrixLoad %94, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%96 = spirv.IAdd %81, %cst530_i32 : i32
%97 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %96] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32
%98 = spirv.NV.CooperativeMatrixLoad %97, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%99 = spirv.IAdd %81, %cst532_i32 : i32
%100 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %99] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32
%101 = spirv.NV.CooperativeMatrixLoad %100, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%102 = spirv.IAdd %81, %cst534_i32 : i32
%103 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %102] : !spirv.ptr<!spirv.struct<(!spirv.array<1056 x vector<4xf32>>)>, Workgroup>, i32, i32
%104 = spirv.NV.CooperativeMatrixLoad %103, %cst33_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
// Each accumulator gets two chained multiply-adds per iteration:
//   new_acc = %74 * B_hi + (%71 * B_lo + old_acc)
// where B_lo / B_hi are the matching loads at offsets +{0,2,4,6} / +{528,...,534}.
%105 = spirv.NV.CooperativeMatrixMulAdd %71, %83, %64 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%106 = spirv.NV.CooperativeMatrixMulAdd %74, %95, %105 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%107 = spirv.NV.CooperativeMatrixMulAdd %71, %86, %65 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%108 = spirv.NV.CooperativeMatrixMulAdd %74, %98, %107 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%109 = spirv.NV.CooperativeMatrixMulAdd %71, %89, %66 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%110 = spirv.NV.CooperativeMatrixMulAdd %74, %101, %109 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%111 = spirv.NV.CooperativeMatrixMulAdd %71, %92, %67 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%112 = spirv.NV.CooperativeMatrixMulAdd %74, %104, %111 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
// Mirror the updated accumulators into the Function variables so they can be read
// after the loop, then continue with %63 + 32.
spirv.Store "Function" %34, %106 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %35, %108 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %36, %110 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %37, %112 : !spirv.coopmatrix<16x16xf16, Subgroup>
%113 = spirv.IAdd %63, %cst32_i32 : i32
spirv.Branch ^bb1(%113, %106, %108, %110, %112 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
// Read back the final accumulators (%41 <- %34, %40 <- %35, %39 <- %36, %38 <- %37).
%38 = spirv.Load "Function" %37 : !spirv.coopmatrix<16x16xf16, Subgroup>
%39 = spirv.Load "Function" %36 : !spirv.coopmatrix<16x16xf16, Subgroup>
%40 = spirv.Load "Function" %35 : !spirv.coopmatrix<16x16xf16, Subgroup>
%41 = spirv.Load "Function" %34 : !spirv.coopmatrix<16x16xf16, Subgroup>
// Output base index %55 = WorkgroupId.y * 2560 + LocalInvocationId.y * 2560
//   + WorkgroupId.x * 32 + %24 + floordiv(LocalInvocationId.x, 32) * 8.
%42 = spirv.IMul %26, %cst2560_i32 : i32
%43 = spirv.IMul %32, %cst2560_i32 : i32
%44 = spirv.IAdd %42, %43 : i32
%45 = spirv.IMul %28, %cst32_i32 : i32
%46 = spirv.IAdd %44, %45 : i32
%47 = spirv.IAdd %46, %24 : i32
%48 = spirv.SLessThan %30, %cst0_i32 : i32
%49 = spirv.ISub %cst-1_i32, %30 : i32
%50 = spirv.Select %48, %49, %30 : i1, i32
%51 = spirv.SDiv %50, %cst32_i32 : i32
%52 = spirv.ISub %cst-1_i32, %51 : i32
%53 = spirv.Select %48, %52, %51 : i1, i32
%54 = spirv.IMul %53, %cst8_i32 : i32
%55 = spirv.IAdd %47, %54 : i32
// Store the four accumulators to binding 2 with stride operand 160, at
// %55 + 6 (%38), %55 + 4 (%39), %55 + 2 (%40) and %55 (%41).
%56 = spirv.IAdd %55, %cst6_i32 : i32
%57 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %56] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %57, %38, %cst160_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
%58 = spirv.IAdd %55, %cst4_i32 : i32
%59 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %58] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %59, %39, %cst160_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
%60 = spirv.IAdd %55, %cst2_i32 : i32
%61 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %60] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %61, %40, %cst160_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
%62 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %55] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %62, %41, %cst160_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_9_matmul_32x1280x1280, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_9_matmul_32x1280x1280 "LocalSize", 128, 1, 1
}
}
}
}
hal.executable private @forward_dispatch_10 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
// Export for forward_dispatch_10: workgroup size 32x1x1.
// The region ignores its arguments and returns the constant triple (3, 96, 8)
// (presumably the workgroup counts in x, y, z order — confirm against the HAL dialect docs).
hal.executable.export public @forward_dispatch_10 ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation, workgroup_size = [32 : index, 1 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index):
%c3 = arith.constant 3 : index
%c96 = arith.constant 96 : index
%c8 = arith.constant 8 : index
hal.return %c3, %c96, %c8 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
// SPIR-V module for forward_dispatch_10 (LocalSize 32x1x1).
// Each invocation copies a single f16 from binding 0 to binding 1. The source index uses
// pitches 96 / 9216 (= 96 * 96) / 36864 (= 4 * 9216); the destination index uses pitches
// 98 / 9604 (= 98 * 98) / 38416 (= 4 * 9604) plus a constant offset of 419.
// NOTE(review): this is consistent with copying 96x96 planes into the interior of 98x98
// planes (i.e. a padding step), with 419 folding in the interior start plus a buffer
// base offset — confirm against the producing program.
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
// Both buffers are runtime arrays of scalar f16 (2-byte stride); indices are in f16 units.
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.func @forward_dispatch_10() "None" {
%cst419_i32 = spirv.Constant 419 : i32
%cst38416_i32 = spirv.Constant 38416 : i32
%cst98_i32 = spirv.Constant 98 : i32
%cst9604_i32 = spirv.Constant 9604 : i32
%cst36864_i32 = spirv.Constant 36864 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst96_i32 = spirv.Constant 96 : i32
%cst9216_i32 = spirv.Constant 9216 : i32
%cst0_i32 = spirv.Constant 0 : i32
%cst4_i32 = spirv.Constant 4 : i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// %1 = WorkgroupId.x, %3 = WorkgroupId.y, %5 = WorkgroupId.z.
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%0 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%1 = spirv.CompositeExtract %0[0 : i32] : vector<3xi32>
%2 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%3 = spirv.CompositeExtract %2[1 : i32] : vector<3xi32>
%4 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%5 = spirv.CompositeExtract %4[2 : i32] : vector<3xi32>
// WorkgroupId.z is split into two coordinates: %6 = z mod 4 and %7 = z / 4 (unsigned).
%6 = spirv.UMod %5, %cst4_i32 : i32
%7 = spirv.UDiv %5, %cst4_i32 : i32
// %9, %11, %13 = LocalInvocationId.x, .y, .z.
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%8 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%9 = spirv.CompositeExtract %8[0 : i32] : vector<3xi32>
%10 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%11 = spirv.CompositeExtract %10[1 : i32] : vector<3xi32>
%12 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%13 = spirv.CompositeExtract %12[2 : i32] : vector<3xi32>
// Source index %25 = (%6 + LocalInvocationId.z) * 9216
//   + (WorkgroupId.y + LocalInvocationId.y) * 96
//   + LocalInvocationId.x + WorkgroupId.x * 32 + %7 * 36864.
%14 = spirv.IMul %6, %cst9216_i32 : i32
%15 = spirv.IMul %13, %cst9216_i32 : i32
%16 = spirv.IAdd %14, %15 : i32
%17 = spirv.IMul %3, %cst96_i32 : i32
%18 = spirv.IAdd %16, %17 : i32
%19 = spirv.IMul %11, %cst96_i32 : i32
%20 = spirv.IAdd %18, %19 : i32
%21 = spirv.IAdd %20, %9 : i32
%22 = spirv.IMul %1, %cst32_i32 : i32
%23 = spirv.IAdd %21, %22 : i32
%24 = spirv.IMul %7, %cst36864_i32 : i32
%25 = spirv.IAdd %23, %24 : i32
%26 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %25] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%27 = spirv.Load "StorageBuffer" %26 : f16
// Destination index %39: the same coordinates with pitches 9604 / 98 / 38416, plus 419.
// %22 (WorkgroupId.x * 32) is reused from the source computation.
%28 = spirv.IMul %6, %cst9604_i32 : i32
%29 = spirv.IMul %13, %cst9604_i32 : i32
%30 = spirv.IAdd %28, %29 : i32
%31 = spirv.IMul %3, %cst98_i32 : i32
%32 = spirv.IAdd %30, %31 : i32
%33 = spirv.IMul %11, %cst98_i32 : i32
%34 = spirv.IAdd %32, %33 : i32
%35 = spirv.IAdd %34, %9 : i32
%36 = spirv.IAdd %35, %22 : i32
%37 = spirv.IMul %7, %cst38416_i32 : i32
%38 = spirv.IAdd %36, %37 : i32
%39 = spirv.IAdd %38, %cst419_i32 : i32
%40 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %39] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %40, %27 : f16
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_10, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_10 "LocalSize", 32, 1, 1
}
}
}
}
hal.executable private @forward_dispatch_11 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
// Export for forward_dispatch_11_generic_2x4x3x3x96x96: workgroup size 32x1x1.
// The region ignores its arguments and returns the constant triple (3, 96, 72)
// (presumably the workgroup counts in x, y, z order — confirm against the HAL dialect docs).
hal.executable.export public @forward_dispatch_11_generic_2x4x3x3x96x96 ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation, workgroup_size = [32 : index, 1 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: index):
%c3 = arith.constant 3 : index
%c96 = arith.constant 96 : index
%c72 = arith.constant 72 : index
hal.return %c3, %c96, %c72 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.func @forward_dispatch_11_generic_2x4x3x3x96x96() "None" {
%cst78432_i32 = spirv.Constant 78432 : i32
%cst27648_i32 = spirv.Constant 27648 : i32
%cst82944_i32 = spirv.Constant 82944 : i32
%cst331776_i32 = spirv.Constant 331776 : i32
%cst96_i32 = spirv.Constant 96 : i32
%cst9216_i32 = spirv.Constant 9216 : i32
%cst320_i32 = spirv.Constant 320 : i32
%cst9604_i32 = spirv.Constant 9604 : i32
%cst38416_i32 = spirv.Constant 38416 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst98_i32 = spirv.Constant 98 : i32
%cst3_i32 = spirv.Constant 3 : i32
%cst4_i32 = spirv.Constant 4 : i32
%cst0_i32 = spirv.Constant 0 : i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%0 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%1 = spirv.CompositeExtract %0[2 : i32] : vector<3xi32>
%2 = spirv.UDiv %1, %cst3_i32 : i32
%3 = spirv.UDiv %2, %cst3_i32 : i32
%4 = spirv.UDiv %3, %cst4_i32 : i32
%5 = spirv.UMod %3, %cst4_i32 : i32
%6 = spirv.UMod %2, %cst3_i32 : i32
%7 = spirv.UMod %1, %cst3_i32 : i32
%8 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%9 = spirv.CompositeExtract %8[1 : i32] : vector<3xi32>
%10 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%11 = spirv.CompositeExtract %10[0 : i32] : vector<3xi32>
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%12 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%13 = spirv.CompositeExtract %12[0 : i32] : vector<3xi32>
%14 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%15 = spirv.CompositeExtract %14[1 : i32] : vector<3xi32>
%16 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%17 = spirv.CompositeExtract %16[2 : i32] : vector<3xi32>
%18 = spirv.IMul %15, %cst98_i32 : i32
%19 = spirv.IMul %6, %cst98_i32 : i32
%20 = spirv.IAdd %18, %19 : i32
%21 = spirv.IMul %9, %cst98_i32 : i32
%22 = spirv.IAdd %20, %21 : i32
%23 = spirv.IAdd %22, %7 : i32
%24 = spirv.IMul %11, %cst32_i32 : i32
%25 = spirv.IAdd %23, %24 : i32
%26 = spirv.IAdd %25, %17 : i32
%27 = spirv.IAdd %26, %13 : i32
%28 = spirv.IMul %4, %cst38416_i32 : i32
%29 = spirv.IAdd %27, %28 : i32
%30 = spirv.IMul %5, %cst9604_i32 : i32
%31 = spirv.IAdd %29, %30 : i32
%32 = spirv.IAdd %31, %cst320_i32 : i32
%33 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %32] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%34 = spirv.Load "StorageBuffer" %33 : f16
%35 = spirv.IMul %7, %cst9216_i32 : i32
%36 = spirv.IMul %17, %cst9216_i32 : i32
%37 = spirv.IAdd %35, %36 : i32
%38 = spirv.IMul %9, %cst96_i32 : i32
%39 = spirv.IAdd %37, %38 : i32
%40 = spirv.IMul %15, %cst96_i32 : i32
%41 = spirv.IAdd %39, %40 : i32
%42 = spirv.IAdd %41, %13 : i32
%43 = spirv.IAdd %42, %24 : i32
%44 = spirv.IMul %4, %cst331776_i32 : i32
%45 = spirv.IAdd %43, %44 : i32
%46 = spirv.IMul %5, %cst82944_i32 : i32
%47 = spirv.IAdd %45, %46 : i32
%48 = spirv.IMul %6, %cst27648_i32 : i32
%49 = spirv.IAdd %47, %48 : i32
%50 = spirv.IAdd %49, %cst78432_i32 : i32
%51 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %50] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %51, %34 : f16
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_11_generic_2x4x3x3x96x96, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_11_generic_2x4x3x3x96x96 "LocalSize", 32, 1, 1
}
}
}
}
hal.executable private @forward_dispatch_12 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
hal.executable.export public @forward_dispatch_12_generic_2x320x9216x36 ordinal(0) layout(#pipeline_layout2) attributes {translation_info = #translation1, workgroup_size = [32 : index, 8 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
%c36 = arith.constant 36 : index
%c5 = arith.constant 5 : index
%c2 = arith.constant 2 : index
hal.return %c36, %c5, %c2 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__resource_var_0_0__0 bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.func @forward_dispatch_12_generic_2x320x9216x36() "None" {
%cst100812_i32 = spirv.Constant 100812 : i32
%cst99660_i32 = spirv.Constant 99660 : i32
%cst98508_i32 = spirv.Constant 98508 : i32
%cst97356_i32 = spirv.Constant 97356 : i32
%cst96204_i32 = spirv.Constant 96204 : i32
%cst95052_i32 = spirv.Constant 95052 : i32
%cst93900_i32 = spirv.Constant 93900 : i32
%cst368640_i32 = spirv.Constant 368640 : i32
%cst9216_i32 = spirv.Constant 9216 : i32
%cst73728_i32 = spirv.Constant 73728 : i32
%cst108199360_i32 = spirv.Constant 108199360 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst13260_i32 = spirv.Constant 13260 : i32
%cst12108_i32 = spirv.Constant 12108 : i32
%cst10956_i32 = spirv.Constant 10956 : i32
%cst9804_i32 = spirv.Constant 9804 : i32
%cst41472_i32 = spirv.Constant 41472 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst1152_i32 = spirv.Constant 1152 : i32
%cst75336383_i32 = spirv.Constant 75336383 : i32
%cst75336374_i32 = spirv.Constant 75336374 : i32
%cst75336365_i32 = spirv.Constant 75336365 : i32
%cst75336356_i32 = spirv.Constant 75336356 : i32
%cst75336347_i32 = spirv.Constant 75336347 : i32
%cst75336338_i32 = spirv.Constant 75336338 : i32
%cst75336329_i32 = spirv.Constant 75336329 : i32
%cst75336320_i32 = spirv.Constant 75336320 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst72_i32 = spirv.Constant 72 : i32
%cst576_i32 = spirv.Constant 576 : i32
%cst92748_i32 = spirv.Constant 92748 : i32
%cst_vec_4xf32 = spirv.Constant dense<0.000000e+00> : vector<4xf32>
%cst36_i32 = spirv.Constant 36 : i32
%cst4_i32 = spirv.Constant 4 : i32
%cst0_i32 = spirv.Constant 0 : i32
%__resource_var_0_0__0_addr = spirv.mlir.addressof @__resource_var_0_0__0 : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%0 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%1 = spirv.CompositeExtract %0[2 : i32] : vector<3xi32>
%2 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%3 = spirv.CompositeExtract %2[1 : i32] : vector<3xi32>
%4 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%5 = spirv.CompositeExtract %4[0 : i32] : vector<3xi32>
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%6 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%7 = spirv.CompositeExtract %6[1 : i32] : vector<3xi32>
%8 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%9 = spirv.CompositeExtract %8[0 : i32] : vector<3xi32>
%10 = spirv.Variable : !spirv.ptr<vector<4xf32>, Function>
%11 = spirv.Variable : !spirv.ptr<vector<4xf32>, Function>
%12 = spirv.Variable : !spirv.ptr<vector<4xf32>, Function>
%13 = spirv.Variable : !spirv.ptr<vector<4xf32>, Function>
%14 = spirv.Variable : !spirv.ptr<vector<4xf32>, Function>
%15 = spirv.Variable : !spirv.ptr<vector<4xf32>, Function>
%16 = spirv.Variable : !spirv.ptr<vector<4xf32>, Function>
%17 = spirv.Variable : !spirv.ptr<vector<4xf32>, Function>
spirv.mlir.loop {
spirv.Branch ^bb1(%cst0_i32, %cst_vec_4xf32, %cst_vec_4xf32, %cst_vec_4xf32, %cst_vec_4xf32, %cst_vec_4xf32, %cst_vec_4xf32, %cst_vec_4xf32, %cst_vec_4xf32 : i32, vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>)
^bb1(%159: i32, %160: vector<4xf32>, %161: vector<4xf32>, %162: vector<4xf32>, %163: vector<4xf32>, %164: vector<4xf32>, %165: vector<4xf32>, %166: vector<4xf32>, %167: vector<4xf32>): // 2 preds: ^bb0, ^bb2
%168 = spirv.SLessThan %159, %cst36_i32 : i32
spirv.BranchConditional %168, ^bb2, ^bb3
^bb2: // pred: ^bb1
%169 = spirv.IMul %3, %cst576_i32 : i32
%170 = spirv.IMul %7, %cst72_i32 : i32
%171 = spirv.IAdd %169, %170 : i32
%172 = spirv.SLessThan %159, %cst0_i32 : i32
%173 = spirv.ISub %cst-1_i32, %159 : i32
%174 = spirv.Select %172, %173, %159 : i1, i32
%175 = spirv.SDiv %174, %cst4_i32 : i32
%176 = spirv.ISub %cst-1_i32, %175 : i32
%177 = spirv.Select %172, %176, %175 : i1, i32
%178 = spirv.IAdd %171, %177 : i32
%179 = spirv.IAdd %178, %cst75336320_i32 : i32
%180 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %179] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
%181 = spirv.Load "StorageBuffer" %180 : vector<4xf16>
%182 = spirv.IAdd %178, %cst75336329_i32 : i32
%183 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %182] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
%184 = spirv.Load "StorageBuffer" %183 : vector<4xf16>
%185 = spirv.IAdd %178, %cst75336338_i32 : i32
%186 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %185] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
%187 = spirv.Load "StorageBuffer" %186 : vector<4xf16>
%188 = spirv.IAdd %178, %cst75336347_i32 : i32
%189 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %188] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
%190 = spirv.Load "StorageBuffer" %189 : vector<4xf16>
%191 = spirv.IAdd %178, %cst75336356_i32 : i32
%192 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %191] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
%193 = spirv.Load "StorageBuffer" %192 : vector<4xf16>
%194 = spirv.IAdd %178, %cst75336365_i32 : i32
%195 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %194] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
%196 = spirv.Load "StorageBuffer" %195 : vector<4xf16>
%197 = spirv.IAdd %178, %cst75336374_i32 : i32
%198 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %197] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
%199 = spirv.Load "StorageBuffer" %198 : vector<4xf16>
%200 = spirv.IAdd %178, %cst75336383_i32 : i32
%201 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %200] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
%202 = spirv.Load "StorageBuffer" %201 : vector<4xf16>
%203 = spirv.IMul %159, %cst1152_i32 : i32
%204 = spirv.IMul %5, %cst32_i32 : i32
%205 = spirv.IAdd %203, %204 : i32
%206 = spirv.IAdd %205, %9 : i32
%207 = spirv.IMul %1, %cst41472_i32 : i32
%208 = spirv.IAdd %206, %207 : i32
%209 = spirv.IAdd %208, %cst9804_i32 : i32
%210 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %209] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%211 = spirv.Load "StorageBuffer" %210 : vector<4xf32>
%212 = spirv.IAdd %208, %cst10956_i32 : i32
%213 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %212] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%214 = spirv.Load "StorageBuffer" %213 : vector<4xf32>
%215 = spirv.IAdd %208, %cst12108_i32 : i32
%216 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %215] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%217 = spirv.Load "StorageBuffer" %216 : vector<4xf32>
%218 = spirv.IAdd %208, %cst13260_i32 : i32
%219 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %218] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%220 = spirv.Load "StorageBuffer" %219 : vector<4xf32>
%221 = spirv.VectorShuffle [0 : i32, 1 : i32] %211 : vector<4xf32>, %211 : vector<4xf32> -> vector<2xf32>
%222 = spirv.Bitcast %221 : vector<2xf32> to vector<4xf16>
%223 = spirv.VectorShuffle [0 : i32, 1 : i32] %167 : vector<4xf32>, %167 : vector<4xf32> -> vector<2xf32>
%224 = spirv.Bitcast %223 : vector<2xf32> to vector<4xf16>
%225 = spirv.CompositeExtract %181[0 : i32] : vector<4xf16>
%226 = spirv.CompositeConstruct %225, %225, %225, %225 : (f16, f16, f16, f16) -> vector<4xf16>
%227 = spirv.GL.Fma %226, %222, %224 : vector<4xf16>
%228 = spirv.VectorShuffle [0 : i32, 1 : i32] %214 : vector<4xf32>, %214 : vector<4xf32> -> vector<2xf32>
%229 = spirv.Bitcast %228 : vector<2xf32> to vector<4xf16>
%230 = spirv.CompositeExtract %181[1 : i32] : vector<4xf16>
%231 = spirv.CompositeConstruct %230, %230, %230, %230 : (f16, f16, f16, f16) -> vector<4xf16>
%232 = spirv.GL.Fma %231, %229, %227 : vector<4xf16>
%233 = spirv.VectorShuffle [0 : i32, 1 : i32] %217 : vector<4xf32>, %217 : vector<4xf32> -> vector<2xf32>
%234 = spirv.Bitcast %233 : vector<2xf32> to vector<4xf16>
%235 = spirv.CompositeExtract %181[2 : i32] : vector<4xf16>
%236 = spirv.CompositeConstruct %235, %235, %235, %235 : (f16, f16, f16, f16) -> vector<4xf16>
%237 = spirv.GL.Fma %236, %234, %232 : vector<4xf16>
%238 = spirv.VectorShuffle [0 : i32, 1 : i32] %220 : vector<4xf32>, %220 : vector<4xf32> -> vector<2xf32>
%239 = spirv.Bitcast %238 : vector<2xf32> to vector<4xf16>
%240 = spirv.CompositeExtract %181[3 : i32] : vector<4xf16>
%241 = spirv.CompositeConstruct %240, %240, %240, %240 : (f16, f16, f16, f16) -> vector<4xf16>
%242 = spirv.GL.Fma %241, %239, %237 : vector<4xf16>
%243 = spirv.VectorShuffle [2 : i32, 3 : i32] %211 : vector<4xf32>, %211 : vector<4xf32> -> vector<2xf32>
%244 = spirv.Bitcast %243 : vector<2xf32> to vector<4xf16>
%245 = spirv.VectorShuffle [2 : i32, 3 : i32] %167 : vector<4xf32>, %167 : vector<4xf32> -> vector<2xf32>
%246 = spirv.Bitcast %245 : vector<2xf32> to vector<4xf16>
%247 = spirv.GL.Fma %226, %244, %246 : vector<4xf16>
%248 = spirv.VectorShuffle [2 : i32, 3 : i32] %214 : vector<4xf32>, %214 : vector<4xf32> -> vector<2xf32>
%249 = spirv.Bitcast %248 : vector<2xf32> to vector<4xf16>
%250 = spirv.GL.Fma %231, %249, %247 : vector<4xf16>
%251 = spirv.VectorShuffle [2 : i32, 3 : i32] %217 : vector<4xf32>, %217 : vector<4xf32> -> vector<2xf32>
%252 = spirv.Bitcast %251 : vector<2xf32> to vector<4xf16>
%253 = spirv.GL.Fma %236, %252, %250 : vector<4xf16>
%254 = spirv.VectorShuffle [2 : i32, 3 : i32] %220 : vector<4xf32>, %220 : vector<4xf32> -> vector<2xf32>
%255 = spirv.Bitcast %254 : vector<2xf32> to vector<4xf16>
%256 = spirv.GL.Fma %241, %255, %253 : vector<4xf16>
%257 = spirv.VectorShuffle [0 : i32, 1 : i32] %166 : vector<4xf32>, %166 : vector<4xf32> -> vector<2xf32>
%258 = spirv.Bitcast %257 : vector<2xf32> to vector<4xf16>
%259 = spirv.CompositeExtract %184[0 : i32] : vector<4xf16>
%260 = spirv.CompositeConstruct %259, %259, %259, %259 : (f16, f16, f16, f16) -> vector<4xf16>
%261 = spirv.GL.Fma %260, %222, %258 : vector<4xf16>
%262 = spirv.CompositeExtract %184[1 : i32] : vector<4xf16>
%263 = spirv.CompositeConstruct %262, %262, %262, %262 : (f16, f16, f16, f16) -> vector<4xf16>
%264 = spirv.GL.Fma %263, %229, %261 : vector<4xf16>
%265 = spirv.CompositeExtract %184[2 : i32] : vector<4xf16>
%266 = spirv.CompositeConstruct %265, %265, %265, %265 : (f16, f16, f16, f16) -> vector<4xf16>
%267 = spirv.GL.Fma %266, %234, %264 : vector<4xf16>
%268 = spirv.CompositeExtract %184[3 : i32] : vector<4xf16>
%269 = spirv.CompositeConstruct %268, %268, %268, %268 : (f16, f16, f16, f16) -> vector<4xf16>
%270 = spirv.GL.Fma %269, %239, %267 : vector<4xf16>
%271 = spirv.VectorShuffle [2 : i32, 3 : i32] %166 : vector<4xf32>, %166 : vector<4xf32> -> vector<2xf32>
%272 = spirv.Bitcast %271 : vector<2xf32> to vector<4xf16>
%273 = spirv.GL.Fma %260, %244, %272 : vector<4xf16>
%274 = spirv.GL.Fma %263, %249, %273 : vector<4xf16>
%275 = spirv.GL.Fma %266, %252, %274 : vector<4xf16>
%276 = spirv.GL.Fma %269, %255, %275 : vector<4xf16>
%277 = spirv.VectorShuffle [0 : i32, 1 : i32] %165 : vector<4xf32>, %165 : vector<4xf32> -> vector<2xf32>
%278 = spirv.Bitcast %277 : vector<2xf32> to vector<4xf16>
%279 = spirv.CompositeExtract %187[0 : i32] : vector<4xf16>
%280 = spirv.CompositeConstruct %279, %279, %279, %279 : (f16, f16, f16, f16) -> vector<4xf16>
%281 = spirv.GL.Fma %280, %222, %278 : vector<4xf16>
%282 = spirv.CompositeExtract %187[1 : i32] : vector<4xf16>
%283 = spirv.CompositeConstruct %282, %282, %282, %282 : (f16, f16, f16, f16) -> vector<4xf16>
%284 = spirv.GL.Fma %283, %229, %281 : vector<4xf16>
%285 = spirv.CompositeExtract %187[2 : i32] : vector<4xf16>
%286 = spirv.CompositeConstruct %285, %285, %285, %285 : (f16, f16, f16, f16) -> vector<4xf16>
%287 = spirv.GL.Fma %286, %234, %284 : vector<4xf16>
%288 = spirv.CompositeExtract %187[3 : i32] : vector<4xf16>
%289 = spirv.CompositeConstruct %288, %288, %288, %288 : (f16, f16, f16, f16) -> vector<4xf16>
%290 = spirv.GL.Fma %289, %239, %287 : vector<4xf16>
%291 = spirv.VectorShuffle [2 : i32, 3 : i32] %165 : vector<4xf32>, %165 : vector<4xf32> -> vector<2xf32>
%292 = spirv.Bitcast %291 : vector<2xf32> to vector<4xf16>
%293 = spirv.GL.Fma %280, %244, %292 : vector<4xf16>
%294 = spirv.GL.Fma %283, %249, %293 : vector<4xf16>
%295 = spirv.GL.Fma %286, %252, %294 : vector<4xf16>
%296 = spirv.GL.Fma %289, %255, %295 : vector<4xf16>
%297 = spirv.VectorShuffle [0 : i32, 1 : i32] %164 : vector<4xf32>, %164 : vector<4xf32> -> vector<2xf32>
%298 = spirv.Bitcast %297 : vector<2xf32> to vector<4xf16>
%299 = spirv.CompositeExtract %190[0 : i32] : vector<4xf16>
%300 = spirv.CompositeConstruct %299, %299, %299, %299 : (f16, f16, f16, f16) -> vector<4xf16>
%301 = spirv.GL.Fma %300, %222, %298 : vector<4xf16>
%302 = spirv.CompositeExtract %190[1 : i32] : vector<4xf16>
%303 = spirv.CompositeConstruct %302, %302, %302, %302 : (f16, f16, f16, f16) -> vector<4xf16>
%304 = spirv.GL.Fma %303, %229, %301 : vector<4xf16>
%305 = spirv.CompositeExtract %190[2 : i32] : vector<4xf16>
%306 = spirv.CompositeConstruct %305, %305, %305, %305 : (f16, f16, f16, f16) -> vector<4xf16>
%307 = spirv.GL.Fma %306, %234, %304 : vector<4xf16>
%308 = spirv.CompositeExtract %190[3 : i32] : vector<4xf16>
%309 = spirv.CompositeConstruct %308, %308, %308, %308 : (f16, f16, f16, f16) -> vector<4xf16>
%310 = spirv.GL.Fma %309, %239, %307 : vector<4xf16>
%311 = spirv.VectorShuffle [2 : i32, 3 : i32] %164 : vector<4xf32>, %164 : vector<4xf32> -> vector<2xf32>
%312 = spirv.Bitcast %311 : vector<2xf32> to vector<4xf16>
%313 = spirv.GL.Fma %300, %244, %312 : vector<4xf16>
%314 = spirv.GL.Fma %303, %249, %313 : vector<4xf16>
%315 = spirv.GL.Fma %306, %252, %314 : vector<4xf16>
%316 = spirv.GL.Fma %309, %255, %315 : vector<4xf16>
%317 = spirv.VectorShuffle [0 : i32, 1 : i32] %163 : vector<4xf32>, %163 : vector<4xf32> -> vector<2xf32>
%318 = spirv.Bitcast %317 : vector<2xf32> to vector<4xf16>
%319 = spirv.CompositeExtract %193[0 : i32] : vector<4xf16>
%320 = spirv.CompositeConstruct %319, %319, %319, %319 : (f16, f16, f16, f16) -> vector<4xf16>
%321 = spirv.GL.Fma %320, %222, %318 : vector<4xf16>
%322 = spirv.CompositeExtract %193[1 : i32] : vector<4xf16>
%323 = spirv.CompositeConstruct %322, %322, %322, %322 : (f16, f16, f16, f16) -> vector<4xf16>
%324 = spirv.GL.Fma %323, %229, %321 : vector<4xf16>
%325 = spirv.CompositeExtract %193[2 : i32] : vector<4xf16>
%326 = spirv.CompositeConstruct %325, %325, %325, %325 : (f16, f16, f16, f16) -> vector<4xf16>
%327 = spirv.GL.Fma %326, %234, %324 : vector<4xf16>
%328 = spirv.CompositeExtract %193[3 : i32] : vector<4xf16>
%329 = spirv.CompositeConstruct %328, %328, %328, %328 : (f16, f16, f16, f16) -> vector<4xf16>
%330 = spirv.GL.Fma %329, %239, %327 : vector<4xf16>
%331 = spirv.VectorShuffle [2 : i32, 3 : i32] %163 : vector<4xf32>, %163 : vector<4xf32> -> vector<2xf32>
%332 = spirv.Bitcast %331 : vector<2xf32> to vector<4xf16>
%333 = spirv.GL.Fma %320, %244, %332 : vector<4xf16>
%334 = spirv.GL.Fma %323, %249, %333 : vector<4xf16>
%335 = spirv.GL.Fma %326, %252, %334 : vector<4xf16>
%336 = spirv.GL.Fma %329, %255, %335 : vector<4xf16>
%337 = spirv.VectorShuffle [0 : i32, 1 : i32] %162 : vector<4xf32>, %162 : vector<4xf32> -> vector<2xf32>
%338 = spirv.Bitcast %337 : vector<2xf32> to vector<4xf16>
%339 = spirv.CompositeExtract %196[0 : i32] : vector<4xf16>
%340 = spirv.CompositeConstruct %339, %339, %339, %339 : (f16, f16, f16, f16) -> vector<4xf16>
%341 = spirv.GL.Fma %340, %222, %338 : vector<4xf16>
%342 = spirv.CompositeExtract %196[1 : i32] : vector<4xf16>
%343 = spirv.CompositeConstruct %342, %342, %342, %342 : (f16, f16, f16, f16) -> vector<4xf16>
%344 = spirv.GL.Fma %343, %229, %341 : vector<4xf16>
%345 = spirv.CompositeExtract %196[2 : i32] : vector<4xf16>
%346 = spirv.CompositeConstruct %345, %345, %345, %345 : (f16, f16, f16, f16) -> vector<4xf16>
%347 = spirv.GL.Fma %346, %234, %344 : vector<4xf16>
%348 = spirv.CompositeExtract %196[3 : i32] : vector<4xf16>
%349 = spirv.CompositeConstruct %348, %348, %348, %348 : (f16, f16, f16, f16) -> vector<4xf16>
%350 = spirv.GL.Fma %349, %239, %347 : vector<4xf16>
%351 = spirv.VectorShuffle [2 : i32, 3 : i32] %162 : vector<4xf32>, %162 : vector<4xf32> -> vector<2xf32>
%352 = spirv.Bitcast %351 : vector<2xf32> to vector<4xf16>
%353 = spirv.GL.Fma %340, %244, %352 : vector<4xf16>
%354 = spirv.GL.Fma %343, %249, %353 : vector<4xf16>
%355 = spirv.GL.Fma %346, %252, %354 : vector<4xf16>
%356 = spirv.GL.Fma %349, %255, %355 : vector<4xf16>
%357 = spirv.VectorShuffle [0 : i32, 1 : i32] %161 : vector<4xf32>, %161 : vector<4xf32> -> vector<2xf32>
%358 = spirv.Bitcast %357 : vector<2xf32> to vector<4xf16>
%359 = spirv.CompositeExtract %199[0 : i32] : vector<4xf16>
%360 = spirv.CompositeConstruct %359, %359, %359, %359 : (f16, f16, f16, f16) -> vector<4xf16>
%361 = spirv.GL.Fma %360, %222, %358 : vector<4xf16>
%362 = spirv.CompositeExtract %199[1 : i32] : vector<4xf16>
%363 = spirv.CompositeConstruct %362, %362, %362, %362 : (f16, f16, f16, f16) -> vector<4xf16>
%364 = spirv.GL.Fma %363, %229, %361 : vector<4xf16>
%365 = spirv.CompositeExtract %199[2 : i32] : vector<4xf16>
%366 = spirv.CompositeConstruct %365, %365, %365, %365 : (f16, f16, f16, f16) -> vector<4xf16>
%367 = spirv.GL.Fma %366, %234, %364 : vector<4xf16>
%368 = spirv.CompositeExtract %199[3 : i32] : vector<4xf16>
%369 = spirv.CompositeConstruct %368, %368, %368, %368 : (f16, f16, f16, f16) -> vector<4xf16>
%370 = spirv.GL.Fma %369, %239, %367 : vector<4xf16>
%371 = spirv.VectorShuffle [2 : i32, 3 : i32] %161 : vector<4xf32>, %161 : vector<4xf32> -> vector<2xf32>
%372 = spirv.Bitcast %371 : vector<2xf32> to vector<4xf16>
%373 = spirv.GL.Fma %360, %244, %372 : vector<4xf16>
%374 = spirv.GL.Fma %363, %249, %373 : vector<4xf16>
%375 = spirv.GL.Fma %366, %252, %374 : vector<4xf16>
%376 = spirv.GL.Fma %369, %255, %375 : vector<4xf16>
%377 = spirv.VectorShuffle [0 : i32, 1 : i32] %160 : vector<4xf32>, %160 : vector<4xf32> -> vector<2xf32>
%378 = spirv.Bitcast %377 : vector<2xf32> to vector<4xf16>
%379 = spirv.CompositeExtract %202[0 : i32] : vector<4xf16>
%380 = spirv.CompositeConstruct %379, %379, %379, %379 : (f16, f16, f16, f16) -> vector<4xf16>
%381 = spirv.GL.Fma %380, %222, %378 : vector<4xf16>
%382 = spirv.CompositeExtract %202[1 : i32] : vector<4xf16>
%383 = spirv.CompositeConstruct %382, %382, %382, %382 : (f16, f16, f16, f16) -> vector<4xf16>
%384 = spirv.GL.Fma %383, %229, %381 : vector<4xf16>
%385 = spirv.CompositeExtract %202[2 : i32] : vector<4xf16>
%386 = spirv.CompositeConstruct %385, %385, %385, %385 : (f16, f16, f16, f16) -> vector<4xf16>
%387 = spirv.GL.Fma %386, %234, %384 : vector<4xf16>
%388 = spirv.CompositeExtract %202[3 : i32] : vector<4xf16>
%389 = spirv.CompositeConstruct %388, %388, %388, %388 : (f16, f16, f16, f16) -> vector<4xf16>
%390 = spirv.GL.Fma %389, %239, %387 : vector<4xf16>
%391 = spirv.VectorShuffle [2 : i32, 3 : i32] %160 : vector<4xf32>, %160 : vector<4xf32> -> vector<2xf32>
%392 = spirv.Bitcast %391 : vector<2xf32> to vector<4xf16>
%393 = spirv.GL.Fma %380, %244, %392 : vector<4xf16>
%394 = spirv.GL.Fma %383, %249, %393 : vector<4xf16>
%395 = spirv.GL.Fma %386, %252, %394 : vector<4xf16>
%396 = spirv.GL.Fma %389, %255, %395 : vector<4xf16>
%397 = spirv.Bitcast %396 : vector<4xf16> to vector<2xf32>
%398 = spirv.Bitcast %390 : vector<4xf16> to vector<2xf32>
%399 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %398 : vector<2xf32> -> vector<4xf32>
%400 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %399 : vector<4xf32>, %397 : vector<2xf32> -> vector<4xf32>
%401 = spirv.Bitcast %376 : vector<4xf16> to vector<2xf32>
%402 = spirv.Bitcast %370 : vector<4xf16> to vector<2xf32>
%403 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %402 : vector<2xf32> -> vector<4xf32>
%404 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %403 : vector<4xf32>, %401 : vector<2xf32> -> vector<4xf32>
%405 = spirv.Bitcast %356 : vector<4xf16> to vector<2xf32>
%406 = spirv.Bitcast %350 : vector<4xf16> to vector<2xf32>
%407 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %406 : vector<2xf32> -> vector<4xf32>
%408 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %407 : vector<4xf32>, %405 : vector<2xf32> -> vector<4xf32>
%409 = spirv.Bitcast %336 : vector<4xf16> to vector<2xf32>
%410 = spirv.Bitcast %330 : vector<4xf16> to vector<2xf32>
%411 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %410 : vector<2xf32> -> vector<4xf32>
%412 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %411 : vector<4xf32>, %409 : vector<2xf32> -> vector<4xf32>
%413 = spirv.Bitcast %316 : vector<4xf16> to vector<2xf32>
%414 = spirv.Bitcast %310 : vector<4xf16> to vector<2xf32>
%415 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %414 : vector<2xf32> -> vector<4xf32>
%416 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %415 : vector<4xf32>, %413 : vector<2xf32> -> vector<4xf32>
%417 = spirv.Bitcast %296 : vector<4xf16> to vector<2xf32>
%418 = spirv.Bitcast %290 : vector<4xf16> to vector<2xf32>
%419 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %418 : vector<2xf32> -> vector<4xf32>
%420 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %419 : vector<4xf32>, %417 : vector<2xf32> -> vector<4xf32>
%421 = spirv.Bitcast %276 : vector<4xf16> to vector<2xf32>
%422 = spirv.Bitcast %270 : vector<4xf16> to vector<2xf32>
%423 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %422 : vector<2xf32> -> vector<4xf32>
%424 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %423 : vector<4xf32>, %421 : vector<2xf32> -> vector<4xf32>
%425 = spirv.Bitcast %256 : vector<4xf16> to vector<2xf32>
%426 = spirv.Bitcast %242 : vector<4xf16> to vector<2xf32>
%427 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %426 : vector<2xf32> -> vector<4xf32>
%428 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %427 : vector<4xf32>, %425 : vector<2xf32> -> vector<4xf32>
spirv.Store "Function" %10, %400 : vector<4xf32>
spirv.Store "Function" %11, %404 : vector<4xf32>
spirv.Store "Function" %12, %408 : vector<4xf32>
spirv.Store "Function" %13, %412 : vector<4xf32>
spirv.Store "Function" %14, %416 : vector<4xf32>
spirv.Store "Function" %15, %420 : vector<4xf32>
spirv.Store "Function" %16, %424 : vector<4xf32>
spirv.Store "Function" %17, %428 : vector<4xf32>
%429 = spirv.IAdd %159, %cst4_i32 : i32
spirv.Branch ^bb1(%429, %400, %404, %408, %412, %416, %420, %424, %428 : i32, vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
%18 = spirv.Load "Function" %17 : vector<4xf32>
%19 = spirv.Load "Function" %16 : vector<4xf32>
%20 = spirv.Load "Function" %15 : vector<4xf32>
%21 = spirv.Load "Function" %14 : vector<4xf32>
%22 = spirv.Load "Function" %13 : vector<4xf32>
%23 = spirv.Load "Function" %12 : vector<4xf32>
%24 = spirv.Load "Function" %11 : vector<4xf32>
%25 = spirv.Load "Function" %10 : vector<4xf32>
%26 = spirv.IMul %3, %cst8_i32 : i32
%27 = spirv.IAdd %26, %7 : i32
%28 = spirv.IAdd %27, %cst108199360_i32 : i32
%29 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %28] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%30 = spirv.Load "StorageBuffer" %29 : vector<4xf32>
%31 = spirv.CompositeExtract %30[0 : i32] : vector<4xf32>
%32 = spirv.Bitcast %31 : f32 to vector<2xf16>
%33 = spirv.CompositeExtract %32[0 : i32] : vector<2xf16>
%34 = spirv.CompositeConstruct %33, %33, %33, %33 : (f16, f16, f16, f16) -> vector<4xf16>
%35 = spirv.CompositeExtract %32[1 : i32] : vector<2xf16>
%36 = spirv.CompositeConstruct %35, %35, %35, %35 : (f16, f16, f16, f16) -> vector<4xf16>
%37 = spirv.CompositeExtract %30[1 : i32] : vector<4xf32>
%38 = spirv.Bitcast %37 : f32 to vector<2xf16>
%39 = spirv.CompositeExtract %38[0 : i32] : vector<2xf16>
%40 = spirv.CompositeConstruct %39, %39, %39, %39 : (f16, f16, f16, f16) -> vector<4xf16>
%41 = spirv.CompositeExtract %38[1 : i32] : vector<2xf16>
%42 = spirv.CompositeConstruct %41, %41, %41, %41 : (f16, f16, f16, f16) -> vector<4xf16>
%43 = spirv.CompositeExtract %30[2 : i32] : vector<4xf32>
%44 = spirv.Bitcast %43 : f32 to vector<2xf16>
%45 = spirv.CompositeExtract %44[0 : i32] : vector<2xf16>
%46 = spirv.CompositeConstruct %45, %45, %45, %45 : (f16, f16, f16, f16) -> vector<4xf16>
%47 = spirv.CompositeExtract %44[1 : i32] : vector<2xf16>
%48 = spirv.CompositeConstruct %47, %47, %47, %47 : (f16, f16, f16, f16) -> vector<4xf16>
%49 = spirv.CompositeExtract %30[3 : i32] : vector<4xf32>
%50 = spirv.Bitcast %49 : f32 to vector<2xf16>
%51 = spirv.CompositeExtract %50[0 : i32] : vector<2xf16>
%52 = spirv.CompositeConstruct %51, %51, %51, %51 : (f16, f16, f16, f16) -> vector<4xf16>
%53 = spirv.CompositeExtract %50[1 : i32] : vector<2xf16>
%54 = spirv.CompositeConstruct %53, %53, %53, %53 : (f16, f16, f16, f16) -> vector<4xf16>
%55 = spirv.VectorShuffle [0 : i32, 1 : i32] %18 : vector<4xf32>, %18 : vector<4xf32> -> vector<2xf32>
%56 = spirv.Bitcast %55 : vector<2xf32> to vector<4xf16>
%57 = spirv.FAdd %56, %34 : vector<4xf16>
%58 = spirv.VectorShuffle [2 : i32, 3 : i32] %18 : vector<4xf32>, %18 : vector<4xf32> -> vector<2xf32>
%59 = spirv.Bitcast %58 : vector<2xf32> to vector<4xf16>
%60 = spirv.FAdd %59, %34 : vector<4xf16>
%61 = spirv.VectorShuffle [0 : i32, 1 : i32] %19 : vector<4xf32>, %19 : vector<4xf32> -> vector<2xf32>
%62 = spirv.Bitcast %61 : vector<2xf32> to vector<4xf16>
%63 = spirv.FAdd %62, %36 : vector<4xf16>
%64 = spirv.VectorShuffle [2 : i32, 3 : i32] %19 : vector<4xf32>, %19 : vector<4xf32> -> vector<2xf32>
%65 = spirv.Bitcast %64 : vector<2xf32> to vector<4xf16>
%66 = spirv.FAdd %65, %36 : vector<4xf16>
%67 = spirv.VectorShuffle [0 : i32, 1 : i32] %20 : vector<4xf32>, %20 : vector<4xf32> -> vector<2xf32>
%68 = spirv.Bitcast %67 : vector<2xf32> to vector<4xf16>
%69 = spirv.FAdd %68, %40 : vector<4xf16>
%70 = spirv.VectorShuffle [2 : i32, 3 : i32] %20 : vector<4xf32>, %20 : vector<4xf32> -> vector<2xf32>
%71 = spirv.Bitcast %70 : vector<2xf32> to vector<4xf16>
%72 = spirv.FAdd %71, %40 : vector<4xf16>
%73 = spirv.VectorShuffle [0 : i32, 1 : i32] %21 : vector<4xf32>, %21 : vector<4xf32> -> vector<2xf32>
%74 = spirv.Bitcast %73 : vector<2xf32> to vector<4xf16>
%75 = spirv.FAdd %74, %42 : vector<4xf16>
%76 = spirv.VectorShuffle [2 : i32, 3 : i32] %21 : vector<4xf32>, %21 : vector<4xf32> -> vector<2xf32>
%77 = spirv.Bitcast %76 : vector<2xf32> to vector<4xf16>
%78 = spirv.FAdd %77, %42 : vector<4xf16>
%79 = spirv.VectorShuffle [0 : i32, 1 : i32] %22 : vector<4xf32>, %22 : vector<4xf32> -> vector<2xf32>
%80 = spirv.Bitcast %79 : vector<2xf32> to vector<4xf16>
%81 = spirv.FAdd %80, %46 : vector<4xf16>
%82 = spirv.VectorShuffle [2 : i32, 3 : i32] %22 : vector<4xf32>, %22 : vector<4xf32> -> vector<2xf32>
%83 = spirv.Bitcast %82 : vector<2xf32> to vector<4xf16>
%84 = spirv.FAdd %83, %46 : vector<4xf16>
%85 = spirv.VectorShuffle [0 : i32, 1 : i32] %23 : vector<4xf32>, %23 : vector<4xf32> -> vector<2xf32>
%86 = spirv.Bitcast %85 : vector<2xf32> to vector<4xf16>
%87 = spirv.FAdd %86, %48 : vector<4xf16>
%88 = spirv.VectorShuffle [2 : i32, 3 : i32] %23 : vector<4xf32>, %23 : vector<4xf32> -> vector<2xf32>
%89 = spirv.Bitcast %88 : vector<2xf32> to vector<4xf16>
%90 = spirv.FAdd %89, %48 : vector<4xf16>
%91 = spirv.VectorShuffle [0 : i32, 1 : i32] %24 : vector<4xf32>, %24 : vector<4xf32> -> vector<2xf32>
%92 = spirv.Bitcast %91 : vector<2xf32> to vector<4xf16>
%93 = spirv.FAdd %92, %52 : vector<4xf16>
%94 = spirv.VectorShuffle [2 : i32, 3 : i32] %24 : vector<4xf32>, %24 : vector<4xf32> -> vector<2xf32>
%95 = spirv.Bitcast %94 : vector<2xf32> to vector<4xf16>
%96 = spirv.FAdd %95, %52 : vector<4xf16>
%97 = spirv.VectorShuffle [0 : i32, 1 : i32] %25 : vector<4xf32>, %25 : vector<4xf32> -> vector<2xf32>
%98 = spirv.Bitcast %97 : vector<2xf32> to vector<4xf16>
%99 = spirv.FAdd %98, %54 : vector<4xf16>
%100 = spirv.VectorShuffle [2 : i32, 3 : i32] %25 : vector<4xf32>, %25 : vector<4xf32> -> vector<2xf32>
%101 = spirv.Bitcast %100 : vector<2xf32> to vector<4xf16>
%102 = spirv.FAdd %101, %54 : vector<4xf16>
%103 = spirv.Bitcast %60 : vector<4xf16> to vector<2xf32>
%104 = spirv.Bitcast %57 : vector<4xf16> to vector<2xf32>
%105 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %104 : vector<2xf32> -> vector<4xf32>
%106 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %105 : vector<4xf32>, %103 : vector<2xf32> -> vector<4xf32>
%107 = spirv.IMul %3, %cst73728_i32 : i32
%108 = spirv.IMul %7, %cst9216_i32 : i32
%109 = spirv.IAdd %107, %108 : i32
%110 = spirv.IMul %5, %cst32_i32 : i32
%111 = spirv.IAdd %109, %110 : i32
%112 = spirv.IAdd %111, %9 : i32
%113 = spirv.IMul %1, %cst368640_i32 : i32
%114 = spirv.IAdd %112, %113 : i32
%115 = spirv.IAdd %114, %cst92748_i32 : i32
%116 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %115] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %116, %106 : vector<4xf32>
%117 = spirv.Bitcast %66 : vector<4xf16> to vector<2xf32>
%118 = spirv.Bitcast %63 : vector<4xf16> to vector<2xf32>
%119 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %118 : vector<2xf32> -> vector<4xf32>
%120 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %119 : vector<4xf32>, %117 : vector<2xf32> -> vector<4xf32>
%121 = spirv.IAdd %114, %cst93900_i32 : i32
%122 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %121] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %122, %120 : vector<4xf32>
%123 = spirv.Bitcast %72 : vector<4xf16> to vector<2xf32>
%124 = spirv.Bitcast %69 : vector<4xf16> to vector<2xf32>
%125 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %124 : vector<2xf32> -> vector<4xf32>
%126 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %125 : vector<4xf32>, %123 : vector<2xf32> -> vector<4xf32>
%127 = spirv.IAdd %114, %cst95052_i32 : i32
%128 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %127] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %128, %126 : vector<4xf32>
%129 = spirv.Bitcast %78 : vector<4xf16> to vector<2xf32>
%130 = spirv.Bitcast %75 : vector<4xf16> to vector<2xf32>
%131 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %130 : vector<2xf32> -> vector<4xf32>
%132 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %131 : vector<4xf32>, %129 : vector<2xf32> -> vector<4xf32>
%133 = spirv.IAdd %114, %cst96204_i32 : i32
%134 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %133] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %134, %132 : vector<4xf32>
%135 = spirv.Bitcast %84 : vector<4xf16> to vector<2xf32>
%136 = spirv.Bitcast %81 : vector<4xf16> to vector<2xf32>
%137 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %136 : vector<2xf32> -> vector<4xf32>
%138 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %137 : vector<4xf32>, %135 : vector<2xf32> -> vector<4xf32>
%139 = spirv.IAdd %114, %cst97356_i32 : i32
%140 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %139] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %140, %138 : vector<4xf32>
%141 = spirv.Bitcast %90 : vector<4xf16> to vector<2xf32>
%142 = spirv.Bitcast %87 : vector<4xf16> to vector<2xf32>
%143 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %142 : vector<2xf32> -> vector<4xf32>
%144 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %143 : vector<4xf32>, %141 : vector<2xf32> -> vector<4xf32>
%145 = spirv.IAdd %114, %cst98508_i32 : i32
%146 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %145] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %146, %144 : vector<4xf32>
%147 = spirv.Bitcast %96 : vector<4xf16> to vector<2xf32>
%148 = spirv.Bitcast %93 : vector<4xf16> to vector<2xf32>
%149 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %148 : vector<2xf32> -> vector<4xf32>
%150 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %149 : vector<4xf32>, %147 : vector<2xf32> -> vector<4xf32>
%151 = spirv.IAdd %114, %cst99660_i32 : i32
%152 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %151] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %152, %150 : vector<4xf32>
%153 = spirv.Bitcast %102 : vector<4xf16> to vector<2xf32>
%154 = spirv.Bitcast %99 : vector<4xf16> to vector<2xf32>
%155 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %154 : vector<2xf32> -> vector<4xf32>
%156 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %155 : vector<4xf32>, %153 : vector<2xf32> -> vector<4xf32>
%157 = spirv.IAdd %114, %cst100812_i32 : i32
%158 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %157] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %158, %156 : vector<4xf32>
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_12_generic_2x320x9216x36, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_12_generic_2x320x9216x36 "LocalSize", 32, 8, 1
}
}
}
}
// Dispatch 13: element-wise widening copy. Each invocation loads one
// vector<4xf16> from binding (0, 0), converts it to vector<4xf32> and stores
// it to binding (0, 1).
hal.executable private @forward_dispatch_13 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
// Export: workgroups are 32x1x1 invocations and the workgroup-count region
// below returns a (46080, 1, 1) grid. 46080 workgroups * 32 invocations *
// 4 elements per vector = 5898240 elements, matching the entry-point name.
hal.executable.export public @forward_dispatch_13_generic_5898240 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation1, workgroup_size = [32 : index, 1 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index):
%c46080 = arith.constant 46080 : index
%c1 = arith.constant 1 : index
hal.return %c46080, %c1, %c1 : index, index, index
}
// Target environment the SPIR-V module was compiled against (SPIR-V 1.6 VCE,
// Vulkan API, NVIDIA discrete GPU, subgroup size fixed at 32).
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
// The module itself only requires Shader, StorageBuffer16BitAccess and Float16.
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
// Two i32 push constants; presumably the byte offsets of the two bindings
// (NOTE(review): inferred from the stride-sized divisors below - confirm).
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
// Binding (0, 0): source, viewed as a runtime array of vector<4xf16> (stride 8).
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
// Binding (0, 1): destination, viewed as a runtime array of vector<4xf32> (stride 16).
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.func @forward_dispatch_13_generic_5898240() "None" {
%cst1_i32 = spirv.Constant 1 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst16_i32 = spirv.Constant 16 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst0_i32 = spirv.Constant 0 : i32
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
// %1 = push constant 0, %3 = push constant 1.
%0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%1 = spirv.Load "PushConstant" %0 : i32
%2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%3 = spirv.Load "PushConstant" %2 : i32
// %9 = floor(%1 / 8). For negative inputs the value is mapped to -1 - x
// before the division and the quotient mapped back the same way, so the
// result rounds toward negative infinity. 8 equals the source element stride.
%4 = spirv.SLessThan %1, %cst0_i32 : i32
%5 = spirv.ISub %cst-1_i32, %1 : i32
%6 = spirv.Select %4, %5, %1 : i1, i32
%7 = spirv.SDiv %6, %cst8_i32 : i32
%8 = spirv.ISub %cst-1_i32, %7 : i32
%9 = spirv.Select %4, %8, %7 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
// %15 = floor(%3 / 16), same pattern; 16 equals the destination element stride.
%10 = spirv.SLessThan %3, %cst0_i32 : i32
%11 = spirv.ISub %cst-1_i32, %3 : i32
%12 = spirv.Select %10, %11, %3 : i1, i32
%13 = spirv.SDiv %12, %cst16_i32 : i32
%14 = spirv.ISub %cst-1_i32, %13 : i32
%15 = spirv.Select %10, %14, %13 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
// %17 = WorkgroupId.x, %19 = LocalInvocationId.x.
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%16 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%17 = spirv.CompositeExtract %16[0 : i32] : vector<3xi32>
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%18 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%19 = spirv.CompositeExtract %18[0 : i32] : vector<3xi32>
// %21 = WorkgroupId.x * 32 + LocalInvocationId.x: the per-invocation vector index.
%20 = spirv.IMul %17, %cst32_i32 : i32
%21 = spirv.IAdd %20, %19 : i32
// Load the f16 vector at (index + source offset) and widen it to f32.
%22 = spirv.IAdd %21, %9 : i32
%23 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %22] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
%24 = spirv.Load "StorageBuffer" %23 : vector<4xf16>
%25 = spirv.FConvert %24 : vector<4xf16> to vector<4xf32>
// Store the widened vector at (index + destination offset).
%26 = spirv.IAdd %21, %15 : i32
%27 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %26] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %27, %25 : vector<4xf32>
spirv.Return
}
// LocalSize here matches the workgroup_size attribute on the HAL export above.
spirv.EntryPoint "GLCompute" @forward_dispatch_13_generic_5898240, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_13_generic_5898240 "LocalSize", 32, 1, 1
}
}
}
}
// Dispatch 14: per-workgroup mean reduction. Each workgroup sums 92160 f32
// values read from binding (0, 0), divides the sum by 92160.0 and writes one
// f32 result to binding (0, 1).
hal.executable private @forward_dispatch_14 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
// Export: workgroups are 512x1x1 invocations and the workgroup-count region
// below returns a (32, 2, 1) grid, i.e. 2 x 32 = 64 reductions in total.
hal.executable.export public @forward_dispatch_14_generic_2x32x92160 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation3, workgroup_size = [512 : index, 1 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
%c32 = arith.constant 32 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
hal.return %c32, %c2, %c1 : index, index, index
}
// Target environment the SPIR-V module was compiled against (SPIR-V 1.6 VCE,
// Vulkan API, NVIDIA discrete GPU, min/max subgroup size both 32).
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
// The module requires SPIR-V 1.3 with GroupNonUniformShuffle for the subgroup shuffles below.
spirv.module Logical GLSL450 requires #spirv.vce<v1.3, [Shader, GroupNonUniformShuffle], [SPV_KHR_storage_buffer_storage_class]> {
// 16 f32 slots of workgroup memory: one partial sum per group of 32
// invocations (512 / 32 = 16).
spirv.GlobalVariable @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32>)>, Workgroup>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
// Two i32 push constants; presumably the byte offsets of the two bindings
// (NOTE(review): inferred from the stride-sized divisors below - confirm).
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
// Binding (0, 0): input, viewed as a runtime array of vector<4xf32> (stride 16).
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
// Binding (0, 1): output, viewed as a runtime array of scalar f32 (stride 4).
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>
spirv.func @forward_dispatch_14_generic_2x32x92160() "None" {
%cst23040_i32 = spirv.Constant 23040 : i32
%cst737280_i32 = spirv.Constant 737280 : i32
%cst4_i32 = spirv.Constant 4 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst16_i32 = spirv.Constant 16 : i32
%cst0_i32 = spirv.Constant 0 : i32
%cst_vec_4xf32 = spirv.Constant dense<0.000000e+00> : vector<4xf32>
%cst1_i32 = spirv.Constant 1 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst15_i32 = spirv.Constant 15 : i32
%cst_f32 = spirv.Constant 9.216000e+04 : f32
%cst2048_i32 = spirv.Constant 2048 : i32
%cst92160_i32 = spirv.Constant 92160 : i32
%cst_f32_0 = spirv.Constant 0.000000e+00 : f32
// %1 = LocalInvocationId.x (0..511 given the 512x1x1 local size).
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%0 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%1 = spirv.CompositeExtract %0[0 : i32] : vector<3xi32>
// %3 = push constant 0, %5 = push constant 1.
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
%2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%3 = spirv.Load "PushConstant" %2 : i32
%4 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%5 = spirv.Load "PushConstant" %4 : i32
// %11 = floor(%3 / 16). Negative inputs are mapped to -1 - x before the
// division and the quotient mapped back, so the result rounds toward
// negative infinity. 16 equals the input element stride.
%6 = spirv.SLessThan %3, %cst0_i32 : i32
%7 = spirv.ISub %cst-1_i32, %3 : i32
%8 = spirv.Select %6, %7, %3 : i1, i32
%9 = spirv.SDiv %8, %cst16_i32 : i32
%10 = spirv.ISub %cst-1_i32, %9 : i32
%11 = spirv.Select %6, %10, %9 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
// %17 = floor(%5 / 4), same pattern; 4 equals the output element stride.
%12 = spirv.SLessThan %5, %cst0_i32 : i32
%13 = spirv.ISub %cst-1_i32, %5 : i32
%14 = spirv.Select %12, %13, %5 : i1, i32
%15 = spirv.SDiv %14, %cst4_i32 : i32
%16 = spirv.ISub %cst-1_i32, %15 : i32
%17 = spirv.Select %12, %16, %15 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>
// %19 = WorkgroupId.y, %21 = WorkgroupId.x.
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%18 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%19 = spirv.CompositeExtract %18[1 : i32] : vector<3xi32>
%20 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%21 = spirv.CompositeExtract %20[0 : i32] : vector<3xi32>
// %22 carries the per-invocation accumulator out of the structured loop.
%22 = spirv.Variable : !spirv.ptr<vector<4xf32>, Function>
// Per-invocation partial sum. %59 counts scalar elements from 0 to 92160 in
// steps of 2048 (= 512 invocations * 4 lanes), i.e. 45 iterations; %60 is
// the running vector<4xf32> sum, starting from zero.
spirv.mlir.loop {
spirv.Branch ^bb1(%cst0_i32, %cst_vec_4xf32 : i32, vector<4xf32>)
^bb1(%59: i32, %60: vector<4xf32>): // 2 preds: ^bb0, ^bb2
%61 = spirv.SLessThan %59, %cst92160_i32 : i32
spirv.BranchConditional %61, ^bb2, ^bb3
^bb2: // pred: ^bb1
// %67 = floor(%59 / 4): scalar element counter converted to a vector index.
%62 = spirv.SLessThan %59, %cst0_i32 : i32
%63 = spirv.ISub %cst-1_i32, %59 : i32
%64 = spirv.Select %62, %63, %59 : i1, i32
%65 = spirv.SDiv %64, %cst4_i32 : i32
%66 = spirv.ISub %cst-1_i32, %65 : i32
%67 = spirv.Select %62, %66, %65 : i1, i32
// Vector index = %67 + LocalInvocationId.x + WorkgroupId.y * 737280
//              + WorkgroupId.x * 23040 + %11.
// 23040 = 92160 / 4 vectors per reduction; 737280 = 32 * 23040.
%68 = spirv.IMul %19, %cst737280_i32 : i32
%69 = spirv.IAdd %1, %68 : i32
%70 = spirv.IMul %21, %cst23040_i32 : i32
%71 = spirv.IAdd %69, %70 : i32
%72 = spirv.IAdd %67, %71 : i32
%73 = spirv.IAdd %72, %11 : i32
%74 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %73] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%75 = spirv.Load "StorageBuffer" %74 : vector<4xf32>
%76 = spirv.FAdd %75, %60 : vector<4xf32>
// Written every iteration so the final sum is readable after the merge.
spirv.Store "Function" %22, %76 : vector<4xf32>
%77 = spirv.IAdd %59, %cst2048_i32 : i32
spirv.Branch ^bb1(%77, %76 : i32, vector<4xf32>)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
// Horizontal add of the four accumulator lanes into the scalar %30.
%23 = spirv.Load "Function" %22 : vector<4xf32>
%24 = spirv.CompositeExtract %23[0 : i32] : vector<4xf32>
%25 = spirv.CompositeExtract %23[1 : i32] : vector<4xf32>
%26 = spirv.CompositeExtract %23[2 : i32] : vector<4xf32>
%27 = spirv.CompositeExtract %23[3 : i32] : vector<4xf32>
%28 = spirv.FAdd %24, %25 : f32
%29 = spirv.FAdd %28, %26 : f32
%30 = spirv.FAdd %29, %27 : f32
// Subgroup-level sum: XOR shuffles with masks 1, 2, 4, 8, 16, adding the
// partner's value each time (covers 32 invocations).
%31 = spirv.GroupNonUniformShuffleXor <Subgroup> %30, %cst1_i32 : f32, i32
%32 = spirv.FAdd %30, %31 : f32
%33 = spirv.GroupNonUniformShuffleXor <Subgroup> %32, %cst2_i32 : f32, i32
%34 = spirv.FAdd %32, %33 : f32
%35 = spirv.GroupNonUniformShuffleXor <Subgroup> %34, %cst4_i32 : f32, i32
%36 = spirv.FAdd %34, %35 : f32
%37 = spirv.GroupNonUniformShuffleXor <Subgroup> %36, %cst8_i32 : f32, i32
%38 = spirv.FAdd %36, %37 : f32
%39 = spirv.GroupNonUniformShuffleXor <Subgroup> %38, %cst16_i32 : f32, i32
%40 = spirv.FAdd %38, %39 : f32
%__workgroup_mem__5_addr = spirv.mlir.addressof @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32>)>, Workgroup>
// %41 = LocalInvocationId.x / 32, %42 = LocalInvocationId.x % 32.
// NOTE(review): using these as subgroup index / lane index assumes a subgroup
// size of 32 and a linear invocation-to-subgroup mapping - confirm.
%41 = spirv.UDiv %1, %cst32_i32 : i32
%42 = spirv.UMod %1, %cst32_i32 : i32
%43 = spirv.IEqual %42, %cst0_i32 : i32
// Invocations with %42 == 0 publish their partial sum to workgroup slot %41.
spirv.mlir.selection {
spirv.BranchConditional %43, ^bb1, ^bb2
^bb1: // pred: ^bb0
%59 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %41] : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %59, %40 : f32
spirv.Branch ^bb2
^bb2: // 2 preds: ^bb0, ^bb1
spirv.mlir.merge
}
// Workgroup barrier between the partial-sum stores above and the loads below.
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
// Every invocation loads slot min(%42, 15); the clamp keeps the index inside
// the 16-element array for %42 values 16..31.
%44 = spirv.GL.UMin %42, %cst15_i32 : i32
%45 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %44] : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32>)>, Workgroup>, i32, i32
%46 = spirv.Load "Workgroup" %45 : f32
// Second-stage sum over the 16 partials: XOR shuffles with masks 1, 2, 4, 8.
%47 = spirv.GroupNonUniformShuffleXor <Subgroup> %46, %cst1_i32 : f32, i32
%48 = spirv.FAdd %46, %47 : f32
%49 = spirv.GroupNonUniformShuffleXor <Subgroup> %48, %cst2_i32 : f32, i32
%50 = spirv.FAdd %48, %49 : f32
%51 = spirv.GroupNonUniformShuffleXor <Subgroup> %50, %cst4_i32 : f32, i32
%52 = spirv.FAdd %50, %51 : f32
%53 = spirv.GroupNonUniformShuffleXor <Subgroup> %52, %cst8_i32 : f32, i32
%54 = spirv.FAdd %52, %53 : f32
// Take the total from subgroup invocation 0, add the 0.0 constant
// (presumably the reduction's initial value - confirm), then divide by
// 92160.0 to turn the sum into a mean.
%55 = spirv.GroupNonUniformShuffle <Subgroup> %54, %cst0_i32 : f32, i32
%56 = spirv.FAdd %55, %cst_f32_0 : f32
%57 = spirv.FDiv %56, %cst_f32 : f32
%58 = spirv.IEqual %1, %cst0_i32 : i32
// Only invocation 0 writes the result, at output index
// WorkgroupId.y * 32 + WorkgroupId.x + %17.
spirv.mlir.selection {
spirv.BranchConditional %58, ^bb1, ^bb2
^bb1: // pred: ^bb0
%59 = spirv.IMul %19, %cst32_i32 : i32
%60 = spirv.IAdd %59, %21 : i32
%61 = spirv.IAdd %60, %17 : i32
%62 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %61] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %62, %57 : f32
spirv.Branch ^bb2
^bb2: // 2 preds: ^bb0, ^bb1
spirv.mlir.merge
}
spirv.Return
}
// LocalSize here matches the workgroup_size attribute on the HAL export above.
spirv.EntryPoint "GLCompute" @forward_dispatch_14_generic_2x32x92160, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__
spirv.ExecutionMode @forward_dispatch_14_generic_2x32x92160 "LocalSize", 512, 1, 1
}
}
}
}
hal.executable private @forward_dispatch_15 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
hal.executable.export public @forward_dispatch_15_generic_64x92160 ordinal(0) layout(#pipeline_layout4) attributes {translation_info = #translation3, workgroup_size = [512 : index, 1 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
%c64 = arith.constant 64 : index
%c1 = arith.constant 1 : index
hal.return %c64, %c1, %c1 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.3, [Shader, GroupNonUniformShuffle], [SPV_KHR_storage_buffer_storage_class]> {
spirv.GlobalVariable @__workgroup_mem__6 : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32>)>, Workgroup>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__resource_var_0_0__0 bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>
spirv.func @forward_dispatch_15_generic_64x92160() "None" {
%cst23040_i32 = spirv.Constant 23040 : i32
%cst4_i32 = spirv.Constant 4 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst16_i32 = spirv.Constant 16 : i32
%cst0_i32 = spirv.Constant 0 : i32
%cst_vec_4xf32 = spirv.Constant dense<0.000000e+00> : vector<4xf32>
%cst1_i32 = spirv.Constant 1 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst15_i32 = spirv.Constant 15 : i32
%cst2048_i32 = spirv.Constant 2048 : i32
%cst92160_i32 = spirv.Constant 92160 : i32
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%0 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%1 = spirv.CompositeExtract %0[0 : i32] : vector<3xi32>
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>
%2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32
%3 = spirv.Load "PushConstant" %2 : i32
%4 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32
%5 = spirv.Load "PushConstant" %4 : i32
%6 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32
%7 = spirv.Load "PushConstant" %6 : i32
%8 = spirv.SLessThan %3, %cst0_i32 : i32
%9 = spirv.ISub %cst-1_i32, %3 : i32
%10 = spirv.Select %8, %9, %3 : i1, i32
%11 = spirv.SDiv %10, %cst16_i32 : i32
%12 = spirv.ISub %cst-1_i32, %11 : i32
%13 = spirv.Select %8, %12, %11 : i1, i32
%__resource_var_0_0__0_addr = spirv.mlir.addressof @__resource_var_0_0__0 : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%14 = spirv.SLessThan %5, %cst0_i32 : i32
%15 = spirv.ISub %cst-1_i32, %5 : i32
%16 = spirv.Select %14, %15, %5 : i1, i32
%17 = spirv.SDiv %16, %cst4_i32 : i32
%18 = spirv.ISub %cst-1_i32, %17 : i32
%19 = spirv.Select %14, %18, %17 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>
%20 = spirv.SLessThan %7, %cst0_i32 : i32
%21 = spirv.ISub %cst-1_i32, %7 : i32
%22 = spirv.Select %20, %21, %7 : i1, i32
%23 = spirv.SDiv %22, %cst4_i32 : i32
%24 = spirv.ISub %cst-1_i32, %23 : i32
%25 = spirv.Select %20, %24, %23 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%26 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%27 = spirv.CompositeExtract %26[0 : i32] : vector<3xi32>
%28 = spirv.Variable : !spirv.ptr<vector<4xf32>, Function>
spirv.mlir.loop {
spirv.Branch ^bb1(%cst0_i32, %cst_vec_4xf32 : i32, vector<4xf32>)
^bb1(%67: i32, %68: vector<4xf32>): // 2 preds: ^bb0, ^bb2
%69 = spirv.SLessThan %67, %cst92160_i32 : i32
spirv.BranchConditional %69, ^bb2, ^bb3
^bb2: // pred: ^bb1
%70 = spirv.IAdd %27, %19 : i32
%71 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %70] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>, i32, i32
%72 = spirv.Load "StorageBuffer" %71 : f32
%73 = spirv.CompositeConstruct %72, %72, %72, %72 : (f32, f32, f32, f32) -> vector<4xf32>
%74 = spirv.SLessThan %67, %cst0_i32 : i32
%75 = spirv.ISub %cst-1_i32, %67 : i32
%76 = spirv.Select %74, %75, %67 : i1, i32
%77 = spirv.SDiv %76, %cst4_i32 : i32
%78 = spirv.ISub %cst-1_i32, %77 : i32
%79 = spirv.Select %74, %78, %77 : i1, i32
%80 = spirv.IMul %27, %cst23040_i32 : i32
%81 = spirv.IAdd %1, %80 : i32
%82 = spirv.IAdd %79, %81 : i32
%83 = spirv.IAdd %82, %13 : i32
%84 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %83] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%85 = spirv.Load "StorageBuffer" %84 : vector<4xf32>
%86 = spirv.FSub %85, %73 : vector<4xf32>
%87 = spirv.FMul %86, %86 : vector<4xf32>
%88 = spirv.FAdd %87, %68 : vector<4xf32>
spirv.Store "Function" %28, %88 : vector<4xf32>
%89 = spirv.IAdd %67, %cst2048_i32 : i32
spirv.Branch ^bb1(%89, %88 : i32, vector<4xf32>)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
%29 = spirv.Load "Function" %28 : vector<4xf32>
%30 = spirv.IAdd %27, %25 : i32
%31 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %30] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>, i32, i32
%32 = spirv.Load "StorageBuffer" %31 : f32
%33 = spirv.CompositeExtract %29[0 : i32] : vector<4xf32>
%34 = spirv.CompositeExtract %29[1 : i32] : vector<4xf32>
%35 = spirv.CompositeExtract %29[2 : i32] : vector<4xf32>
%36 = spirv.CompositeExtract %29[3 : i32] : vector<4xf32>
%37 = spirv.FAdd %33, %34 : f32
%38 = spirv.FAdd %37, %35 : f32
%39 = spirv.FAdd %38, %36 : f32
%40 = spirv.GroupNonUniformShuffleXor <Subgroup> %39, %cst1_i32 : f32, i32
%41 = spirv.FAdd %39, %40 : f32
%42 = spirv.GroupNonUniformShuffleXor <Subgroup> %41, %cst2_i32 : f32, i32
%43 = spirv.FAdd %41, %42 : f32
%44 = spirv.GroupNonUniformShuffleXor <Subgroup> %43, %cst4_i32 : f32, i32
%45 = spirv.FAdd %43, %44 : f32
%46 = spirv.GroupNonUniformShuffleXor <Subgroup> %45, %cst8_i32 : f32, i32
%47 = spirv.FAdd %45, %46 : f32
%48 = spirv.GroupNonUniformShuffleXor <Subgroup> %47, %cst16_i32 : f32, i32
%49 = spirv.FAdd %47, %48 : f32
%__workgroup_mem__6_addr = spirv.mlir.addressof @__workgroup_mem__6 : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32>)>, Workgroup>
%50 = spirv.UDiv %1, %cst32_i32 : i32
%51 = spirv.UMod %1, %cst32_i32 : i32
%52 = spirv.IEqual %51, %cst0_i32 : i32
spirv.mlir.selection {
spirv.BranchConditional %52, ^bb1, ^bb2
^bb1: // pred: ^bb0
%67 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %50] : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %67, %49 : f32
spirv.Branch ^bb2
^bb2: // 2 preds: ^bb0, ^bb1
spirv.mlir.merge
}
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
%53 = spirv.GL.UMin %51, %cst15_i32 : i32
%54 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %53] : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32>)>, Workgroup>, i32, i32
%55 = spirv.Load "Workgroup" %54 : f32
%56 = spirv.GroupNonUniformShuffleXor <Subgroup> %55, %cst1_i32 : f32, i32
%57 = spirv.FAdd %55, %56 : f32
%58 = spirv.GroupNonUniformShuffleXor <Subgroup> %57, %cst2_i32 : f32, i32
%59 = spirv.FAdd %57, %58 : f32
%60 = spirv.GroupNonUniformShuffleXor <Subgroup> %59, %cst4_i32 : f32, i32
%61 = spirv.FAdd %59, %60 : f32
%62 = spirv.GroupNonUniformShuffleXor <Subgroup> %61, %cst8_i32 : f32, i32
%63 = spirv.FAdd %61, %62 : f32
%64 = spirv.GroupNonUniformShuffle <Subgroup> %63, %cst0_i32 : f32, i32
%65 = spirv.FAdd %64, %32 : f32
%66 = spirv.IEqual %1, %cst0_i32 : i32
spirv.mlir.selection {
spirv.BranchConditional %66, ^bb1, ^bb2
^bb1: // pred: ^bb0
spirv.Store "StorageBuffer" %31, %65 : f32
spirv.Branch ^bb2
^bb2: // 2 preds: ^bb0, ^bb1
spirv.mlir.merge
}
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_15_generic_64x92160, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__
spirv.ExecutionMode @forward_dispatch_15_generic_64x92160 "LocalSize", 512, 1, 1
}
}
}
}
// forward_dispatch_16: element-wise kernel over vector<4xf16> data.
// Each invocation loads one 4-wide f16 vector x plus two f32 scalars (a, b)
// and stores (x - a) * inversesqrt(b / 92160 + 1e-5) as a 4-wide f16 vector.
// NOTE(review): a/b look like a per-group mean and sum of squared deviations
// (normalization) -- confirm against the producer dispatch.
hal.executable private @forward_dispatch_16 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
hal.executable.export public @forward_dispatch_16_generic_64x10x9216 ordinal(0) layout(#pipeline_layout5) attributes {translation_info = #translation1, workgroup_size = [32 : index, 1 : index, 1 : index]} {
// The region yields the constants (72, 10, 64). They line up with the
// WorkgroupId x/y/z strides used in the kernel below:
// 72 * 32 lanes = 2304 (the y stride) and 10 * 2304 = 23040 (the z stride).
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
%c72 = arith.constant 72 : index
%c10 = arith.constant 10 : index
%c64 = arith.constant 64 : index
hal.return %c72, %c10, %c64 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
// Four i32 push constants; each is floor-divided below into an element offset.
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>
// Binding (0, 0) is declared twice and marked aliased: once viewed as
// 4-wide f16 vectors (stride 8) and once as f32 scalars (stride 4).
spirv.GlobalVariable @__resource_var_0_0__0 bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>
// Binding (0, 1): output, 4-wide f16 vectors (stride 8).
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
spirv.func @forward_dispatch_16_generic_64x10x9216() "None" {
%cst3_i32 = spirv.Constant 3 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst1_i32 = spirv.Constant 1 : i32
%cst2304_i32 = spirv.Constant 2304 : i32
%cst23040_i32 = spirv.Constant 23040 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst4_i32 = spirv.Constant 4 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst0_i32 = spirv.Constant 0 : i32
// Divisor applied to the second scalar before the inverse square root.
%cst_f32 = spirv.Constant 9.216000e+04 : f32
// Small additive term (~1e-5) applied before the inverse square root.
%cst_f32_0 = spirv.Constant 9.99999974E-6 : f32
// Read push constants 0..3 into %1, %3, %5, %7.
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>
%0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32
%1 = spirv.Load "PushConstant" %0 : i32
%2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32
%3 = spirv.Load "PushConstant" %2 : i32
%4 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32
%5 = spirv.Load "PushConstant" %4 : i32
%6 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst3_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32
%7 = spirv.Load "PushConstant" %6 : i32
// Floor division idiom, repeated four times below:
//   neg = x < 0; t = neg ? (-1 - x) : x; q = t / d; result = neg ? (-1 - q) : q
// which rounds toward negative infinity for a positive divisor d.
// Each divisor equals the declared stride of the array the result indexes.
// NOTE(review): the push constants are presumably byte offsets -- confirm.
// %13 = floordiv(%1, 8): offset into the vector<4xf16> view of binding (0, 0).
%8 = spirv.SLessThan %1, %cst0_i32 : i32
%9 = spirv.ISub %cst-1_i32, %1 : i32
%10 = spirv.Select %8, %9, %1 : i1, i32
%11 = spirv.SDiv %10, %cst8_i32 : i32
%12 = spirv.ISub %cst-1_i32, %11 : i32
%13 = spirv.Select %8, %12, %11 : i1, i32
%__resource_var_0_0__0_addr = spirv.mlir.addressof @__resource_var_0_0__0 : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
// %19 = floordiv(%3, 4): offset of the first f32 scalar (subtracted from x).
%14 = spirv.SLessThan %3, %cst0_i32 : i32
%15 = spirv.ISub %cst-1_i32, %3 : i32
%16 = spirv.Select %14, %15, %3 : i1, i32
%17 = spirv.SDiv %16, %cst4_i32 : i32
%18 = spirv.ISub %cst-1_i32, %17 : i32
%19 = spirv.Select %14, %18, %17 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>
// %25 = floordiv(%5, 4): offset of the second f32 scalar (feeds the rsqrt).
%20 = spirv.SLessThan %5, %cst0_i32 : i32
%21 = spirv.ISub %cst-1_i32, %5 : i32
%22 = spirv.Select %20, %21, %5 : i1, i32
%23 = spirv.SDiv %22, %cst4_i32 : i32
%24 = spirv.ISub %cst-1_i32, %23 : i32
%25 = spirv.Select %20, %24, %23 : i1, i32
// %31 = floordiv(%7, 8): offset into the output vector<4xf16> array.
%26 = spirv.SLessThan %7, %cst0_i32 : i32
%27 = spirv.ISub %cst-1_i32, %7 : i32
%28 = spirv.Select %26, %27, %7 : i1, i32
%29 = spirv.SDiv %28, %cst8_i32 : i32
%30 = spirv.ISub %cst-1_i32, %29 : i32
%31 = spirv.Select %26, %30, %29 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
// WorkgroupId components: %33 = z, %35 = y, %37 = x (the builtin is loaded
// once per component).
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%32 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%33 = spirv.CompositeExtract %32[2 : i32] : vector<3xi32>
%34 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%35 = spirv.CompositeExtract %34[1 : i32] : vector<3xi32>
%36 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%37 = spirv.CompositeExtract %36[0 : i32] : vector<3xi32>
// %39 = the ~1e-5 constant narrowed to f16 and splatted across 4 lanes.
%38 = spirv.FConvert %cst_f32_0 : f32 to f16
%39 = spirv.CompositeConstruct %38, %38, %38, %38 : (f16, f16, f16, f16) -> vector<4xf16>
// %41 = LocalInvocationId.x.
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%40 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%41 = spirv.CompositeExtract %40[0 : i32] : vector<3xi32>
// Linear vector index shared by the load and the store:
//   %47 = wg.x * 32 + lid.x + wg.z * 23040 + wg.y * 2304
%42 = spirv.IMul %37, %cst32_i32 : i32
%43 = spirv.IAdd %42, %41 : i32
%44 = spirv.IMul %33, %cst23040_i32 : i32
%45 = spirv.IAdd %43, %44 : i32
%46 = spirv.IMul %35, %cst2304_i32 : i32
%47 = spirv.IAdd %45, %46 : i32
// %50 = input vector x at [%47 + %13].
%48 = spirv.IAdd %47, %13 : i32
%49 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %48] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
%50 = spirv.Load "StorageBuffer" %49 : vector<4xf16>
// %53 = first scalar, indexed by wg.z only (one value per z slice).
%51 = spirv.IAdd %33, %19 : i32
%52 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %51] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>, i32, i32
%53 = spirv.Load "StorageBuffer" %52 : f32
// %56 = second scalar, also indexed by wg.z only.
%54 = spirv.IAdd %33, %25 : i32
%55 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %54] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>, i32, i32
%56 = spirv.Load "StorageBuffer" %55 : f32
// %62 = inversesqrt(f16(%56 / 92160) + eps), computed per lane in f16.
// The division happens in f32; everything after the FConvert is f16.
%57 = spirv.FDiv %56, %cst_f32 : f32
%58 = spirv.FConvert %57 : f32 to f16
%59 = spirv.CompositeConstruct %58, %58, %58, %58 : (f16, f16, f16, f16) -> vector<4xf16>
%60 = spirv.FAdd %59, %39 : vector<4xf16>
%61 = spirv.FConvert %53 : f32 to f16
%62 = spirv.GL.InverseSqrt %60 : vector<4xf16>
// %65 = (x - splat(f16(%53))) * %62.
%63 = spirv.CompositeConstruct %61, %61, %61, %61 : (f16, f16, f16, f16) -> vector<4xf16>
%64 = spirv.FSub %50, %63 : vector<4xf16>
%65 = spirv.FMul %64, %62 : vector<4xf16>
// Store the result at [%47 + %31] in binding (0, 1).
%66 = spirv.IAdd %47, %31 : i32
%67 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %66] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %67, %65 : vector<4xf16>
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_16_generic_64x10x9216, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_16_generic_64x10x9216 "LocalSize", 32, 1, 1
}
}
}
}
// forward_dispatch_17: element-wise kernel over vector<4xf16> data.
// Each invocation computes y = x * s + t with two f16 scalars s and t picked
// by (WorkgroupId.z mod 320), then stores y * (1 / (1 + exp(-y))), i.e.
// y * sigmoid(y). exp() is evaluated in f32 with an inlined range-reduction
// plus polynomial approximation.
hal.executable private @forward_dispatch_17 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
hal.executable.export public @forward_dispatch_17_generic_2x320x96x96 ordinal(0) layout(#pipeline_layout6) attributes {translation_info = #translation1, workgroup_size = [8 : index, 4 : index, 1 : index]} {
// The region yields the constants (3, 24, 640). They line up with the index
// strides used in the kernel: 3 * 8 lanes = 24 vectors per row,
// 24 * 4 rows = 96 rows, and 640 = 2 * 320 z slices.
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index):
%c3 = arith.constant 3 : index
%c24 = arith.constant 24 : index
%c640 = arith.constant 640 : index
hal.return %c3, %c24, %c640 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
// Four i32 push constants; each is floor-divided below into an element offset.
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>
// Binding (0, 0): input, 4-wide f16 vectors (stride 8).
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
// Binding (0, 1): f16 scalars (stride 2); both s and t are read from it.
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// Binding (0, 2): output, 4-wide f16 vectors (stride 8).
spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
spirv.func @forward_dispatch_17_generic_2x320x96x96() "None" {
%cst3_i32 = spirv.Constant 3 : i32
%cst1_i32 = spirv.Constant 1 : i32
%cst2304_i32 = spirv.Constant 2304 : i32
%cst737280_i32 = spirv.Constant 737280 : i32
%cst96_i32 = spirv.Constant 96 : i32
%cst24_i32 = spirv.Constant 24 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst0_i32 = spirv.Constant 0 : i32
// Constants for the inlined exp() approximation:
//   %cst_vec_4xf32    ~ ln(2), %cst_vec_4xf32_0 ~ log2(e),
//   %cst_vec_4xf32_1..5 are the polynomial coefficients,
//   23 / 127 are the f32 mantissa width and exponent bias,
//   0x7F800000 / 0xFF800000 are +inf / -inf,
//   1.17549435E-38 is the smallest normal f32.
%cst_vec_4xf32 = spirv.Constant dense<0.693147182> : vector<4xf32>
%cst_vec_4xf32_0 = spirv.Constant dense<1.44269502> : vector<4xf32>
%cst_vec_4xf32_1 = spirv.Constant dense<1.000000e+00> : vector<4xf32>
%cst_vec_4xf32_2 = spirv.Constant dense<0.499705136> : vector<4xf32>
%cst_vec_4xf32_3 = spirv.Constant dense<0.168738902> : vector<4xf32>
%cst_vec_4xf32_4 = spirv.Constant dense<0.0366896503> : vector<4xf32>
%cst_vec_4xf32_5 = spirv.Constant dense<1.314350e-02> : vector<4xf32>
%cst_vec_4xi32 = spirv.Constant dense<23> : vector<4xi32>
%cst_vec_4xi32_6 = spirv.Constant dense<127> : vector<4xi32>
%cst_vec_4xf32_7 = spirv.Constant dense<0.000000e+00> : vector<4xf32>
%cst_vec_4xf32_8 = spirv.Constant dense<0x7F800000> : vector<4xf32>
%cst_vec_4xf32_9 = spirv.Constant dense<0xFF800000> : vector<4xf32>
%cst_vec_4xf32_10 = spirv.Constant dense<1.17549435E-38> : vector<4xf32>
%cst_vec_4xi32_11 = spirv.Constant dense<-127> : vector<4xi32>
%cst_vec_4xf16 = spirv.Constant dense<1.000000e+00> : vector<4xf16>
%cst320_i32 = spirv.Constant 320 : i32
// Read push constants 0..3 into %1, %3, %5, %7.
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>
%0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32
%1 = spirv.Load "PushConstant" %0 : i32
%2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32
%3 = spirv.Load "PushConstant" %2 : i32
%4 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32
%5 = spirv.Load "PushConstant" %4 : i32
%6 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst3_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32
%7 = spirv.Load "PushConstant" %6 : i32
// Floor division idiom, repeated four times below:
//   neg = x < 0; t = neg ? (-1 - x) : x; q = t / d; result = neg ? (-1 - q) : q
// which rounds toward negative infinity for a positive divisor d.
// Each divisor equals the declared stride of the array the result indexes.
// NOTE(review): the push constants are presumably byte offsets -- confirm.
// %13 = floordiv(%1, 8): offset into the input vector<4xf16> array.
%8 = spirv.SLessThan %1, %cst0_i32 : i32
%9 = spirv.ISub %cst-1_i32, %1 : i32
%10 = spirv.Select %8, %9, %1 : i1, i32
%11 = spirv.SDiv %10, %cst8_i32 : i32
%12 = spirv.ISub %cst-1_i32, %11 : i32
%13 = spirv.Select %8, %12, %11 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
// %19 = floordiv(%3, 2): offset of the multiplier s in the f16 scalar array.
%14 = spirv.SLessThan %3, %cst0_i32 : i32
%15 = spirv.ISub %cst-1_i32, %3 : i32
%16 = spirv.Select %14, %15, %3 : i1, i32
%17 = spirv.SDiv %16, %cst2_i32 : i32
%18 = spirv.ISub %cst-1_i32, %17 : i32
%19 = spirv.Select %14, %18, %17 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// %25 = floordiv(%5, 2): offset of the addend t in the same f16 scalar array.
%20 = spirv.SLessThan %5, %cst0_i32 : i32
%21 = spirv.ISub %cst-1_i32, %5 : i32
%22 = spirv.Select %20, %21, %5 : i1, i32
%23 = spirv.SDiv %22, %cst2_i32 : i32
%24 = spirv.ISub %cst-1_i32, %23 : i32
%25 = spirv.Select %20, %24, %23 : i1, i32
// %31 = floordiv(%7, 8): offset into the output vector<4xf16> array.
%26 = spirv.SLessThan %7, %cst0_i32 : i32
%27 = spirv.ISub %cst-1_i32, %7 : i32
%28 = spirv.Select %26, %27, %7 : i1, i32
%29 = spirv.SDiv %28, %cst8_i32 : i32
%30 = spirv.ISub %cst-1_i32, %29 : i32
%31 = spirv.Select %26, %30, %29 : i1, i32
%__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
// WorkgroupId.z is split into %34 = z / 320 and %35 = z mod 320;
// %37 = WorkgroupId.y, %39 = WorkgroupId.x.
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%32 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%33 = spirv.CompositeExtract %32[2 : i32] : vector<3xi32>
%34 = spirv.UDiv %33, %cst320_i32 : i32
%35 = spirv.UMod %33, %cst320_i32 : i32
%36 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%37 = spirv.CompositeExtract %36[1 : i32] : vector<3xi32>
%38 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%39 = spirv.CompositeExtract %38[0 : i32] : vector<3xi32>
// %41 = LocalInvocationId.y, %43 = LocalInvocationId.x.
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%40 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%41 = spirv.CompositeExtract %40[1 : i32] : vector<3xi32>
%42 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%43 = spirv.CompositeExtract %42[0 : i32] : vector<3xi32>
// Linear vector index shared by the load and the store:
//   %53 = lid.y * 24 + wg.y * 96 + wg.x * 8 + lid.x
//         + (z / 320) * 737280 + (z mod 320) * 2304
%44 = spirv.IMul %41, %cst24_i32 : i32
%45 = spirv.IMul %37, %cst96_i32 : i32
%46 = spirv.IAdd %44, %45 : i32
%47 = spirv.IMul %39, %cst8_i32 : i32
%48 = spirv.IAdd %46, %47 : i32
%49 = spirv.IAdd %48, %43 : i32
%50 = spirv.IMul %34, %cst737280_i32 : i32
%51 = spirv.IAdd %49, %50 : i32
%52 = spirv.IMul %35, %cst2304_i32 : i32
%53 = spirv.IAdd %51, %52 : i32
// %56 = input vector x at [%53 + %13].
%54 = spirv.IAdd %53, %13 : i32
%55 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %54] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
%56 = spirv.Load "StorageBuffer" %55 : vector<4xf16>
// %59 = multiplier s and %62 = addend t, both selected by (z mod 320).
%57 = spirv.IAdd %35, %19 : i32
%58 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %57] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%59 = spirv.Load "StorageBuffer" %58 : f16
%60 = spirv.IAdd %35, %25 : i32
%61 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %60] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%62 = spirv.Load "StorageBuffer" %61 : f16
// %66 = y = x * splat(s) + splat(t), in f16.
%63 = spirv.CompositeConstruct %59, %59, %59, %59 : (f16, f16, f16, f16) -> vector<4xf16>
%64 = spirv.FMul %56, %63 : vector<4xf16>
%65 = spirv.CompositeConstruct %62, %62, %62, %62 : (f16, f16, f16, f16) -> vector<4xf16>
%66 = spirv.FAdd %64, %65 : vector<4xf16>
// %68 = -y widened to f32; this is the argument of the exp() approximation.
%67 = spirv.FNegate %66 : vector<4xf16>
%68 = spirv.FConvert %67 : vector<4xf16> to vector<4xf32>
// NaN mask. Both operands of the LogicalOr are %69, so %70 equals %69.
%69 = spirv.IsNan %68 : vector<4xf32>
%70 = spirv.LogicalOr %69, %69 : vector<4xi1>
// Range reduction: %72 = k = floor(v * log2(e)), %74 = r = v - k * ln(2).
%71 = spirv.FMul %68, %cst_vec_4xf32_0 : vector<4xf32>
%72 = spirv.GL.Floor %71 : vector<4xf32>
%73 = spirv.FMul %72, %cst_vec_4xf32 : vector<4xf32>
%74 = spirv.FSub %68, %73 : vector<4xf32>
// Polynomial in r, with %75 = r^2 and %76 = r^4:
//   %81 = (1 + r) + r^2 * (c2 + c3 * r) + r^4 * (c4 + c5 * r)
%75 = spirv.FMul %74, %74 : vector<4xf32>
%76 = spirv.FMul %75, %75 : vector<4xf32>
%77 = spirv.GL.Fma %cst_vec_4xf32_1, %74, %cst_vec_4xf32_1 : vector<4xf32>
%78 = spirv.GL.Fma %cst_vec_4xf32_3, %74, %cst_vec_4xf32_2 : vector<4xf32>
%79 = spirv.GL.Fma %cst_vec_4xf32_5, %74, %cst_vec_4xf32_4 : vector<4xf32>
%80 = spirv.GL.Fma %78, %75, %77 : vector<4xf32>
%81 = spirv.GL.Fma %79, %76, %80 : vector<4xf32>
// Build 2^k by writing (k + 127) into the f32 exponent field (shift by 23),
// then scale the polynomial: %86 = poly(r) * 2^k.
%82 = spirv.ConvertFToS %72 : vector<4xf32> to vector<4xi32>
%83 = spirv.IAdd %82, %cst_vec_4xi32_6 : vector<4xi32>
%84 = spirv.ShiftLeftLogical %83, %cst_vec_4xi32 : vector<4xi32>, vector<4xi32>
%85 = spirv.Bitcast %84 : vector<4xi32> to vector<4xf32>
%86 = spirv.FMul %81, %85 : vector<4xf32>
// Special-case handling, applied lowest priority first:
//   k outside [-127, 127] -> +inf if v > 0, else the smallest normal f32
//   v == +inf             -> +inf
//   v == -inf             -> 0
//   v is NaN              -> v (NaN is propagated)
%87 = spirv.SLessThanEqual %82, %cst_vec_4xi32_6 : vector<4xi32>
%88 = spirv.SGreaterThanEqual %82, %cst_vec_4xi32_11 : vector<4xi32>
%89 = spirv.FOrdEqual %68, %cst_vec_4xf32_9 : vector<4xf32>
%90 = spirv.FOrdEqual %68, %cst_vec_4xf32_8 : vector<4xf32>
%91 = spirv.FOrdGreaterThan %68, %cst_vec_4xf32_7 : vector<4xf32>
%92 = spirv.LogicalAnd %87, %88 : vector<4xi1>
%93 = spirv.Select %91, %cst_vec_4xf32_8, %cst_vec_4xf32_10 : vector<4xi1>, vector<4xf32>
%94 = spirv.Select %92, %86, %93 : vector<4xi1>, vector<4xf32>
%95 = spirv.Select %90, %cst_vec_4xf32_8, %94 : vector<4xi1>, vector<4xf32>
%96 = spirv.Select %89, %cst_vec_4xf32_7, %95 : vector<4xi1>, vector<4xf32>
%97 = spirv.Select %70, %68, %96 : vector<4xi1>, vector<4xf32>
// Back in f16: %101 = y * (1 / (1 + exp(-y))).
%98 = spirv.FConvert %97 : vector<4xf32> to vector<4xf16>
%99 = spirv.FAdd %98, %cst_vec_4xf16 : vector<4xf16>
%100 = spirv.FDiv %cst_vec_4xf16, %99 : vector<4xf16>
%101 = spirv.FMul %100, %66 : vector<4xf16>
// Store the result at [%53 + %31] in binding (0, 2).
%102 = spirv.IAdd %53, %31 : i32
%103 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %102] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %103, %101 : vector<4xf16>
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_17_generic_2x320x96x96, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_17_generic_2x320x96x96 "LocalSize", 8, 4, 1
}
}
}
}
// forward_dispatch_18: scalar f16 copy kernel.
// Each invocation copies one f16 element from a source indexed with a row
// stride of 96 (plane stride 9216) to a destination indexed with a row stride
// of 98 (plane stride 9604 = 98 * 98), shifted by 99 = 98 + 1 elements,
// i.e. one destination row plus one column.
// NOTE(review): this matches writing a 96x96 plane into the interior of a
// 98x98 plane (1-element border padding) -- confirm against the consumer.
hal.executable private @forward_dispatch_18 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
hal.executable.export public @forward_dispatch_18 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation, workgroup_size = [32 : index, 1 : index, 1 : index]} {
// The region yields the constants (3, 96, 640). They line up with the index
// strides used in the kernel: 3 * 32 lanes = 96 elements per row, 96 rows,
// and 640 = 2 * 320 z slices.
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index):
%c3 = arith.constant 3 : index
%c96 = arith.constant 96 : index
%c640 = arith.constant 640 : index
hal.return %c3, %c96, %c640 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
// Two i32 push constants; each is floor-divided by 2 into an f16 element offset.
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
// Binding (0, 0): source f16 scalars; binding (0, 1): destination f16 scalars.
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.func @forward_dispatch_18() "None" {
%cst1_i32 = spirv.Constant 1 : i32
// 99 = 98 + 1: one destination row plus one element.
%cst99_i32 = spirv.Constant 99 : i32
// 3073280 = 320 * 9604: destination stride per (z / 320) slice.
%cst3073280_i32 = spirv.Constant 3073280 : i32
%cst98_i32 = spirv.Constant 98 : i32
%cst9604_i32 = spirv.Constant 9604 : i32
// 2949120 = 320 * 9216: source stride per (z / 320) slice.
%cst2949120_i32 = spirv.Constant 2949120 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst96_i32 = spirv.Constant 96 : i32
%cst9216_i32 = spirv.Constant 9216 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst0_i32 = spirv.Constant 0 : i32
%cst320_i32 = spirv.Constant 320 : i32
// Read push constants 0 and 1 into %1 and %3.
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
%0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%1 = spirv.Load "PushConstant" %0 : i32
%2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%3 = spirv.Load "PushConstant" %2 : i32
// Floor division idiom, used twice below:
//   neg = x < 0; t = neg ? (-1 - x) : x; q = t / d; result = neg ? (-1 - q) : q
// which rounds toward negative infinity for a positive divisor d.
// The divisor 2 equals the declared stride of the f16 arrays being indexed.
// NOTE(review): the push constants are presumably byte offsets -- confirm.
// %9 = floordiv(%1, 2): source element offset.
%4 = spirv.SLessThan %1, %cst0_i32 : i32
%5 = spirv.ISub %cst-1_i32, %1 : i32
%6 = spirv.Select %4, %5, %1 : i1, i32
%7 = spirv.SDiv %6, %cst2_i32 : i32
%8 = spirv.ISub %cst-1_i32, %7 : i32
%9 = spirv.Select %4, %8, %7 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// %15 = floordiv(%3, 2): destination element offset.
%10 = spirv.SLessThan %3, %cst0_i32 : i32
%11 = spirv.ISub %cst-1_i32, %3 : i32
%12 = spirv.Select %10, %11, %3 : i1, i32
%13 = spirv.SDiv %12, %cst2_i32 : i32
%14 = spirv.ISub %cst-1_i32, %13 : i32
%15 = spirv.Select %10, %14, %13 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// WorkgroupId components: %17 = x, %19 = y, %21 = z;
// z is split into %22 = z mod 320 and %23 = z / 320.
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%16 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%17 = spirv.CompositeExtract %16[0 : i32] : vector<3xi32>
%18 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%19 = spirv.CompositeExtract %18[1 : i32] : vector<3xi32>
%20 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%21 = spirv.CompositeExtract %20[2 : i32] : vector<3xi32>
%22 = spirv.UMod %21, %cst320_i32 : i32
%23 = spirv.UDiv %21, %cst320_i32 : i32
// LocalInvocationId components: %25 = x, %27 = y, %29 = z.
// The declared LocalSize is (32, 1, 1), so y and z are always 0 here.
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%24 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%25 = spirv.CompositeExtract %24[0 : i32] : vector<3xi32>
%26 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%27 = spirv.CompositeExtract %26[1 : i32] : vector<3xi32>
%28 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%29 = spirv.CompositeExtract %28[2 : i32] : vector<3xi32>
// Source index:
//   %42 = (z mod 320 + lid.z) * 9216 + (wg.y + lid.y) * 96
//         + lid.x + wg.x * 32 + (z / 320) * 2949120 + %9
%30 = spirv.IMul %22, %cst9216_i32 : i32
%31 = spirv.IMul %29, %cst9216_i32 : i32
%32 = spirv.IAdd %30, %31 : i32
%33 = spirv.IMul %19, %cst96_i32 : i32
%34 = spirv.IAdd %32, %33 : i32
%35 = spirv.IMul %27, %cst96_i32 : i32
%36 = spirv.IAdd %34, %35 : i32
%37 = spirv.IAdd %36, %25 : i32
%38 = spirv.IMul %17, %cst32_i32 : i32
%39 = spirv.IAdd %37, %38 : i32
%40 = spirv.IMul %23, %cst2949120_i32 : i32
%41 = spirv.IAdd %39, %40 : i32
%42 = spirv.IAdd %41, %9 : i32
%43 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %42] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%44 = spirv.Load "StorageBuffer" %43 : f16
// Destination index uses the same coordinates with strides 9604 / 98:
//   %57 = (z mod 320 + lid.z) * 9604 + (wg.y + lid.y) * 98
//         + lid.x + wg.x * 32 + (z / 320) * 3073280 + %15 + 99
// %38 (wg.x * 32) is reused from the source computation.
%45 = spirv.IMul %22, %cst9604_i32 : i32
%46 = spirv.IMul %29, %cst9604_i32 : i32
%47 = spirv.IAdd %45, %46 : i32
%48 = spirv.IMul %19, %cst98_i32 : i32
%49 = spirv.IAdd %47, %48 : i32
%50 = spirv.IMul %27, %cst98_i32 : i32
%51 = spirv.IAdd %49, %50 : i32
%52 = spirv.IAdd %51, %25 : i32
%53 = spirv.IAdd %52, %38 : i32
%54 = spirv.IMul %23, %cst3073280_i32 : i32
%55 = spirv.IAdd %53, %54 : i32
%56 = spirv.IAdd %55, %15 : i32
%57 = spirv.IAdd %56, %cst99_i32 : i32
%58 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %57] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
// Write the loaded element unchanged.
spirv.Store "StorageBuffer" %58, %44 : f16
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_18, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_18 "LocalSize", 32, 1, 1
}
}
}
}
// Executable @forward_dispatch_19: a pure gather/copy kernel. Each invocation loads one f16 from
// binding (0, 0) and stores it unchanged to binding (0, 1); only the two linear indices differ.
// NOTE(review): the index strides below (98*98 on the source, 96*96 on the destination, plus two
// mod-3 window coordinates) look like a 3x3 sliding-window expansion of a 2x320x98x98 input into
// the 2x320x3x3x96x96 layout named in the entry point -- confirm against the producing dispatch.
hal.executable private @forward_dispatch_19 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
hal.executable.export public @forward_dispatch_19_generic_2x320x3x3x96x96 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation, workgroup_size = [32 : index, 1 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: index):
// Workgroup count region: returns the constant grid (3, 96, 5760); none of the block arguments are used.
// 3 workgroups * 32 invocations = 96 along x, and 5760 = 2 * 320 * 3 * 3 along z.
%c3 = arith.constant 3 : index
%c96 = arith.constant 96 : index
%c5760 = arith.constant 5760 : index
hal.return %c3, %c96, %c5760 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
// Module-level interface: two built-in inputs, a 2 x i32 push-constant block, and two f16
// runtime-array storage buffers (binding 0 is read, binding 1 is written by the function below).
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.func @forward_dispatch_19_generic_2x320x3x3x96x96() "None" {
// Stride constants. Arithmetic relations: 9604 = 98*98, 3073280 = 320*9604,
// 9216 = 96*96, 27648 = 3*9216, 82944 = 9*9216, 26542080 = 320*82944.
%cst1_i32 = spirv.Constant 1 : i32
%cst27648_i32 = spirv.Constant 27648 : i32
%cst82944_i32 = spirv.Constant 82944 : i32
%cst26542080_i32 = spirv.Constant 26542080 : i32
%cst96_i32 = spirv.Constant 96 : i32
%cst9216_i32 = spirv.Constant 9216 : i32
%cst9604_i32 = spirv.Constant 9604 : i32
%cst3073280_i32 = spirv.Constant 3073280 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst98_i32 = spirv.Constant 98 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst0_i32 = spirv.Constant 0 : i32
%cst3_i32 = spirv.Constant 3 : i32
%cst320_i32 = spirv.Constant 320 : i32
// Load the two push constants: %1 = element 0, %3 = element 1.
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
%0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%1 = spirv.Load "PushConstant" %0 : i32
%2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%3 = spirv.Load "PushConstant" %2 : i32
// %9 = floor(%1 / 2) for signed %1: negative inputs are mapped through -1 - x before and after
// the truncating SDiv so the result rounds toward negative infinity.
// NOTE(review): presumably converts a byte offset into an f16 element offset -- confirm with the host side.
%4 = spirv.SLessThan %1, %cst0_i32 : i32
%5 = spirv.ISub %cst-1_i32, %1 : i32
%6 = spirv.Select %4, %5, %1 : i1, i32
%7 = spirv.SDiv %6, %cst2_i32 : i32
%8 = spirv.ISub %cst-1_i32, %7 : i32
%9 = spirv.Select %4, %8, %7 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// %15 = floor(%3 / 2), same floor-division pattern as above; used as the store-side base offset.
%10 = spirv.SLessThan %3, %cst0_i32 : i32
%11 = spirv.ISub %cst-1_i32, %3 : i32
%12 = spirv.Select %10, %11, %3 : i1, i32
%13 = spirv.SDiv %12, %cst2_i32 : i32
%14 = spirv.ISub %cst-1_i32, %13 : i32
%15 = spirv.Select %10, %14, %13 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
// Split WorkgroupId.z (%17) into four coordinates with unsigned div/mod:
//   %23 = z % 3, %22 = (z / 3) % 3, %21 = (z / 9) % 320, %20 = (z / 9) / 320.
%16 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%17 = spirv.CompositeExtract %16[2 : i32] : vector<3xi32>
%18 = spirv.UDiv %17, %cst3_i32 : i32
%19 = spirv.UDiv %18, %cst3_i32 : i32
%20 = spirv.UDiv %19, %cst320_i32 : i32
%21 = spirv.UMod %19, %cst320_i32 : i32
%22 = spirv.UMod %18, %cst3_i32 : i32
%23 = spirv.UMod %17, %cst3_i32 : i32
// %25 = WorkgroupId.y, %27 = WorkgroupId.x.
%24 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%25 = spirv.CompositeExtract %24[1 : i32] : vector<3xi32>
%26 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%27 = spirv.CompositeExtract %26[0 : i32] : vector<3xi32>
// %29 / %31 / %33 = LocalInvocationId.x / .y / .z. The ExecutionMode below declares
// LocalSize 32x1x1, so the .y and .z terms are expected to contribute zero.
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%28 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%29 = spirv.CompositeExtract %28[0 : i32] : vector<3xi32>
%30 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%31 = spirv.CompositeExtract %30[1 : i32] : vector<3xi32>
%32 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%33 = spirv.CompositeExtract %32[2 : i32] : vector<3xi32>
// Source index %48 =
//   (lid.y + %22 + wg.y) * 98 + %23 + wg.x * 32 + lid.z + lid.x + %20 * 3073280 + %21 * 9604 + %9.
%34 = spirv.IMul %31, %cst98_i32 : i32
%35 = spirv.IMul %22, %cst98_i32 : i32
%36 = spirv.IAdd %34, %35 : i32
%37 = spirv.IMul %25, %cst98_i32 : i32
%38 = spirv.IAdd %36, %37 : i32
%39 = spirv.IAdd %38, %23 : i32
%40 = spirv.IMul %27, %cst32_i32 : i32
%41 = spirv.IAdd %39, %40 : i32
%42 = spirv.IAdd %41, %33 : i32
%43 = spirv.IAdd %42, %29 : i32
%44 = spirv.IMul %20, %cst3073280_i32 : i32
%45 = spirv.IAdd %43, %44 : i32
%46 = spirv.IMul %21, %cst9604_i32 : i32
%47 = spirv.IAdd %45, %46 : i32
%48 = spirv.IAdd %47, %9 : i32
%49 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %48] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%50 = spirv.Load "StorageBuffer" %49 : f16
// Destination index %66 =
//   (%23 + lid.z) * 9216 + (wg.y + lid.y) * 96 + lid.x + wg.x * 32
//   + %20 * 26542080 + %21 * 82944 + %22 * 27648 + %15.
%51 = spirv.IMul %23, %cst9216_i32 : i32
%52 = spirv.IMul %33, %cst9216_i32 : i32
%53 = spirv.IAdd %51, %52 : i32
%54 = spirv.IMul %25, %cst96_i32 : i32
%55 = spirv.IAdd %53, %54 : i32
%56 = spirv.IMul %31, %cst96_i32 : i32
%57 = spirv.IAdd %55, %56 : i32
%58 = spirv.IAdd %57, %29 : i32
%59 = spirv.IAdd %58, %40 : i32
%60 = spirv.IMul %20, %cst26542080_i32 : i32
%61 = spirv.IAdd %59, %60 : i32
%62 = spirv.IMul %21, %cst82944_i32 : i32
%63 = spirv.IAdd %61, %62 : i32
%64 = spirv.IMul %22, %cst27648_i32 : i32
%65 = spirv.IAdd %63, %64 : i32
%66 = spirv.IAdd %65, %15 : i32
%67 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %66] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
// Write the loaded value through unmodified.
spirv.Store "StorageBuffer" %67, %50 : f16
spirv.Return
}
// Entry point declaration; the interface lists the two built-in input variables.
spirv.EntryPoint "GLCompute" @forward_dispatch_19_generic_2x320x3x3x96x96, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_19_generic_2x320x3x3x96x96 "LocalSize", 32, 1, 1
}
}
}
}
// Executable @forward_dispatch_20: elementwise kernel over vector<4xf16> elements.
// Per invocation: s = binding1[...] + binding0[...]; result = s * (1 / (1 + exp(-s))), stored to
// binding 2. exp is evaluated in f32 with an inlined range-reduction + polynomial approximation.
// NOTE(review): s / (1 + exp(-s)) is the SiLU/swish form; binding 1 is indexed without the
// WorkgroupId.y term, so it looks like a bias row broadcast over the 2 rows -- confirm.
hal.executable private @forward_dispatch_20 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
hal.executable.export public @forward_dispatch_20_generic_2x1280 ordinal(0) layout(#pipeline_layout2) attributes {translation_info = #translation1, workgroup_size = [32 : index, 1 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
// Workgroup count region: returns the constant grid (10, 2, 1).
// 10 workgroups * 32 invocations * 4 lanes = 1280 f16 values along x; 2 along y.
%c10 = arith.constant 10 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
hal.return %c10, %c2, %c1 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
// Module-level interface: two built-in inputs and three storage buffers viewed as runtime arrays
// of vector<4xf16> (stride 8 bytes). Bindings 0 and 1 are read, binding 2 is written.
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
spirv.func @forward_dispatch_20_generic_2x1280() "None" {
// Index constants: 23264 and 216398400 are fixed element offsets (in vector<4xf16> units) added
// to the binding-2 and binding-1 indices respectively; 320 is the row stride in the same units.
%cst23264_i32 = spirv.Constant 23264 : i32
%cst320_i32 = spirv.Constant 320 : i32
%cst216398400_i32 = spirv.Constant 216398400 : i32
%cst32_i32 = spirv.Constant 32 : i32
// Constants for the f32 exp approximation: 0.693147182 ~ ln(2), 1.44269502 ~ log2(e), followed by
// the polynomial coefficients used in the Fma chain below.
%cst_vec_4xf32 = spirv.Constant dense<0.693147182> : vector<4xf32>
%cst_vec_4xf32_0 = spirv.Constant dense<1.44269502> : vector<4xf32>
%cst_vec_4xf32_1 = spirv.Constant dense<1.000000e+00> : vector<4xf32>
%cst_vec_4xf32_2 = spirv.Constant dense<0.499705136> : vector<4xf32>
%cst_vec_4xf32_3 = spirv.Constant dense<0.168738902> : vector<4xf32>
%cst_vec_4xf32_4 = spirv.Constant dense<0.0366896503> : vector<4xf32>
%cst_vec_4xf32_5 = spirv.Constant dense<1.314350e-02> : vector<4xf32>
// 23 = f32 mantissa width (shift amount), 127 = f32 exponent bias.
%cst_vec_4xi32 = spirv.Constant dense<23> : vector<4xi32>
%cst_vec_4xi32_6 = spirv.Constant dense<127> : vector<4xi32>
%cst_vec_4xf32_7 = spirv.Constant dense<0.000000e+00> : vector<4xf32>
// Bit patterns 0x7F800000 / 0xFF800000 are f32 +infinity / -infinity.
%cst_vec_4xf32_8 = spirv.Constant dense<0x7F800000> : vector<4xf32>
%cst_vec_4xf32_9 = spirv.Constant dense<0xFF800000> : vector<4xf32>
// 1.17549435E-38 is the smallest positive normal f32; used as the out-of-range result for x <= 0.
%cst_vec_4xf32_10 = spirv.Constant dense<1.17549435E-38> : vector<4xf32>
%cst_vec_4xi32_11 = spirv.Constant dense<-127> : vector<4xi32>
%cst_vec_4xf16 = spirv.Constant dense<1.000000e+00> : vector<4xf16>
%cst0_i32 = spirv.Constant 0 : i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
%__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
// %1 = WorkgroupId.y, %3 = WorkgroupId.x, %5 = LocalInvocationId.x.
%0 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%1 = spirv.CompositeExtract %0[1 : i32] : vector<3xi32>
%2 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%3 = spirv.CompositeExtract %2[0 : i32] : vector<3xi32>
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%4 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%5 = spirv.CompositeExtract %4[0 : i32] : vector<3xi32>
// %7 = wg.x * 32 + lid.x: column index in vector<4xf16> units.
%6 = spirv.IMul %3, %cst32_i32 : i32
%7 = spirv.IAdd %6, %5 : i32
// Operand from binding 1 at column + 216398400 (independent of WorkgroupId.y).
%8 = spirv.IAdd %7, %cst216398400_i32 : i32
%9 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %8] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
%10 = spirv.Load "StorageBuffer" %9 : vector<4xf16>
// Operand from binding 0 at %12 = column + wg.y * 320.
%11 = spirv.IMul %1, %cst320_i32 : i32
%12 = spirv.IAdd %7, %11 : i32
%13 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %12] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
%14 = spirv.Load "StorageBuffer" %13 : vector<4xf16>
// %15 = s (sum of the two operands, f16); %17 = x = -s widened to f32 for the exp evaluation.
%15 = spirv.FAdd %10, %14 : vector<4xf16>
%16 = spirv.FNegate %15 : vector<4xf16>
%17 = spirv.FConvert %16 : vector<4xf16> to vector<4xf32>
// NaN mask for x; the LogicalOr of %18 with itself is equivalent to %18.
%18 = spirv.IsNan %17 : vector<4xf32>
%19 = spirv.LogicalOr %18, %18 : vector<4xi1>
// Range reduction: k = floor(x * log2(e)) (%21), r = x - k * ln(2) (%23).
%20 = spirv.FMul %17, %cst_vec_4xf32_0 : vector<4xf32>
%21 = spirv.GL.Floor %20 : vector<4xf32>
%22 = spirv.FMul %21, %cst_vec_4xf32 : vector<4xf32>
%23 = spirv.FSub %17, %22 : vector<4xf32>
// %24 = r^2, %25 = r^4.
%24 = spirv.FMul %23, %23 : vector<4xf32>
%25 = spirv.FMul %24, %24 : vector<4xf32>
// Polynomial in r, evaluated as three pairs combined with r^2 and r^4:
//   %30 = (1 + r) + (0.499705136 + 0.168738902*r) * r^2 + (0.0366896503 + 0.0131435*r) * r^4.
%26 = spirv.GL.Fma %cst_vec_4xf32_1, %23, %cst_vec_4xf32_1 : vector<4xf32>
%27 = spirv.GL.Fma %cst_vec_4xf32_3, %23, %cst_vec_4xf32_2 : vector<4xf32>
%28 = spirv.GL.Fma %cst_vec_4xf32_5, %23, %cst_vec_4xf32_4 : vector<4xf32>
%29 = spirv.GL.Fma %27, %24, %26 : vector<4xf32>
%30 = spirv.GL.Fma %28, %25, %29 : vector<4xf32>
// Build 2^k directly as an f32 bit pattern: (k + 127) << 23, then bitcast; %35 = poly * 2^k.
%31 = spirv.ConvertFToS %21 : vector<4xf32> to vector<4xi32>
%32 = spirv.IAdd %31, %cst_vec_4xi32_6 : vector<4xi32>
%33 = spirv.ShiftLeftLogical %32, %cst_vec_4xi32 : vector<4xi32>, vector<4xi32>
%34 = spirv.Bitcast %33 : vector<4xi32> to vector<4xf32>
%35 = spirv.FMul %30, %34 : vector<4xf32>
// Special-case predicates: k within [-127, 127] (%41), x == -inf (%38), x == +inf (%39), x > 0 (%40).
%36 = spirv.SLessThanEqual %31, %cst_vec_4xi32_6 : vector<4xi32>
%37 = spirv.SGreaterThanEqual %31, %cst_vec_4xi32_11 : vector<4xi32>
%38 = spirv.FOrdEqual %17, %cst_vec_4xf32_9 : vector<4xf32>
%39 = spirv.FOrdEqual %17, %cst_vec_4xf32_8 : vector<4xf32>
%40 = spirv.FOrdGreaterThan %17, %cst_vec_4xf32_7 : vector<4xf32>
%41 = spirv.LogicalAnd %36, %37 : vector<4xi1>
// Select chain, later selects take priority over earlier ones:
//   out-of-range k -> +inf if x > 0, else smallest normal f32; x == +inf -> +inf;
//   x == -inf -> 0; NaN x -> x itself.
%42 = spirv.Select %40, %cst_vec_4xf32_8, %cst_vec_4xf32_10 : vector<4xi1>, vector<4xf32>
%43 = spirv.Select %41, %35, %42 : vector<4xi1>, vector<4xf32>
%44 = spirv.Select %39, %cst_vec_4xf32_8, %43 : vector<4xi1>, vector<4xf32>
%45 = spirv.Select %38, %cst_vec_4xf32_7, %44 : vector<4xi1>, vector<4xf32>
%46 = spirv.Select %19, %17, %45 : vector<4xi1>, vector<4xf32>
// Back to f16: %49 = 1 / (exp(-s) + 1), %50 = %49 * s.
%47 = spirv.FConvert %46 : vector<4xf32> to vector<4xf16>
%48 = spirv.FAdd %47, %cst_vec_4xf16 : vector<4xf16>
%49 = spirv.FDiv %cst_vec_4xf16, %48 : vector<4xf16>
%50 = spirv.FMul %49, %15 : vector<4xf16>
// Store to binding 2 at the binding-0 index plus the fixed offset 23264.
%51 = spirv.IAdd %12, %cst23264_i32 : i32
%52 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %51] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %52, %50 : vector<4xf16>
spirv.Return
}
// Entry point declaration; the interface lists the two built-in input variables.
spirv.EntryPoint "GLCompute" @forward_dispatch_20_generic_2x1280, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_20_generic_2x1280 "LocalSize", 32, 1, 1
}
}
}
}
hal.executable private @forward_dispatch_21 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
hal.executable.export public @forward_dispatch_21_matmul_32x320x1280 ordinal(0) layout(#pipeline_layout7) attributes {subgroup_size = 32 : index, translation_info = #translation4, workgroup_size = [128 : index, 1 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
%c5 = arith.constant 5 : index
%c1 = arith.constant 1 : index
hal.return %c5, %c1, %c1 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, Float16, CooperativeMatrixNV], [SPV_KHR_storage_buffer_storage_class, SPV_NV_cooperative_matrix]> {
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<1 x i32, stride=4> [0])>, PushConstant>
spirv.GlobalVariable @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>
spirv.GlobalVariable @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.func @forward_dispatch_21_matmul_32x320x1280() "None" attributes {spirv.entry_point_abi = #spirv.entry_point_abi<subgroup_size = 32>} {
%cst9_i32 = spirv.Constant 9 : i32
%false = spirv.Constant false
%cst5_i32 = spirv.Constant 5 : i32
%cst-33_i32 = spirv.Constant -33 : i32
%cst-576_i32 = spirv.Constant -576 : i32
%cst40_i32 = spirv.Constant 40 : i32
%cst-320_i32 = spirv.Constant -320 : i32
%cst288_i32 = spirv.Constant 288 : i32
%cst82_i32 = spirv.Constant 82 : i32
%cst80_i32 = spirv.Constant 80 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst144_i32 = spirv.Constant 144 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst640_i32 = spirv.Constant 640 : i32
%cst160_i32 = spirv.Constant 160 : i32
%cst11632_i32 = spirv.Constant 11632 : i32
%cst156_i32 = spirv.Constant 156 : i32
%cst4_i32 = spirv.Constant 4 : i32
%cst5120_i32 = spirv.Constant 5120 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst16_i32 = spirv.Constant 16 : i32
%cst1280_i32 = spirv.Constant 1280 : i32
%cst0_i32 = spirv.Constant 0 : i32
%cst1248_i32 = spirv.Constant 1248 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst_f16 = spirv.Constant 0.000000e+00 : f16
%0 = spirv.CompositeConstruct %cst_f16 : (f16) -> !spirv.coopmatrix<16x16xf16, Subgroup>
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%1 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%2 = spirv.CompositeExtract %1[0 : i32] : vector<3xi32>
%3 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%4 = spirv.CompositeExtract %3[1 : i32] : vector<3xi32>
%5 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%6 = spirv.CompositeExtract %5[2 : i32] : vector<3xi32>
%__workgroup_mem__4_addr = spirv.mlir.addressof @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>
%__workgroup_mem__5_addr = spirv.mlir.addressof @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<1 x i32, stride=4> [0])>, PushConstant>
%7 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<1 x i32, stride=4> [0])>, PushConstant>, i32, i32
%8 = spirv.Load "PushConstant" %7 : i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%9 = spirv.SLessThan %8, %cst0_i32 : i32
%10 = spirv.ISub %cst-1_i32, %8 : i32
%11 = spirv.Select %9, %10, %8 : i1, i32
%12 = spirv.SDiv %11, %cst16_i32 : i32
%13 = spirv.ISub %cst-1_i32, %12 : i32
%14 = spirv.Select %9, %13, %12 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%15 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%16 = spirv.CompositeExtract %15[1 : i32] : vector<3xi32>
%17 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%18 = spirv.CompositeExtract %17[0 : i32] : vector<3xi32>
%19 = spirv.IMul %4, %cst5120_i32 : i32
%20 = spirv.IAdd %2, %19 : i32
%21 = spirv.IMul %6, %cst5120_i32 : i32
%22 = spirv.IAdd %20, %21 : i32
%23 = spirv.SLessThan %2, %cst0_i32 : i32
%24 = spirv.ISub %cst-1_i32, %2 : i32
%25 = spirv.Select %23, %24, %2 : i1, i32
%26 = spirv.SDiv %25, %cst4_i32 : i32
%27 = spirv.ISub %cst-1_i32, %26 : i32
%28 = spirv.Select %23, %27, %26 : i1, i32
%29 = spirv.IMul %28, %cst156_i32 : i32
%30 = spirv.IAdd %22, %29 : i32
%31 = spirv.IAdd %30, %cst11632_i32 : i32
%32 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %31] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%33 = spirv.Load "StorageBuffer" %32 : vector<4xf32>
%34 = spirv.IMul %4, %cst160_i32 : i32
%35 = spirv.IAdd %2, %34 : i32
%36 = spirv.IMul %6, %cst160_i32 : i32
%37 = spirv.IAdd %35, %36 : i32
%38 = spirv.IAdd %37, %28 : i32
%39 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %38] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %39, %33 : vector<4xf32>
%40 = spirv.IMul %4, %cst640_i32 : i32
%41 = spirv.IAdd %2, %40 : i32
%42 = spirv.IMul %6, %cst640_i32 : i32
%43 = spirv.IAdd %41, %42 : i32
%44 = spirv.IMul %18, %cst8_i32 : i32
%45 = spirv.IAdd %43, %44 : i32
%46 = spirv.IAdd %45, %14 : i32
%47 = spirv.SDiv %25, %cst8_i32 : i32
%48 = spirv.ISub %cst-1_i32, %47 : i32
%49 = spirv.Select %23, %48, %47 : i1, i32
%50 = spirv.IMul %49, %cst32_i32 : i32
%51 = spirv.IAdd %46, %50 : i32
%52 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %51] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%53 = spirv.Load "StorageBuffer" %52 : vector<4xf32>
%54 = spirv.IMul %4, %cst144_i32 : i32
%55 = spirv.IAdd %2, %54 : i32
%56 = spirv.IMul %6, %cst144_i32 : i32
%57 = spirv.IAdd %55, %56 : i32
%58 = spirv.IAdd %57, %49 : i32
%59 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %58] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %59, %53 : vector<4xf32>
%60 = spirv.IAdd %51, %cst640_i32 : i32
%61 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %60] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%62 = spirv.Load "StorageBuffer" %61 : vector<4xf32>
%63 = spirv.IAdd %58, %cst144_i32 : i32
%64 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %63] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %64, %62 : vector<4xf32>
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
%65 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%66 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%67 = spirv.Variable : !spirv.ptr<i32, Function>
spirv.mlir.loop {
spirv.Branch ^bb1(%cst0_i32, %0, %0, %cst0_i32 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32)
^bb1(%106: i32, %107: !spirv.coopmatrix<16x16xf16, Subgroup>, %108: !spirv.coopmatrix<16x16xf16, Subgroup>, %109: i32): // 2 preds: ^bb0, ^bb2
%110 = spirv.SLessThan %106, %cst1248_i32 : i32
spirv.BranchConditional %110, ^bb2, ^bb3
^bb2: // pred: ^bb1
%111 = spirv.IMul %109, %cst160_i32 : i32
%112 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %111] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
%113 = spirv.NV.CooperativeMatrixLoad %112, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%114 = spirv.IAdd %111, %cst2_i32 : i32
%115 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %114] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
%116 = spirv.NV.CooperativeMatrixLoad %115, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%117 = spirv.IAdd %111, %cst80_i32 : i32
%118 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %117] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
%119 = spirv.NV.CooperativeMatrixLoad %118, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%120 = spirv.IAdd %111, %cst82_i32 : i32
%121 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %120] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
%122 = spirv.NV.CooperativeMatrixLoad %121, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%123 = spirv.IMul %109, %cst288_i32 : i32
%124 = spirv.SDiv %25, %cst32_i32 : i32
%125 = spirv.ISub %cst-1_i32, %124 : i32
%126 = spirv.Select %23, %125, %124 : i1, i32
%127 = spirv.IMul %126, %cst2_i32 : i32
%128 = spirv.IAdd %123, %127 : i32
%129 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %128] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%130 = spirv.NV.CooperativeMatrixLoad %129, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%131 = spirv.IAdd %128, %cst144_i32 : i32
%132 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %131] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%133 = spirv.NV.CooperativeMatrixLoad %132, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%134 = spirv.NV.CooperativeMatrixMulAdd %113, %130, %107 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%135 = spirv.NV.CooperativeMatrixMulAdd %116, %133, %134 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%136 = spirv.NV.CooperativeMatrixMulAdd %119, %130, %108 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%137 = spirv.NV.CooperativeMatrixMulAdd %122, %133, %136 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%138 = spirv.IAdd %106, %cst32_i32 : i32
%139 = spirv.SLessThan %138, %cst0_i32 : i32
%140 = spirv.ISub %cst-33_i32, %106 : i32
%141 = spirv.Select %139, %140, %138 : i1, i32
%142 = spirv.SDiv %141, %cst8_i32 : i32
%143 = spirv.ISub %cst-1_i32, %142 : i32
%144 = spirv.Select %139, %143, %142 : i1, i32
%145 = spirv.IAdd %22, %144 : i32
%146 = spirv.IAdd %145, %29 : i32
%147 = spirv.IAdd %146, %cst11632_i32 : i32
%148 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %147] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%149 = spirv.Load "StorageBuffer" %148 : vector<4xf32>
%150 = spirv.SDiv %141, %cst32_i32 : i32
%151 = spirv.ISub %cst-1_i32, %150 : i32
%152 = spirv.Select %139, %151, %150 : i1, i32
%153 = spirv.GL.SAbs %152 : i32
%154 = spirv.GL.SAbs %cst2_i32 : i32
%155 = spirv.UMod %153, %154 : i32
%156 = spirv.IEqual %152, %153 : i32
%157 = spirv.SNegate %155 : i32
%158 = spirv.Select %156, %155, %157 : i1, i32
%159 = spirv.SLessThan %158, %cst0_i32 : i32
%160 = spirv.IAdd %158, %cst2_i32 : i32
%161 = spirv.Select %159, %160, %158 : i1, i32
%162 = spirv.IMul %152, %cst160_i32 : i32
%163 = spirv.IAdd %162, %37 : i32
%164 = spirv.SLessThan %152, %cst0_i32 : i32
%165 = spirv.ISub %cst-1_i32, %152 : i32
%166 = spirv.Select %164, %165, %152 : i1, i32
%167 = spirv.SDiv %166, %cst2_i32 : i32
%168 = spirv.ISub %cst-1_i32, %167 : i32
%169 = spirv.Select %164, %168, %167 : i1, i32
%170 = spirv.IMul %169, %cst-320_i32 : i32
%171 = spirv.IAdd %163, %170 : i32
%172 = spirv.IAdd %171, %28 : i32
%173 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %172] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %173, %149 : vector<4xf32>
%174 = spirv.IMul %138, %cst40_i32 : i32
%175 = spirv.IAdd %174, %2 : i32
%176 = spirv.IAdd %175, %40 : i32
%177 = spirv.IAdd %176, %42 : i32
%178 = spirv.IAdd %177, %44 : i32
%179 = spirv.IAdd %178, %14 : i32
%180 = spirv.IAdd %179, %50 : i32
%181 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %180] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%182 = spirv.Load "StorageBuffer" %181 : vector<4xf32>
%183 = spirv.IMul %152, %cst288_i32 : i32
%184 = spirv.IAdd %183, %57 : i32
%185 = spirv.IMul %169, %cst-576_i32 : i32
%186 = spirv.IAdd %184, %185 : i32
%187 = spirv.IAdd %186, %49 : i32
%188 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %187] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %188, %182 : vector<4xf32>
%189 = spirv.IAdd %180, %cst640_i32 : i32
%190 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %189] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%191 = spirv.Load "StorageBuffer" %190 : vector<4xf32>
%192 = spirv.IAdd %187, %cst144_i32 : i32
%193 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %192] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %193, %191 : vector<4xf32>
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
spirv.Store "Function" %65, %135 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %66, %137 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %67, %161 : i32
spirv.Branch ^bb1(%138, %135, %137, %161 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
%68 = spirv.Load "Function" %67 : i32
%69 = spirv.Load "Function" %66 : !spirv.coopmatrix<16x16xf16, Subgroup>
%70 = spirv.Load "Function" %65 : !spirv.coopmatrix<16x16xf16, Subgroup>
%71 = spirv.IMul %68, %cst160_i32 : i32
%72 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %71] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
%73 = spirv.NV.CooperativeMatrixLoad %72, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%74 = spirv.IAdd %71, %cst2_i32 : i32
%75 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %74] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
%76 = spirv.NV.CooperativeMatrixLoad %75, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%77 = spirv.IAdd %71, %cst80_i32 : i32
%78 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %77] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
%79 = spirv.NV.CooperativeMatrixLoad %78, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%80 = spirv.IAdd %71, %cst82_i32 : i32
%81 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %80] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
%82 = spirv.NV.CooperativeMatrixLoad %81, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%83 = spirv.IMul %68, %cst288_i32 : i32
%84 = spirv.SDiv %25, %cst32_i32 : i32
%85 = spirv.ISub %cst-1_i32, %84 : i32
%86 = spirv.Select %23, %85, %84 : i1, i32
%87 = spirv.IMul %86, %cst2_i32 : i32
%88 = spirv.IAdd %83, %87 : i32
%89 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %88] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%90 = spirv.NV.CooperativeMatrixLoad %89, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%91 = spirv.IAdd %88, %cst144_i32 : i32
%92 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %91] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%93 = spirv.NV.CooperativeMatrixLoad %92, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%94 = spirv.NV.CooperativeMatrixMulAdd %73, %90, %70 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%95 = spirv.NV.CooperativeMatrixMulAdd %76, %93, %94 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%96 = spirv.NV.CooperativeMatrixMulAdd %79, %90, %69 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%97 = spirv.NV.CooperativeMatrixMulAdd %82, %93, %96 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%98 = spirv.IMul %16, %cst1280_i32 : i32
%99 = spirv.IMul %4, %cst1280_i32 : i32
%100 = spirv.IAdd %98, %99 : i32
%101 = spirv.IAdd %100, %44 : i32
%102 = spirv.IAdd %101, %87 : i32
%103 = spirv.IAdd %102, %cst640_i32 : i32
%104 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %103] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %104, %97, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
%105 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %102] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %105, %95, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_21_matmul_32x320x1280, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__
spirv.ExecutionMode @forward_dispatch_21_matmul_32x320x1280 "LocalSize", 128, 1, 1
}
}
}
}
hal.executable private @forward_dispatch_22 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
// Export record for dispatch 22: binds the entry point
// @forward_dispatch_22_generic_2x320x9216x2880 at ordinal 0 to #pipeline_layout8.
// The attributes declare a 64x2x1 workgroup size and a subgroup size of 32.
hal.executable.export public @forward_dispatch_22_generic_2x320x9216x2880 ordinal(0) layout(#pipeline_layout8) attributes {subgroup_size = 32 : index, translation_info = #translation4, workgroup_size = [64 : index, 2 : index, 1 : index]} {
// The region takes a device handle plus three index arguments; none of them
// is read below, so the returned values are compile-time constants.
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
%c144 = arith.constant 144 : index
%c5 = arith.constant 5 : index
%c2 = arith.constant 2 : index
// NOTE(review): presumably the workgroup count as (x, y, z) = (144, 5, 2);
// confirm against the HAL dialect documentation for hal.executable.export.
hal.return %c144, %c5, %c2 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_NV_cooperative_matrix]> {
// Module-level variables for the dispatch-22 entry point.
//
// * Built-ins: WorkgroupId and LocalInvocationId, both vector<3xi32> inputs.
// * Push constants: a single struct holding an array of five i32 values.
// * Workgroup storage: three arrays of vector<4xf32> with 576, 640 and 576
//   elements. In the visible part of the function, @__workgroup_mem__7 and
//   @__workgroup_mem__8 receive data loaded from the storage buffers, and
//   @__workgroup_mem__6 is a CooperativeMatrixStore destination.
// * Storage buffers: bindings (0, 0) and (0, 1) are each declared twice and
//   marked {aliased}; the two declarations of a binding differ only in element
//   type (f16 with stride 2 versus vector<4xf32> with stride 16). Binding
//   (0, 2) has a single vector<4xf32> declaration and is not marked aliased.
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>
spirv.GlobalVariable @__workgroup_mem__8 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>
spirv.GlobalVariable @__workgroup_mem__7 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>
spirv.GlobalVariable @__workgroup_mem__6 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
// Binding (0, 0), f16 element view.
spirv.GlobalVariable @__resource_var_0_0__1 bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// Binding (0, 1), vector<4xf32> element view.
spirv.GlobalVariable @__resource_var_0_1__0 bind(0, 1) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
// Binding (0, 0), vector<4xf32> element view.
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
// Binding (0, 1), f16 element view.
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// Binding (0, 2); CooperativeMatrixStore target in the preceding dispatch-21
// kernel, which uses the same binding slot. NOTE(review): presumably the output
// buffer here as well — confirm against the remainder of this function.
spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.func @forward_dispatch_22_generic_2x320x9216x2880() "None" attributes {spirv.entry_point_abi = #spirv.entry_point_abi<subgroup_size = 32>} {
%cst9_i32 = spirv.Constant 9 : i32
%false = spirv.Constant false
%cst5_i32 = spirv.Constant 5 : i32
%cst3_i32 = spirv.Constant 3 : i32
%cst1_i32 = spirv.Constant 1 : i32
%cst-33_i32 = spirv.Constant -33 : i32
%cst55296_i32 = spirv.Constant 55296 : i32
%cst432_i32 = spirv.Constant 432 : i32
%cst48_i32 = spirv.Constant 48 : i32
%cst36864_i32 = spirv.Constant 36864 : i32
%cst368640_i32 = spirv.Constant 368640 : i32
%cst73728_i32 = spirv.Constant 73728 : i32
%cst64_i32 = spirv.Constant 64 : i32
%cst576_i32 = spirv.Constant 576 : i32
%cst-576_i32 = spirv.Constant -576 : i32
%cst1152_i32 = spirv.Constant 1152 : i32
%cst-640_i32 = spirv.Constant -640 : i32
%cst146_i32 = spirv.Constant 146 : i32
%cst288_i32 = spirv.Constant 288 : i32
%cst82_i32 = spirv.Constant 82 : i32
%cst144_i32 = spirv.Constant 144 : i32
%cst72_i32 = spirv.Constant 72 : i32
%cst1144_i32 = spirv.Constant 1144 : i32
%cst3317760_i32 = spirv.Constant 3317760 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst18432_i32 = spirv.Constant 18432 : i32
%cst9216_i32 = spirv.Constant 9216 : i32
%cst160_i32 = spirv.Constant 160 : i32
%cst80_i32 = spirv.Constant 80 : i32
%cst356_i32 = spirv.Constant 356 : i32
%cst4_i32 = spirv.Constant 4 : i32
%cst11520_i32 = spirv.Constant 11520 : i32
%cst5760_i32 = spirv.Constant 5760 : i32
%cst23040_i32 = spirv.Constant 23040 : i32
%cst320_i32 = spirv.Constant 320 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst16_i32 = spirv.Constant 16 : i32
%cst_vec_4xf32 = spirv.Constant dense<0.000000e+00> : vector<4xf32>
%cst0_i32 = spirv.Constant 0 : i32
%cst2848_i32 = spirv.Constant 2848 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst_f16 = spirv.Constant 0.000000e+00 : f16
%0 = spirv.CompositeConstruct %cst_f16 : (f16) -> !spirv.coopmatrix<16x16xf16, Subgroup>
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%1 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%2 = spirv.CompositeExtract %1[0 : i32] : vector<3xi32>
%3 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%4 = spirv.CompositeExtract %3[1 : i32] : vector<3xi32>
%5 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%6 = spirv.CompositeExtract %5[2 : i32] : vector<3xi32>
%__workgroup_mem__6_addr = spirv.mlir.addressof @__workgroup_mem__6 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>
%__workgroup_mem__7_addr = spirv.mlir.addressof @__workgroup_mem__7 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>
%__workgroup_mem__8_addr = spirv.mlir.addressof @__workgroup_mem__8 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>
%7 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32
%8 = spirv.Load "PushConstant" %7 : i32
%9 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32
%10 = spirv.Load "PushConstant" %9 : i32
%11 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32
%12 = spirv.Load "PushConstant" %11 : i32
%13 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst3_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32
%14 = spirv.Load "PushConstant" %13 : i32
%15 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst4_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32
%16 = spirv.Load "PushConstant" %15 : i32
%__resource_var_0_0__1_addr = spirv.mlir.addressof @__resource_var_0_0__1 : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
%17 = spirv.SLessThan %10, %cst0_i32 : i32
%18 = spirv.ISub %cst-1_i32, %10 : i32
%19 = spirv.Select %17, %18, %10 : i1, i32
%20 = spirv.SDiv %19, %cst16_i32 : i32
%21 = spirv.ISub %cst-1_i32, %20 : i32
%22 = spirv.Select %17, %21, %20 : i1, i32
%__resource_var_0_1__0_addr = spirv.mlir.addressof @__resource_var_0_1__0 : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%23 = spirv.SLessThan %8, %cst0_i32 : i32
%24 = spirv.ISub %cst-1_i32, %8 : i32
%25 = spirv.Select %23, %24, %8 : i1, i32
%26 = spirv.SDiv %25, %cst16_i32 : i32
%27 = spirv.ISub %cst-1_i32, %26 : i32
%28 = spirv.Select %23, %27, %26 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%29 = spirv.SLessThan %12, %cst0_i32 : i32
%30 = spirv.ISub %cst-1_i32, %12 : i32
%31 = spirv.Select %29, %30, %12 : i1, i32
%32 = spirv.SDiv %31, %cst2_i32 : i32
%33 = spirv.ISub %cst-1_i32, %32 : i32
%34 = spirv.Select %29, %33, %32 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
%35 = spirv.SLessThan %14, %cst0_i32 : i32
%36 = spirv.ISub %cst-1_i32, %14 : i32
%37 = spirv.Select %35, %36, %14 : i1, i32
%38 = spirv.SDiv %37, %cst2_i32 : i32
%39 = spirv.ISub %cst-1_i32, %38 : i32
%40 = spirv.Select %35, %39, %38 : i1, i32
%41 = spirv.SLessThan %16, %cst0_i32 : i32
%42 = spirv.ISub %cst-1_i32, %16 : i32
%43 = spirv.Select %41, %42, %16 : i1, i32
%44 = spirv.SDiv %43, %cst16_i32 : i32
%45 = spirv.ISub %cst-1_i32, %44 : i32
%46 = spirv.Select %41, %45, %44 : i1, i32
%__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%47 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%48 = spirv.CompositeExtract %47[2 : i32] : vector<3xi32>
%49 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%50 = spirv.CompositeExtract %49[1 : i32] : vector<3xi32>
%51 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%52 = spirv.CompositeExtract %51[0 : i32] : vector<3xi32>
%53 = spirv.IMul %50, %cst23040_i32 : i32
%54 = spirv.IAdd %53, %2 : i32
%55 = spirv.IMul %4, %cst5760_i32 : i32
%56 = spirv.IAdd %54, %55 : i32
%57 = spirv.IMul %6, %cst11520_i32 : i32
%58 = spirv.IAdd %56, %57 : i32
%59 = spirv.IAdd %58, %22 : i32
%60 = spirv.SLessThan %2, %cst0_i32 : i32
%61 = spirv.ISub %cst-1_i32, %2 : i32
%62 = spirv.Select %60, %61, %2 : i1, i32
%63 = spirv.SDiv %62, %cst4_i32 : i32
%64 = spirv.ISub %cst-1_i32, %63 : i32
%65 = spirv.Select %60, %64, %63 : i1, i32
%66 = spirv.IMul %65, %cst356_i32 : i32
%67 = spirv.IAdd %59, %66 : i32
%68 = spirv.AccessChain %__resource_var_0_1__0_addr[%cst0_i32, %67] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%69 = spirv.Load "StorageBuffer" %68 : vector<4xf32>
%70 = spirv.IMul %4, %cst80_i32 : i32
%71 = spirv.IAdd %2, %70 : i32
%72 = spirv.IMul %6, %cst160_i32 : i32
%73 = spirv.IAdd %71, %72 : i32
%74 = spirv.IAdd %73, %65 : i32
%75 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %74] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %75, %69 : vector<4xf32>
%76 = spirv.IAdd %67, %cst11520_i32 : i32
%77 = spirv.AccessChain %__resource_var_0_1__0_addr[%cst0_i32, %76] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%78 = spirv.Load "StorageBuffer" %77 : vector<4xf32>
%79 = spirv.IAdd %74, %cst160_i32 : i32
%80 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %79] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %80, %78 : vector<4xf32>
%81 = spirv.IMul %4, %cst9216_i32 : i32
%82 = spirv.IAdd %2, %81 : i32
%83 = spirv.IMul %6, %cst18432_i32 : i32
%84 = spirv.IAdd %82, %83 : i32
%85 = spirv.IMul %52, %cst8_i32 : i32
%86 = spirv.IAdd %84, %85 : i32
%87 = spirv.IMul %48, %cst3317760_i32 : i32
%88 = spirv.IAdd %86, %87 : i32
%89 = spirv.IAdd %88, %28 : i32
%90 = spirv.SDiv %62, %cst8_i32 : i32
%91 = spirv.ISub %cst-1_i32, %90 : i32
%92 = spirv.Select %60, %91, %90 : i1, i32
%93 = spirv.IMul %92, %cst1144_i32 : i32
%94 = spirv.IAdd %89, %93 : i32
%95 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %94] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%96 = spirv.Load "StorageBuffer" %95 : vector<4xf32>
%97 = spirv.IMul %4, %cst72_i32 : i32
%98 = spirv.IAdd %2, %97 : i32
%99 = spirv.IMul %6, %cst144_i32 : i32
%100 = spirv.IAdd %98, %99 : i32
%101 = spirv.IAdd %100, %92 : i32
%102 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %101] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %102, %96 : vector<4xf32>
%103 = spirv.IAdd %94, %cst18432_i32 : i32
%104 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %103] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%105 = spirv.Load "StorageBuffer" %104 : vector<4xf32>
%106 = spirv.IAdd %101, %cst144_i32 : i32
%107 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %106] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %107, %105 : vector<4xf32>
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
%108 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%109 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%110 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%111 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%112 = spirv.Variable : !spirv.ptr<i32, Function>
spirv.mlir.loop {
spirv.Branch ^bb1(%cst0_i32, %0, %0, %0, %0, %cst0_i32 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32)
^bb1(%305: i32, %306: !spirv.coopmatrix<16x16xf16, Subgroup>, %307: !spirv.coopmatrix<16x16xf16, Subgroup>, %308: !spirv.coopmatrix<16x16xf16, Subgroup>, %309: !spirv.coopmatrix<16x16xf16, Subgroup>, %310: i32): // 2 preds: ^bb0, ^bb2
%311 = spirv.SLessThan %305, %cst2848_i32 : i32
spirv.BranchConditional %311, ^bb2, ^bb3
^bb2: // pred: ^bb1
%312 = spirv.IMul %310, %cst320_i32 : i32
%313 = spirv.IMul %4, %cst160_i32 : i32
%314 = spirv.IAdd %312, %313 : i32
%315 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %314] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%316 = spirv.NV.CooperativeMatrixLoad %315, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%317 = spirv.IAdd %314, %cst2_i32 : i32
%318 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %317] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%319 = spirv.NV.CooperativeMatrixLoad %318, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%320 = spirv.IAdd %314, %cst80_i32 : i32
%321 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %320] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%322 = spirv.NV.CooperativeMatrixLoad %321, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%323 = spirv.IAdd %314, %cst82_i32 : i32
%324 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %323] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%325 = spirv.NV.CooperativeMatrixLoad %324, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%326 = spirv.IMul %310, %cst288_i32 : i32
%327 = spirv.IMul %6, %cst288_i32 : i32
%328 = spirv.IAdd %326, %327 : i32
%329 = spirv.SDiv %62, %cst32_i32 : i32
%330 = spirv.ISub %cst-1_i32, %329 : i32
%331 = spirv.Select %60, %330, %329 : i1, i32
%332 = spirv.IMul %331, %cst4_i32 : i32
%333 = spirv.IAdd %328, %332 : i32
%334 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %333] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%335 = spirv.NV.CooperativeMatrixLoad %334, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%336 = spirv.IAdd %333, %cst2_i32 : i32
%337 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %336] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%338 = spirv.NV.CooperativeMatrixLoad %337, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%339 = spirv.IAdd %333, %cst144_i32 : i32
%340 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %339] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%341 = spirv.NV.CooperativeMatrixLoad %340, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%342 = spirv.IAdd %333, %cst146_i32 : i32
%343 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %342] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%344 = spirv.NV.CooperativeMatrixLoad %343, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%345 = spirv.NV.CooperativeMatrixMulAdd %316, %335, %306 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%346 = spirv.NV.CooperativeMatrixMulAdd %319, %341, %345 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%347 = spirv.NV.CooperativeMatrixMulAdd %316, %338, %307 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%348 = spirv.NV.CooperativeMatrixMulAdd %319, %344, %347 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%349 = spirv.NV.CooperativeMatrixMulAdd %322, %335, %308 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%350 = spirv.NV.CooperativeMatrixMulAdd %325, %341, %349 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%351 = spirv.NV.CooperativeMatrixMulAdd %322, %338, %309 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%352 = spirv.NV.CooperativeMatrixMulAdd %325, %344, %351 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%353 = spirv.IAdd %305, %cst32_i32 : i32
%354 = spirv.SLessThan %353, %cst0_i32 : i32
%355 = spirv.ISub %cst-33_i32, %305 : i32
%356 = spirv.Select %354, %355, %353 : i1, i32
%357 = spirv.SDiv %356, %cst8_i32 : i32
%358 = spirv.ISub %cst-1_i32, %357 : i32
%359 = spirv.Select %354, %358, %357 : i1, i32
%360 = spirv.IAdd %59, %359 : i32
%361 = spirv.IAdd %360, %66 : i32
%362 = spirv.AccessChain %__resource_var_0_1__0_addr[%cst0_i32, %361] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%363 = spirv.Load "StorageBuffer" %362 : vector<4xf32>
%364 = spirv.SDiv %356, %cst32_i32 : i32
%365 = spirv.ISub %cst-1_i32, %364 : i32
%366 = spirv.Select %354, %365, %364 : i1, i32
%367 = spirv.GL.SAbs %366 : i32
%368 = spirv.GL.SAbs %cst2_i32 : i32
%369 = spirv.UMod %367, %368 : i32
%370 = spirv.IEqual %366, %367 : i32
%371 = spirv.SNegate %369 : i32
%372 = spirv.Select %370, %369, %371 : i1, i32
%373 = spirv.SLessThan %372, %cst0_i32 : i32
%374 = spirv.IAdd %372, %cst2_i32 : i32
%375 = spirv.Select %373, %374, %372 : i1, i32
%376 = spirv.IMul %366, %cst320_i32 : i32
%377 = spirv.IAdd %376, %73 : i32
%378 = spirv.SLessThan %366, %cst0_i32 : i32
%379 = spirv.ISub %cst-1_i32, %366 : i32
%380 = spirv.Select %378, %379, %366 : i1, i32
%381 = spirv.SDiv %380, %cst2_i32 : i32
%382 = spirv.ISub %cst-1_i32, %381 : i32
%383 = spirv.Select %378, %382, %381 : i1, i32
%384 = spirv.IMul %383, %cst-640_i32 : i32
%385 = spirv.IAdd %377, %384 : i32
%386 = spirv.IAdd %385, %65 : i32
%387 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %386] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %387, %363 : vector<4xf32>
%388 = spirv.IAdd %361, %cst11520_i32 : i32
%389 = spirv.AccessChain %__resource_var_0_1__0_addr[%cst0_i32, %388] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%390 = spirv.Load "StorageBuffer" %389 : vector<4xf32>
%391 = spirv.IAdd %386, %cst160_i32 : i32
%392 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %391] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %392, %390 : vector<4xf32>
%393 = spirv.IMul %353, %cst1152_i32 : i32
%394 = spirv.IAdd %393, %2 : i32
%395 = spirv.IAdd %394, %81 : i32
%396 = spirv.IAdd %395, %83 : i32
%397 = spirv.IAdd %396, %85 : i32
%398 = spirv.IAdd %397, %87 : i32
%399 = spirv.IAdd %398, %28 : i32
%400 = spirv.IAdd %399, %93 : i32
%401 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %400] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%402 = spirv.Load "StorageBuffer" %401 : vector<4xf32>
%403 = spirv.IMul %366, %cst288_i32 : i32
%404 = spirv.IAdd %403, %100 : i32
%405 = spirv.IMul %383, %cst-576_i32 : i32
%406 = spirv.IAdd %404, %405 : i32
%407 = spirv.IAdd %406, %92 : i32
%408 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %407] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %408, %402 : vector<4xf32>
%409 = spirv.IAdd %400, %cst18432_i32 : i32
%410 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %409] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%411 = spirv.Load "StorageBuffer" %410 : vector<4xf32>
%412 = spirv.IAdd %407, %cst144_i32 : i32
%413 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %412] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %413, %411 : vector<4xf32>
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
spirv.Store "Function" %108, %346 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %109, %348 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %110, %350 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %111, %352 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %112, %375 : i32
spirv.Branch ^bb1(%353, %346, %348, %350, %352, %375 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
%113 = spirv.Load "Function" %112 : i32
%114 = spirv.Load "Function" %111 : !spirv.coopmatrix<16x16xf16, Subgroup>
%115 = spirv.Load "Function" %110 : !spirv.coopmatrix<16x16xf16, Subgroup>
%116 = spirv.Load "Function" %109 : !spirv.coopmatrix<16x16xf16, Subgroup>
%117 = spirv.Load "Function" %108 : !spirv.coopmatrix<16x16xf16, Subgroup>
%118 = spirv.IMul %4, %cst160_i32 : i32
%119 = spirv.IMul %113, %cst320_i32 : i32
%120 = spirv.IAdd %118, %119 : i32
%121 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %120] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%122 = spirv.NV.CooperativeMatrixLoad %121, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%123 = spirv.IAdd %120, %cst2_i32 : i32
%124 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %123] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%125 = spirv.NV.CooperativeMatrixLoad %124, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%126 = spirv.IAdd %120, %cst80_i32 : i32
%127 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %126] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%128 = spirv.NV.CooperativeMatrixLoad %127, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%129 = spirv.IAdd %120, %cst82_i32 : i32
%130 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %129] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%131 = spirv.NV.CooperativeMatrixLoad %130, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%132 = spirv.IMul %113, %cst288_i32 : i32
%133 = spirv.IMul %6, %cst288_i32 : i32
%134 = spirv.IAdd %132, %133 : i32
%135 = spirv.SDiv %62, %cst32_i32 : i32
%136 = spirv.ISub %cst-1_i32, %135 : i32
%137 = spirv.Select %60, %136, %135 : i1, i32
%138 = spirv.IMul %137, %cst4_i32 : i32
%139 = spirv.IAdd %134, %138 : i32
%140 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %139] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%141 = spirv.NV.CooperativeMatrixLoad %140, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%142 = spirv.IAdd %139, %cst2_i32 : i32
%143 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %142] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%144 = spirv.NV.CooperativeMatrixLoad %143, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%145 = spirv.IAdd %139, %cst144_i32 : i32
%146 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %145] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%147 = spirv.NV.CooperativeMatrixLoad %146, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%148 = spirv.IAdd %139, %cst146_i32 : i32
%149 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %148] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%150 = spirv.NV.CooperativeMatrixLoad %149, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%151 = spirv.NV.CooperativeMatrixMulAdd %122, %141, %117 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%152 = spirv.NV.CooperativeMatrixMulAdd %125, %147, %151 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%153 = spirv.NV.CooperativeMatrixMulAdd %122, %144, %116 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%154 = spirv.NV.CooperativeMatrixMulAdd %125, %150, %153 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%155 = spirv.NV.CooperativeMatrixMulAdd %128, %141, %115 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%156 = spirv.NV.CooperativeMatrixMulAdd %131, %147, %155 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%157 = spirv.NV.CooperativeMatrixMulAdd %128, %144, %114 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%158 = spirv.NV.CooperativeMatrixMulAdd %131, %150, %157 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%159 = spirv.IMul %4, %cst288_i32 : i32
%160 = spirv.IMul %6, %cst576_i32 : i32
%161 = spirv.IAdd %159, %160 : i32
%162 = spirv.IAdd %161, %138 : i32
%163 = spirv.IAdd %162, %cst146_i32 : i32
%164 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %163] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.NV.CooperativeMatrixStore %164, %158, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>
%165 = spirv.IAdd %162, %cst144_i32 : i32
%166 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %165] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.NV.CooperativeMatrixStore %166, %156, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>
%167 = spirv.IAdd %162, %cst2_i32 : i32
%168 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %167] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.NV.CooperativeMatrixStore %168, %154, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>
%169 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %162] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.NV.CooperativeMatrixStore %169, %152, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
%170 = spirv.IMul %50, %cst64_i32 : i32
%171 = spirv.IMul %4, %cst8_i32 : i32
%172 = spirv.IAdd %170, %171 : i32
%173 = spirv.IMul %6, %cst16_i32 : i32
%174 = spirv.IAdd %172, %173 : i32
%175 = spirv.IAdd %174, %34 : i32
%176 = spirv.IAdd %175, %92 : i32
%177 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %176] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%178 = spirv.Load "StorageBuffer" %177 : f16
%179 = spirv.IAdd %174, %40 : i32
%180 = spirv.IAdd %179, %92 : i32
%181 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %180] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%182 = spirv.Load "StorageBuffer" %181 : f16
%183 = spirv.IMul %48, %cst320_i32 : i32
%184 = spirv.IAdd %174, %183 : i32
%185 = spirv.IAdd %184, %92 : i32
%186 = spirv.AccessChain %__resource_var_0_0__1_addr[%cst0_i32, %185] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%187 = spirv.Load "StorageBuffer" %186 : f16
%188 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %101] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%189 = spirv.Load "Workgroup" %188 : vector<4xf32>
%190 = spirv.CompositeConstruct %182, %182, %182, %182 : (f16, f16, f16, f16) -> vector<4xf16>
%191 = spirv.CompositeConstruct %187, %187, %187, %187 : (f16, f16, f16, f16) -> vector<4xf16>
%192 = spirv.FAdd %190, %191 : vector<4xf16>
%193 = spirv.VectorShuffle [0 : i32, 1 : i32] %189 : vector<4xf32>, %189 : vector<4xf32> -> vector<2xf32>
%194 = spirv.Bitcast %193 : vector<2xf32> to vector<4xf16>
%195 = spirv.CompositeConstruct %178, %178, %178, %178 : (f16, f16, f16, f16) -> vector<4xf16>
%196 = spirv.FAdd %194, %195 : vector<4xf16>
%197 = spirv.VectorShuffle [2 : i32, 3 : i32] %189 : vector<4xf32>, %189 : vector<4xf32> -> vector<2xf32>
%198 = spirv.Bitcast %197 : vector<2xf32> to vector<4xf16>
%199 = spirv.FAdd %198, %195 : vector<4xf16>
%200 = spirv.FAdd %196, %192 : vector<4xf16>
%201 = spirv.FAdd %199, %192 : vector<4xf16>
%202 = spirv.Bitcast %201 : vector<4xf16> to vector<2xf32>
%203 = spirv.Bitcast %200 : vector<4xf16> to vector<2xf32>
%204 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %203 : vector<2xf32> -> vector<4xf32>
%205 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %204 : vector<4xf32>, %202 : vector<2xf32> -> vector<4xf32>
%206 = spirv.IMul %50, %cst73728_i32 : i32
%207 = spirv.IAdd %206, %2 : i32
%208 = spirv.IAdd %207, %81 : i32
%209 = spirv.IAdd %208, %83 : i32
%210 = spirv.IAdd %209, %85 : i32
%211 = spirv.IMul %48, %cst368640_i32 : i32
%212 = spirv.IAdd %210, %211 : i32
%213 = spirv.IAdd %212, %46 : i32
%214 = spirv.IAdd %213, %93 : i32
%215 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %214] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %215, %205 : vector<4xf32>
%216 = spirv.IAdd %176, %cst16_i32 : i32
%217 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %216] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%218 = spirv.Load "StorageBuffer" %217 : f16
%219 = spirv.IAdd %180, %cst16_i32 : i32
%220 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %219] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%221 = spirv.Load "StorageBuffer" %220 : f16
%222 = spirv.IAdd %185, %cst16_i32 : i32
%223 = spirv.AccessChain %__resource_var_0_0__1_addr[%cst0_i32, %222] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%224 = spirv.Load "StorageBuffer" %223 : f16
%225 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %106] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%226 = spirv.Load "Workgroup" %225 : vector<4xf32>
%227 = spirv.CompositeConstruct %221, %221, %221, %221 : (f16, f16, f16, f16) -> vector<4xf16>
%228 = spirv.CompositeConstruct %224, %224, %224, %224 : (f16, f16, f16, f16) -> vector<4xf16>
%229 = spirv.FAdd %227, %228 : vector<4xf16>
%230 = spirv.VectorShuffle [0 : i32, 1 : i32] %226 : vector<4xf32>, %226 : vector<4xf32> -> vector<2xf32>
%231 = spirv.Bitcast %230 : vector<2xf32> to vector<4xf16>
%232 = spirv.CompositeConstruct %218, %218, %218, %218 : (f16, f16, f16, f16) -> vector<4xf16>
%233 = spirv.FAdd %231, %232 : vector<4xf16>
%234 = spirv.VectorShuffle [2 : i32, 3 : i32] %226 : vector<4xf32>, %226 : vector<4xf32> -> vector<2xf32>
%235 = spirv.Bitcast %234 : vector<2xf32> to vector<4xf16>
%236 = spirv.FAdd %235, %232 : vector<4xf16>
%237 = spirv.FAdd %233, %229 : vector<4xf16>
%238 = spirv.FAdd %236, %229 : vector<4xf16>
%239 = spirv.Bitcast %238 : vector<4xf16> to vector<2xf32>
%240 = spirv.Bitcast %237 : vector<4xf16> to vector<2xf32>
%241 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %240 : vector<2xf32> -> vector<4xf32>
%242 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %241 : vector<4xf32>, %239 : vector<2xf32> -> vector<4xf32>
%243 = spirv.IAdd %214, %cst18432_i32 : i32
%244 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %243] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %244, %242 : vector<4xf32>
%245 = spirv.IAdd %176, %cst32_i32 : i32
%246 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %245] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%247 = spirv.Load "StorageBuffer" %246 : f16
%248 = spirv.IAdd %180, %cst32_i32 : i32
%249 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %248] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%250 = spirv.Load "StorageBuffer" %249 : f16
%251 = spirv.IAdd %185, %cst32_i32 : i32
%252 = spirv.AccessChain %__resource_var_0_0__1_addr[%cst0_i32, %251] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%253 = spirv.Load "StorageBuffer" %252 : f16
%254 = spirv.IAdd %101, %cst288_i32 : i32
%255 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %254] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%256 = spirv.Load "Workgroup" %255 : vector<4xf32>
%257 = spirv.CompositeConstruct %250, %250, %250, %250 : (f16, f16, f16, f16) -> vector<4xf16>
%258 = spirv.CompositeConstruct %253, %253, %253, %253 : (f16, f16, f16, f16) -> vector<4xf16>
%259 = spirv.FAdd %257, %258 : vector<4xf16>
%260 = spirv.VectorShuffle [0 : i32, 1 : i32] %256 : vector<4xf32>, %256 : vector<4xf32> -> vector<2xf32>
%261 = spirv.Bitcast %260 : vector<2xf32> to vector<4xf16>
%262 = spirv.CompositeConstruct %247, %247, %247, %247 : (f16, f16, f16, f16) -> vector<4xf16>
%263 = spirv.FAdd %261, %262 : vector<4xf16>
%264 = spirv.VectorShuffle [2 : i32, 3 : i32] %256 : vector<4xf32>, %256 : vector<4xf32> -> vector<2xf32>
%265 = spirv.Bitcast %264 : vector<2xf32> to vector<4xf16>
%266 = spirv.FAdd %265, %262 : vector<4xf16>
%267 = spirv.FAdd %263, %259 : vector<4xf16>
%268 = spirv.FAdd %266, %259 : vector<4xf16>
%269 = spirv.Bitcast %268 : vector<4xf16> to vector<2xf32>
%270 = spirv.Bitcast %267 : vector<4xf16> to vector<2xf32>
%271 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %270 : vector<2xf32> -> vector<4xf32>
%272 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %271 : vector<4xf32>, %269 : vector<2xf32> -> vector<4xf32>
%273 = spirv.IAdd %214, %cst36864_i32 : i32
%274 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %273] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %274, %272 : vector<4xf32>
%275 = spirv.IAdd %176, %cst48_i32 : i32
%276 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %275] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%277 = spirv.Load "StorageBuffer" %276 : f16
%278 = spirv.IAdd %180, %cst48_i32 : i32
%279 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %278] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%280 = spirv.Load "StorageBuffer" %279 : f16
%281 = spirv.IAdd %185, %cst48_i32 : i32
%282 = spirv.AccessChain %__resource_var_0_0__1_addr[%cst0_i32, %281] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%283 = spirv.Load "StorageBuffer" %282 : f16
%284 = spirv.IAdd %101, %cst432_i32 : i32
%285 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %284] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%286 = spirv.Load "Workgroup" %285 : vector<4xf32>
%287 = spirv.CompositeConstruct %280, %280, %280, %280 : (f16, f16, f16, f16) -> vector<4xf16>
%288 = spirv.CompositeConstruct %283, %283, %283, %283 : (f16, f16, f16, f16) -> vector<4xf16>
%289 = spirv.FAdd %287, %288 : vector<4xf16>
%290 = spirv.VectorShuffle [0 : i32, 1 : i32] %286 : vector<4xf32>, %286 : vector<4xf32> -> vector<2xf32>
%291 = spirv.Bitcast %290 : vector<2xf32> to vector<4xf16>
%292 = spirv.CompositeConstruct %277, %277, %277, %277 : (f16, f16, f16, f16) -> vector<4xf16>
%293 = spirv.FAdd %291, %292 : vector<4xf16>
%294 = spirv.VectorShuffle [2 : i32, 3 : i32] %286 : vector<4xf32>, %286 : vector<4xf32> -> vector<2xf32>
%295 = spirv.Bitcast %294 : vector<2xf32> to vector<4xf16>
%296 = spirv.FAdd %295, %292 : vector<4xf16>
%297 = spirv.FAdd %293, %289 : vector<4xf16>
%298 = spirv.FAdd %296, %289 : vector<4xf16>
%299 = spirv.Bitcast %298 : vector<4xf16> to vector<2xf32>
%300 = spirv.Bitcast %297 : vector<4xf16> to vector<2xf32>
%301 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %300 : vector<2xf32> -> vector<4xf32>
%302 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %301 : vector<4xf32>, %299 : vector<2xf32> -> vector<4xf32>
%303 = spirv.IAdd %214, %cst55296_i32 : i32
%304 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %303] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %304, %302 : vector<4xf32>
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_22_generic_2x320x9216x2880, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__
spirv.ExecutionMode @forward_dispatch_22_generic_2x320x9216x2880 "LocalSize", 64, 2, 1
}
}
}
}
hal.executable private @forward_dispatch_30 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
// Export entry for the dispatch-30 kernel: ordinal 0, pipeline layout
// #pipeline_layout8, workgroup size 64x2x1, subgroup size 32.
// The region ignores all of its block arguments and returns the constant
// index triple (144, 5, 2).
// NOTE(review): per the IREE HAL convention this triple is presumably the
// workgroup count along x, y, z -- confirm against the HAL dialect docs.
hal.executable.export public @forward_dispatch_30_generic_2x320x9216x2880 ordinal(0) layout(#pipeline_layout8) attributes {subgroup_size = 32 : index, translation_info = #translation4, workgroup_size = [64 : index, 2 : index, 1 : index]} {
^bb0(%device: !hal.device, %unused0: index, %unused1: index, %unused2: index):
  %k2 = arith.constant 2 : index
  %k5 = arith.constant 5 : index
  %k144 = arith.constant 144 : index
  hal.return %k144, %k5, %k2 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16, CooperativeMatrixNV], [SPV_KHR_storage_buffer_storage_class, SPV_KHR_16bit_storage, SPV_NV_cooperative_matrix]> {
// Module-scope variables for the forward_dispatch_30 SPIR-V module.
//
// Built-in input: the WorkgroupId vector (3 x i32).
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
// Push-constant block: a struct wrapping five i32 values (stride 4, member offset 0).
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>
// Three Workgroup-storage arrays of vector<4xf32> (16-byte elements):
// 576 + 640 + 576 = 1792 elements, i.e. nominally 28672 bytes in total.
spirv.GlobalVariable @__workgroup_mem__7 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>
spirv.GlobalVariable @__workgroup_mem__6 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>
spirv.GlobalVariable @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>
// Built-in input: the LocalInvocationId vector (3 x i32).
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
// Storage buffers in descriptor set 0. Binding 0 is declared twice and marked
// {aliased}: once as a runtime array of vector<4xf32> (stride 16) and once as a
// runtime array of f16 (stride 2). Bindings 1 and 2 are each declared once as
// runtime arrays of vector<4xf32> (stride 16).
spirv.GlobalVariable @__resource_var_0_0__0 bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.func @forward_dispatch_30_generic_2x320x9216x2880() "None" attributes {spirv.entry_point_abi = #spirv.entry_point_abi<subgroup_size = 32>} {
%cst9_i32 = spirv.Constant 9 : i32
%false = spirv.Constant false
%cst5_i32 = spirv.Constant 5 : i32
%cst3_i32 = spirv.Constant 3 : i32
%cst1_i32 = spirv.Constant 1 : i32
%cst-33_i32 = spirv.Constant -33 : i32
%cst432_i32 = spirv.Constant 432 : i32
%cst48_i32 = spirv.Constant 48 : i32
%cst55296_i32 = spirv.Constant 55296 : i32
%cst36864_i32 = spirv.Constant 36864 : i32
%cst64_i32 = spirv.Constant 64 : i32
%cst368640_i32 = spirv.Constant 368640 : i32
%cst73728_i32 = spirv.Constant 73728 : i32
%cst576_i32 = spirv.Constant 576 : i32
%cst-576_i32 = spirv.Constant -576 : i32
%cst1152_i32 = spirv.Constant 1152 : i32
%cst-640_i32 = spirv.Constant -640 : i32
%cst146_i32 = spirv.Constant 146 : i32
%cst288_i32 = spirv.Constant 288 : i32
%cst82_i32 = spirv.Constant 82 : i32
%cst144_i32 = spirv.Constant 144 : i32
%cst72_i32 = spirv.Constant 72 : i32
%cst1144_i32 = spirv.Constant 1144 : i32
%cst3317760_i32 = spirv.Constant 3317760 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst18432_i32 = spirv.Constant 18432 : i32
%cst9216_i32 = spirv.Constant 9216 : i32
%cst160_i32 = spirv.Constant 160 : i32
%cst80_i32 = spirv.Constant 80 : i32
%cst356_i32 = spirv.Constant 356 : i32
%cst4_i32 = spirv.Constant 4 : i32
%cst11520_i32 = spirv.Constant 11520 : i32
%cst5760_i32 = spirv.Constant 5760 : i32
%cst23040_i32 = spirv.Constant 23040 : i32
%cst320_i32 = spirv.Constant 320 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst16_i32 = spirv.Constant 16 : i32
%cst_vec_4xf32 = spirv.Constant dense<0.000000e+00> : vector<4xf32>
%cst0_i32 = spirv.Constant 0 : i32
%cst2848_i32 = spirv.Constant 2848 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst_f16 = spirv.Constant 0.000000e+00 : f16
%0 = spirv.CompositeConstruct %cst_f16 : (f16) -> !spirv.coopmatrix<16x16xf16, Subgroup>
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%1 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%2 = spirv.CompositeExtract %1[0 : i32] : vector<3xi32>
%3 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%4 = spirv.CompositeExtract %3[1 : i32] : vector<3xi32>
%5 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%6 = spirv.CompositeExtract %5[2 : i32] : vector<3xi32>
%__workgroup_mem__5_addr = spirv.mlir.addressof @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>
%__workgroup_mem__6_addr = spirv.mlir.addressof @__workgroup_mem__6 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>
%__workgroup_mem__7_addr = spirv.mlir.addressof @__workgroup_mem__7 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>
%7 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32
%8 = spirv.Load "PushConstant" %7 : i32
%9 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32
%10 = spirv.Load "PushConstant" %9 : i32
%11 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32
%12 = spirv.Load "PushConstant" %11 : i32
%13 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst3_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32
%14 = spirv.Load "PushConstant" %13 : i32
%15 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst4_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32
%16 = spirv.Load "PushConstant" %15 : i32
%17 = spirv.SLessThan %8, %cst0_i32 : i32
%18 = spirv.ISub %cst-1_i32, %8 : i32
%19 = spirv.Select %17, %18, %8 : i1, i32
%20 = spirv.SDiv %19, %cst16_i32 : i32
%21 = spirv.ISub %cst-1_i32, %20 : i32
%22 = spirv.Select %17, %21, %20 : i1, i32
%__resource_var_0_0__0_addr = spirv.mlir.addressof @__resource_var_0_0__0 : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%23 = spirv.SLessThan %12, %cst0_i32 : i32
%24 = spirv.ISub %cst-1_i32, %12 : i32
%25 = spirv.Select %23, %24, %12 : i1, i32
%26 = spirv.SDiv %25, %cst16_i32 : i32
%27 = spirv.ISub %cst-1_i32, %26 : i32
%28 = spirv.Select %23, %27, %26 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%29 = spirv.SLessThan %14, %cst0_i32 : i32
%30 = spirv.ISub %cst-1_i32, %14 : i32
%31 = spirv.Select %29, %30, %14 : i1, i32
%32 = spirv.SDiv %31, %cst16_i32 : i32
%33 = spirv.ISub %cst-1_i32, %32 : i32
%34 = spirv.Select %29, %33, %32 : i1, i32
%35 = spirv.SLessThan %10, %cst0_i32 : i32
%36 = spirv.ISub %cst-1_i32, %10 : i32
%37 = spirv.Select %35, %36, %10 : i1, i32
%38 = spirv.SDiv %37, %cst2_i32 : i32
%39 = spirv.ISub %cst-1_i32, %38 : i32
%40 = spirv.Select %35, %39, %38 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
%41 = spirv.SLessThan %16, %cst0_i32 : i32
%42 = spirv.ISub %cst-1_i32, %16 : i32
%43 = spirv.Select %41, %42, %16 : i1, i32
%44 = spirv.SDiv %43, %cst16_i32 : i32
%45 = spirv.ISub %cst-1_i32, %44 : i32
%46 = spirv.Select %41, %45, %44 : i1, i32
%__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%47 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%48 = spirv.CompositeExtract %47[2 : i32] : vector<3xi32>
%49 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%50 = spirv.CompositeExtract %49[1 : i32] : vector<3xi32>
%51 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%52 = spirv.CompositeExtract %51[0 : i32] : vector<3xi32>
%53 = spirv.IMul %50, %cst23040_i32 : i32
%54 = spirv.IAdd %53, %2 : i32
%55 = spirv.IMul %4, %cst5760_i32 : i32
%56 = spirv.IAdd %54, %55 : i32
%57 = spirv.IMul %6, %cst11520_i32 : i32
%58 = spirv.IAdd %56, %57 : i32
%59 = spirv.IAdd %58, %22 : i32
%60 = spirv.SLessThan %2, %cst0_i32 : i32
%61 = spirv.ISub %cst-1_i32, %2 : i32
%62 = spirv.Select %60, %61, %2 : i1, i32
%63 = spirv.SDiv %62, %cst4_i32 : i32
%64 = spirv.ISub %cst-1_i32, %63 : i32
%65 = spirv.Select %60, %64, %63 : i1, i32
%66 = spirv.IMul %65, %cst356_i32 : i32
%67 = spirv.IAdd %59, %66 : i32
%68 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %67] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%69 = spirv.Load "StorageBuffer" %68 : vector<4xf32>
%70 = spirv.IMul %4, %cst80_i32 : i32
%71 = spirv.IAdd %2, %70 : i32
%72 = spirv.IMul %6, %cst160_i32 : i32
%73 = spirv.IAdd %71, %72 : i32
%74 = spirv.IAdd %73, %65 : i32
%75 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %74] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %75, %69 : vector<4xf32>
%76 = spirv.IAdd %67, %cst11520_i32 : i32
%77 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %76] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%78 = spirv.Load "StorageBuffer" %77 : vector<4xf32>
%79 = spirv.IAdd %74, %cst160_i32 : i32
%80 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %79] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %80, %78 : vector<4xf32>
%81 = spirv.IMul %4, %cst9216_i32 : i32
%82 = spirv.IAdd %2, %81 : i32
%83 = spirv.IMul %6, %cst18432_i32 : i32
%84 = spirv.IAdd %82, %83 : i32
%85 = spirv.IMul %52, %cst8_i32 : i32
%86 = spirv.IAdd %84, %85 : i32
%87 = spirv.IMul %48, %cst3317760_i32 : i32
%88 = spirv.IAdd %86, %87 : i32
%89 = spirv.IAdd %88, %28 : i32
%90 = spirv.SDiv %62, %cst8_i32 : i32
%91 = spirv.ISub %cst-1_i32, %90 : i32
%92 = spirv.Select %60, %91, %90 : i1, i32
%93 = spirv.IMul %92, %cst1144_i32 : i32
%94 = spirv.IAdd %89, %93 : i32
%95 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %94] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%96 = spirv.Load "StorageBuffer" %95 : vector<4xf32>
%97 = spirv.IMul %4, %cst72_i32 : i32
%98 = spirv.IAdd %2, %97 : i32
%99 = spirv.IMul %6, %cst144_i32 : i32
%100 = spirv.IAdd %98, %99 : i32
%101 = spirv.IAdd %100, %92 : i32
%102 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %101] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %102, %96 : vector<4xf32>
%103 = spirv.IAdd %94, %cst18432_i32 : i32
%104 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %103] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%105 = spirv.Load "StorageBuffer" %104 : vector<4xf32>
%106 = spirv.IAdd %101, %cst144_i32 : i32
%107 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %106] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %107, %105 : vector<4xf32>
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
%108 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%109 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%110 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%111 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%112 = spirv.Variable : !spirv.ptr<i32, Function>
spirv.mlir.loop {
spirv.Branch ^bb1(%cst0_i32, %0, %0, %0, %0, %cst0_i32 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32)
^bb1(%295: i32, %296: !spirv.coopmatrix<16x16xf16, Subgroup>, %297: !spirv.coopmatrix<16x16xf16, Subgroup>, %298: !spirv.coopmatrix<16x16xf16, Subgroup>, %299: !spirv.coopmatrix<16x16xf16, Subgroup>, %300: i32): // 2 preds: ^bb0, ^bb2
%301 = spirv.SLessThan %295, %cst2848_i32 : i32
spirv.BranchConditional %301, ^bb2, ^bb3
^bb2: // pred: ^bb1
%302 = spirv.IMul %300, %cst320_i32 : i32
%303 = spirv.IMul %4, %cst160_i32 : i32
%304 = spirv.IAdd %302, %303 : i32
%305 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %304] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%306 = spirv.NV.CooperativeMatrixLoad %305, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%307 = spirv.IAdd %304, %cst2_i32 : i32
%308 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %307] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%309 = spirv.NV.CooperativeMatrixLoad %308, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%310 = spirv.IAdd %304, %cst80_i32 : i32
%311 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %310] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%312 = spirv.NV.CooperativeMatrixLoad %311, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%313 = spirv.IAdd %304, %cst82_i32 : i32
%314 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %313] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%315 = spirv.NV.CooperativeMatrixLoad %314, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%316 = spirv.IMul %300, %cst288_i32 : i32
%317 = spirv.IMul %6, %cst288_i32 : i32
%318 = spirv.IAdd %316, %317 : i32
%319 = spirv.SDiv %62, %cst32_i32 : i32
%320 = spirv.ISub %cst-1_i32, %319 : i32
%321 = spirv.Select %60, %320, %319 : i1, i32
%322 = spirv.IMul %321, %cst4_i32 : i32
%323 = spirv.IAdd %318, %322 : i32
%324 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %323] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%325 = spirv.NV.CooperativeMatrixLoad %324, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%326 = spirv.IAdd %323, %cst2_i32 : i32
%327 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %326] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%328 = spirv.NV.CooperativeMatrixLoad %327, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%329 = spirv.IAdd %323, %cst144_i32 : i32
%330 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %329] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%331 = spirv.NV.CooperativeMatrixLoad %330, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%332 = spirv.IAdd %323, %cst146_i32 : i32
%333 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %332] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%334 = spirv.NV.CooperativeMatrixLoad %333, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%335 = spirv.NV.CooperativeMatrixMulAdd %306, %325, %296 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%336 = spirv.NV.CooperativeMatrixMulAdd %309, %331, %335 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%337 = spirv.NV.CooperativeMatrixMulAdd %306, %328, %297 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%338 = spirv.NV.CooperativeMatrixMulAdd %309, %334, %337 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%339 = spirv.NV.CooperativeMatrixMulAdd %312, %325, %298 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%340 = spirv.NV.CooperativeMatrixMulAdd %315, %331, %339 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%341 = spirv.NV.CooperativeMatrixMulAdd %312, %328, %299 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%342 = spirv.NV.CooperativeMatrixMulAdd %315, %334, %341 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%343 = spirv.IAdd %295, %cst32_i32 : i32
%344 = spirv.SLessThan %343, %cst0_i32 : i32
%345 = spirv.ISub %cst-33_i32, %295 : i32
%346 = spirv.Select %344, %345, %343 : i1, i32
%347 = spirv.SDiv %346, %cst8_i32 : i32
%348 = spirv.ISub %cst-1_i32, %347 : i32
%349 = spirv.Select %344, %348, %347 : i1, i32
%350 = spirv.IAdd %59, %349 : i32
%351 = spirv.IAdd %350, %66 : i32
%352 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %351] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%353 = spirv.Load "StorageBuffer" %352 : vector<4xf32>
%354 = spirv.SDiv %346, %cst32_i32 : i32
%355 = spirv.ISub %cst-1_i32, %354 : i32
%356 = spirv.Select %344, %355, %354 : i1, i32
%357 = spirv.GL.SAbs %356 : i32
%358 = spirv.GL.SAbs %cst2_i32 : i32
%359 = spirv.UMod %357, %358 : i32
%360 = spirv.IEqual %356, %357 : i32
%361 = spirv.SNegate %359 : i32
%362 = spirv.Select %360, %359, %361 : i1, i32
%363 = spirv.SLessThan %362, %cst0_i32 : i32
%364 = spirv.IAdd %362, %cst2_i32 : i32
%365 = spirv.Select %363, %364, %362 : i1, i32
%366 = spirv.IMul %356, %cst320_i32 : i32
%367 = spirv.IAdd %366, %73 : i32
%368 = spirv.SLessThan %356, %cst0_i32 : i32
%369 = spirv.ISub %cst-1_i32, %356 : i32
%370 = spirv.Select %368, %369, %356 : i1, i32
%371 = spirv.SDiv %370, %cst2_i32 : i32
%372 = spirv.ISub %cst-1_i32, %371 : i32
%373 = spirv.Select %368, %372, %371 : i1, i32
%374 = spirv.IMul %373, %cst-640_i32 : i32
%375 = spirv.IAdd %367, %374 : i32
%376 = spirv.IAdd %375, %65 : i32
%377 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %376] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %377, %353 : vector<4xf32>
%378 = spirv.IAdd %351, %cst11520_i32 : i32
%379 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %378] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%380 = spirv.Load "StorageBuffer" %379 : vector<4xf32>
%381 = spirv.IAdd %376, %cst160_i32 : i32
%382 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %381] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %382, %380 : vector<4xf32>
%383 = spirv.IMul %343, %cst1152_i32 : i32
%384 = spirv.IAdd %383, %2 : i32
%385 = spirv.IAdd %384, %81 : i32
%386 = spirv.IAdd %385, %83 : i32
%387 = spirv.IAdd %386, %85 : i32
%388 = spirv.IAdd %387, %87 : i32
%389 = spirv.IAdd %388, %28 : i32
%390 = spirv.IAdd %389, %93 : i32
%391 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %390] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%392 = spirv.Load "StorageBuffer" %391 : vector<4xf32>
%393 = spirv.IMul %356, %cst288_i32 : i32
%394 = spirv.IAdd %393, %100 : i32
%395 = spirv.IMul %373, %cst-576_i32 : i32
%396 = spirv.IAdd %394, %395 : i32
%397 = spirv.IAdd %396, %92 : i32
%398 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %397] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %398, %392 : vector<4xf32>
%399 = spirv.IAdd %390, %cst18432_i32 : i32
%400 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %399] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%401 = spirv.Load "StorageBuffer" %400 : vector<4xf32>
%402 = spirv.IAdd %397, %cst144_i32 : i32
%403 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %402] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %403, %401 : vector<4xf32>
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
spirv.Store "Function" %108, %336 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %109, %338 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %110, %340 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %111, %342 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %112, %365 : i32
spirv.Branch ^bb1(%343, %336, %338, %340, %342, %365 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
%113 = spirv.Load "Function" %112 : i32
%114 = spirv.Load "Function" %111 : !spirv.coopmatrix<16x16xf16, Subgroup>
%115 = spirv.Load "Function" %110 : !spirv.coopmatrix<16x16xf16, Subgroup>
%116 = spirv.Load "Function" %109 : !spirv.coopmatrix<16x16xf16, Subgroup>
%117 = spirv.Load "Function" %108 : !spirv.coopmatrix<16x16xf16, Subgroup>
%118 = spirv.IMul %4, %cst160_i32 : i32
%119 = spirv.IMul %113, %cst320_i32 : i32
%120 = spirv.IAdd %118, %119 : i32
%121 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %120] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%122 = spirv.NV.CooperativeMatrixLoad %121, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%123 = spirv.IAdd %120, %cst2_i32 : i32
%124 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %123] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%125 = spirv.NV.CooperativeMatrixLoad %124, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%126 = spirv.IAdd %120, %cst80_i32 : i32
%127 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %126] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%128 = spirv.NV.CooperativeMatrixLoad %127, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%129 = spirv.IAdd %120, %cst82_i32 : i32
%130 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %129] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%131 = spirv.NV.CooperativeMatrixLoad %130, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%132 = spirv.IMul %113, %cst288_i32 : i32
%133 = spirv.IMul %6, %cst288_i32 : i32
%134 = spirv.IAdd %132, %133 : i32
%135 = spirv.SDiv %62, %cst32_i32 : i32
%136 = spirv.ISub %cst-1_i32, %135 : i32
%137 = spirv.Select %60, %136, %135 : i1, i32
%138 = spirv.IMul %137, %cst4_i32 : i32
%139 = spirv.IAdd %134, %138 : i32
%140 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %139] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%141 = spirv.NV.CooperativeMatrixLoad %140, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%142 = spirv.IAdd %139, %cst2_i32 : i32
%143 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %142] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%144 = spirv.NV.CooperativeMatrixLoad %143, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%145 = spirv.IAdd %139, %cst144_i32 : i32
%146 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %145] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%147 = spirv.NV.CooperativeMatrixLoad %146, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%148 = spirv.IAdd %139, %cst146_i32 : i32
%149 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %148] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%150 = spirv.NV.CooperativeMatrixLoad %149, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%151 = spirv.NV.CooperativeMatrixMulAdd %122, %141, %117 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%152 = spirv.NV.CooperativeMatrixMulAdd %125, %147, %151 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%153 = spirv.NV.CooperativeMatrixMulAdd %122, %144, %116 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%154 = spirv.NV.CooperativeMatrixMulAdd %125, %150, %153 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%155 = spirv.NV.CooperativeMatrixMulAdd %128, %141, %115 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%156 = spirv.NV.CooperativeMatrixMulAdd %131, %147, %155 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%157 = spirv.NV.CooperativeMatrixMulAdd %128, %144, %114 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%158 = spirv.NV.CooperativeMatrixMulAdd %131, %150, %157 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%159 = spirv.IMul %4, %cst288_i32 : i32
%160 = spirv.IMul %6, %cst576_i32 : i32
%161 = spirv.IAdd %159, %160 : i32
%162 = spirv.IAdd %161, %138 : i32
%163 = spirv.IAdd %162, %cst146_i32 : i32
%164 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %163] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.NV.CooperativeMatrixStore %164, %158, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>
%165 = spirv.IAdd %162, %cst144_i32 : i32
%166 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %165] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.NV.CooperativeMatrixStore %166, %156, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>
%167 = spirv.IAdd %162, %cst2_i32 : i32
%168 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %167] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.NV.CooperativeMatrixStore %168, %154, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>
%169 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %162] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.NV.CooperativeMatrixStore %169, %152, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
%170 = spirv.IMul %50, %cst73728_i32 : i32
%171 = spirv.IAdd %170, %2 : i32
%172 = spirv.IAdd %171, %81 : i32
%173 = spirv.IAdd %172, %83 : i32
%174 = spirv.IAdd %173, %85 : i32
%175 = spirv.IMul %48, %cst368640_i32 : i32
%176 = spirv.IAdd %174, %175 : i32
%177 = spirv.IAdd %176, %34 : i32
%178 = spirv.IAdd %177, %93 : i32
%179 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %178] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%180 = spirv.Load "StorageBuffer" %179 : vector<4xf32>
%181 = spirv.IMul %50, %cst64_i32 : i32
%182 = spirv.IMul %4, %cst8_i32 : i32
%183 = spirv.IAdd %181, %182 : i32
%184 = spirv.IMul %6, %cst16_i32 : i32
%185 = spirv.IAdd %183, %184 : i32
%186 = spirv.IAdd %185, %40 : i32
%187 = spirv.IAdd %186, %92 : i32
%188 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %187] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%189 = spirv.Load "StorageBuffer" %188 : f16
%190 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %101] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%191 = spirv.Load "Workgroup" %190 : vector<4xf32>
%192 = spirv.VectorShuffle [0 : i32, 1 : i32] %191 : vector<4xf32>, %191 : vector<4xf32> -> vector<2xf32>
%193 = spirv.Bitcast %192 : vector<2xf32> to vector<4xf16>
%194 = spirv.CompositeConstruct %189, %189, %189, %189 : (f16, f16, f16, f16) -> vector<4xf16>
%195 = spirv.FAdd %193, %194 : vector<4xf16>
%196 = spirv.VectorShuffle [2 : i32, 3 : i32] %191 : vector<4xf32>, %191 : vector<4xf32> -> vector<2xf32>
%197 = spirv.Bitcast %196 : vector<2xf32> to vector<4xf16>
%198 = spirv.FAdd %197, %194 : vector<4xf16>
%199 = spirv.VectorShuffle [0 : i32, 1 : i32] %180 : vector<4xf32>, %180 : vector<4xf32> -> vector<2xf32>
%200 = spirv.Bitcast %199 : vector<2xf32> to vector<4xf16>
%201 = spirv.FAdd %200, %195 : vector<4xf16>
%202 = spirv.VectorShuffle [2 : i32, 3 : i32] %180 : vector<4xf32>, %180 : vector<4xf32> -> vector<2xf32>
%203 = spirv.Bitcast %202 : vector<2xf32> to vector<4xf16>
%204 = spirv.FAdd %203, %198 : vector<4xf16>
%205 = spirv.Bitcast %204 : vector<4xf16> to vector<2xf32>
%206 = spirv.Bitcast %201 : vector<4xf16> to vector<2xf32>
%207 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %206 : vector<2xf32> -> vector<4xf32>
%208 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %207 : vector<4xf32>, %205 : vector<2xf32> -> vector<4xf32>
%209 = spirv.IAdd %176, %46 : i32
%210 = spirv.IAdd %209, %93 : i32
%211 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %210] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %211, %208 : vector<4xf32>
%212 = spirv.IAdd %178, %cst18432_i32 : i32
%213 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %212] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%214 = spirv.Load "StorageBuffer" %213 : vector<4xf32>
%215 = spirv.IAdd %187, %cst16_i32 : i32
%216 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %215] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%217 = spirv.Load "StorageBuffer" %216 : f16
%218 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %106] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%219 = spirv.Load "Workgroup" %218 : vector<4xf32>
%220 = spirv.VectorShuffle [0 : i32, 1 : i32] %219 : vector<4xf32>, %219 : vector<4xf32> -> vector<2xf32>
%221 = spirv.Bitcast %220 : vector<2xf32> to vector<4xf16>
%222 = spirv.CompositeConstruct %217, %217, %217, %217 : (f16, f16, f16, f16) -> vector<4xf16>
%223 = spirv.FAdd %221, %222 : vector<4xf16>
%224 = spirv.VectorShuffle [2 : i32, 3 : i32] %219 : vector<4xf32>, %219 : vector<4xf32> -> vector<2xf32>
%225 = spirv.Bitcast %224 : vector<2xf32> to vector<4xf16>
%226 = spirv.FAdd %225, %222 : vector<4xf16>
%227 = spirv.VectorShuffle [0 : i32, 1 : i32] %214 : vector<4xf32>, %214 : vector<4xf32> -> vector<2xf32>
%228 = spirv.Bitcast %227 : vector<2xf32> to vector<4xf16>
%229 = spirv.FAdd %228, %223 : vector<4xf16>
%230 = spirv.VectorShuffle [2 : i32, 3 : i32] %214 : vector<4xf32>, %214 : vector<4xf32> -> vector<2xf32>
%231 = spirv.Bitcast %230 : vector<2xf32> to vector<4xf16>
%232 = spirv.FAdd %231, %226 : vector<4xf16>
%233 = spirv.Bitcast %232 : vector<4xf16> to vector<2xf32>
%234 = spirv.Bitcast %229 : vector<4xf16> to vector<2xf32>
%235 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %234 : vector<2xf32> -> vector<4xf32>
%236 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %235 : vector<4xf32>, %233 : vector<2xf32> -> vector<4xf32>
%237 = spirv.IAdd %210, %cst18432_i32 : i32
%238 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %237] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %238, %236 : vector<4xf32>
%239 = spirv.IAdd %178, %cst36864_i32 : i32
%240 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %239] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%241 = spirv.Load "StorageBuffer" %240 : vector<4xf32>
%242 = spirv.IAdd %187, %cst32_i32 : i32
%243 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %242] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%244 = spirv.Load "StorageBuffer" %243 : f16
%245 = spirv.IAdd %101, %cst288_i32 : i32
%246 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %245] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%247 = spirv.Load "Workgroup" %246 : vector<4xf32>
%248 = spirv.VectorShuffle [0 : i32, 1 : i32] %247 : vector<4xf32>, %247 : vector<4xf32> -> vector<2xf32>
%249 = spirv.Bitcast %248 : vector<2xf32> to vector<4xf16>
%250 = spirv.CompositeConstruct %244, %244, %244, %244 : (f16, f16, f16, f16) -> vector<4xf16>
%251 = spirv.FAdd %249, %250 : vector<4xf16>
%252 = spirv.VectorShuffle [2 : i32, 3 : i32] %247 : vector<4xf32>, %247 : vector<4xf32> -> vector<2xf32>
%253 = spirv.Bitcast %252 : vector<2xf32> to vector<4xf16>
%254 = spirv.FAdd %253, %250 : vector<4xf16>
%255 = spirv.VectorShuffle [0 : i32, 1 : i32] %241 : vector<4xf32>, %241 : vector<4xf32> -> vector<2xf32>
%256 = spirv.Bitcast %255 : vector<2xf32> to vector<4xf16>
%257 = spirv.FAdd %256, %251 : vector<4xf16>
%258 = spirv.VectorShuffle [2 : i32, 3 : i32] %241 : vector<4xf32>, %241 : vector<4xf32> -> vector<2xf32>
%259 = spirv.Bitcast %258 : vector<2xf32> to vector<4xf16>
%260 = spirv.FAdd %259, %254 : vector<4xf16>
%261 = spirv.Bitcast %260 : vector<4xf16> to vector<2xf32>
%262 = spirv.Bitcast %257 : vector<4xf16> to vector<2xf32>
%263 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %262 : vector<2xf32> -> vector<4xf32>
%264 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %263 : vector<4xf32>, %261 : vector<2xf32> -> vector<4xf32>
%265 = spirv.IAdd %210, %cst36864_i32 : i32
%266 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %265] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %266, %264 : vector<4xf32>
%267 = spirv.IAdd %178, %cst55296_i32 : i32
%268 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %267] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%269 = spirv.Load "StorageBuffer" %268 : vector<4xf32>
%270 = spirv.IAdd %187, %cst48_i32 : i32
%271 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %270] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%272 = spirv.Load "StorageBuffer" %271 : f16
%273 = spirv.IAdd %101, %cst432_i32 : i32
%274 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %273] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%275 = spirv.Load "Workgroup" %274 : vector<4xf32>
%276 = spirv.VectorShuffle [0 : i32, 1 : i32] %275 : vector<4xf32>, %275 : vector<4xf32> -> vector<2xf32>
%277 = spirv.Bitcast %276 : vector<2xf32> to vector<4xf16>
%278 = spirv.CompositeConstruct %272, %272, %272, %272 : (f16, f16, f16, f16) -> vector<4xf16>
%279 = spirv.FAdd %277, %278 : vector<4xf16>
%280 = spirv.VectorShuffle [2 : i32, 3 : i32] %275 : vector<4xf32>, %275 : vector<4xf32> -> vector<2xf32>
%281 = spirv.Bitcast %280 : vector<2xf32> to vector<4xf16>
%282 = spirv.FAdd %281, %278 : vector<4xf16>
%283 = spirv.VectorShuffle [0 : i32, 1 : i32] %269 : vector<4xf32>, %269 : vector<4xf32> -> vector<2xf32>
%284 = spirv.Bitcast %283 : vector<2xf32> to vector<4xf16>
%285 = spirv.FAdd %284, %279 : vector<4xf16>
%286 = spirv.VectorShuffle [2 : i32, 3 : i32] %269 : vector<4xf32>, %269 : vector<4xf32> -> vector<2xf32>
%287 = spirv.Bitcast %286 : vector<2xf32> to vector<4xf16>
%288 = spirv.FAdd %287, %282 : vector<4xf16>
%289 = spirv.Bitcast %288 : vector<4xf16> to vector<2xf32>
%290 = spirv.Bitcast %285 : vector<4xf16> to vector<2xf32>
%291 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32 : vector<4xf32>, %290 : vector<2xf32> -> vector<4xf32>
%292 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %291 : vector<4xf32>, %289 : vector<2xf32> -> vector<4xf32>
%293 = spirv.IAdd %210, %cst55296_i32 : i32
%294 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %293] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %294, %292 : vector<4xf32>
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_30_generic_2x320x9216x2880, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__
spirv.ExecutionMode @forward_dispatch_30_generic_2x320x9216x2880 "LocalSize", 64, 2, 1
}
}
}
}
// Executable @forward_dispatch_34: a single Vulkan/SPIR-V compute entry point,
// @forward_dispatch_34_generic_64x10x9216. Per invocation it loads one
// vector<4xf16> from binding (0, 0), subtracts a per-workgroup-z scalar,
// multiplies by the inverse square root of a second per-workgroup-z scalar
// (scaled and offset by constants), and stores the result to binding (0, 1).
// NOTE(review): this has the shape of a normalization, (x - mean) * rsqrt(var + eps),
// over 64 groups of 10x9216 elements - presumably part of a group-norm; confirm
// against the originating model.
hal.executable private @forward_dispatch_34 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
// Export region: yields the constants (72, 10, 64). In IREE's HAL dialect this
// region supplies the dispatch workgroup count (x, y, z); workgroup_size is
// 32x1x1, matching the LocalSize execution mode at the end of the module.
hal.executable.export public @forward_dispatch_34_generic_64x10x9216 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation1, workgroup_size = [32 : index, 1 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
%c72 = arith.constant 72 : index
%c10 = arith.constant 10 : index
%c64 = arith.constant 64 : index
hal.return %c72, %c10, %c64 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
// Built-in inputs read by the kernel.
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
// Two i32 push constants; each is floor-divided by 8 below and added to a buffer index.
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
// Binding (0, 0) is declared twice and marked {aliased}: once as a runtime array
// of vector<4xf16> (stride 8) and once as a runtime array of f32 (stride 4).
spirv.GlobalVariable @__resource_var_0_0__0 bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>
// Binding (0, 1): the only buffer this function stores to.
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
spirv.func @forward_dispatch_34_generic_64x10x9216() "None" {
// ---- Constants ----
%cst1_i32 = spirv.Constant 1 : i32
%cst2304_i32 = spirv.Constant 2304 : i32
%cst23040_i32 = spirv.Constant 23040 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst64_i32 = spirv.Constant 64 : i32
%cst0_i32 = spirv.Constant 0 : i32
// Divisor applied to the second per-group scalar (%39). 92160 = 10 * 9216,
// presumably the element count per group - TODO confirm.
%cst_f32 = spirv.Constant 9.216000e+04 : f32
// Small additive term (about 1e-6) applied before the inverse square root.
%cst_f32_0 = spirv.Constant 9.99999997E-7 : f32
// ---- Push constants: %1 = element 0, %3 = element 1 ----
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
%0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%1 = spirv.Load "PushConstant" %0 : i32
%2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%3 = spirv.Load "PushConstant" %2 : i32
// %9 = floor(%1 / 8). SDiv truncates toward zero, so a negative input is mapped
// to (-1 - x), divided, and mapped back with (-1 - q) to round toward -infinity.
// NOTE(review): the divisor equals the 8-byte stride of the vector<4xf16> arrays,
// so %1 looks like a byte offset being converted to an element index - confirm.
%4 = spirv.SLessThan %1, %cst0_i32 : i32
%5 = spirv.ISub %cst-1_i32, %1 : i32
%6 = spirv.Select %4, %5, %1 : i1, i32
%7 = spirv.SDiv %6, %cst8_i32 : i32
%8 = spirv.ISub %cst-1_i32, %7 : i32
%9 = spirv.Select %4, %8, %7 : i1, i32
%__resource_var_0_0__0_addr = spirv.mlir.addressof @__resource_var_0_0__0 : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>
// %15 = floor(%3 / 8), same floor-division idiom as %9; used for the store index.
%10 = spirv.SLessThan %3, %cst0_i32 : i32
%11 = spirv.ISub %cst-1_i32, %3 : i32
%12 = spirv.Select %10, %11, %3 : i1, i32
%13 = spirv.SDiv %12, %cst8_i32 : i32
%14 = spirv.ISub %cst-1_i32, %13 : i32
%15 = spirv.Select %10, %14, %13 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
// ---- Workgroup id: %17 = z, %19 = y, %21 = x ----
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%16 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%17 = spirv.CompositeExtract %16[2 : i32] : vector<3xi32>
%18 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%19 = spirv.CompositeExtract %18[1 : i32] : vector<3xi32>
%20 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%21 = spirv.CompositeExtract %20[0 : i32] : vector<3xi32>
// %23 = the ~1e-6 constant narrowed to f16 and splatted across four lanes.
%22 = spirv.FConvert %cst_f32_0 : f32 to f16
%23 = spirv.CompositeConstruct %22, %22, %22, %22 : (f16, f16, f16, f16) -> vector<4xf16>
// %25 = local invocation id x.
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%24 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%25 = spirv.CompositeExtract %24[0 : i32] : vector<3xi32>
// ---- Linear vector<4xf16> index ----
// %31 = wg.x * 32 + local.x + wg.z * 23040 + wg.y * 2304
// (2304 * 4 lanes = 9216 and 23040 = 10 * 2304, consistent with the 64x10x9216 name).
%26 = spirv.IMul %21, %cst32_i32 : i32
%27 = spirv.IAdd %26, %25 : i32
%28 = spirv.IMul %17, %cst23040_i32 : i32
%29 = spirv.IAdd %27, %28 : i32
%30 = spirv.IMul %19, %cst2304_i32 : i32
%31 = spirv.IAdd %29, %30 : i32
// Load the input vector from the f16x4 view of binding (0, 0) at %31 + %9.
%32 = spirv.IAdd %31, %9 : i32
%33 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %32] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
%34 = spirv.Load "StorageBuffer" %33 : vector<4xf16>
// Load two f32 scalars from the f32 view of the same binding, at indices wg.z
// and wg.z + 64. Neither push-constant offset is applied to these indices.
// NOTE(review): presumably per-group statistics (%36 ~ mean, %39 ~ sum of squared
// deviations) written by an earlier dispatch - confirm.
%35 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %17] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>, i32, i32
%36 = spirv.Load "StorageBuffer" %35 : f32
%37 = spirv.IAdd %17, %cst64_i32 : i32
%38 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %37] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>, i32, i32
%39 = spirv.Load "StorageBuffer" %38 : f32
// ---- Arithmetic: %48 = (%34 - splat(f16(%36))) * inversesqrt(splat(f16(%39 / 92160)) + %23) ----
// The division is done in f32; the quotient is narrowed to f16 before the small
// constant is added, so the add and the inverse square root run in f16.
%40 = spirv.FDiv %39, %cst_f32 : f32
%41 = spirv.FConvert %40 : f32 to f16
%42 = spirv.CompositeConstruct %41, %41, %41, %41 : (f16, f16, f16, f16) -> vector<4xf16>
%43 = spirv.FAdd %42, %23 : vector<4xf16>
%44 = spirv.FConvert %36 : f32 to f16
%45 = spirv.GL.InverseSqrt %43 : vector<4xf16>
%46 = spirv.CompositeConstruct %44, %44, %44, %44 : (f16, f16, f16, f16) -> vector<4xf16>
%47 = spirv.FSub %34, %46 : vector<4xf16>
%48 = spirv.FMul %47, %45 : vector<4xf16>
// Store the result to binding (0, 1) at %31 + %15.
%49 = spirv.IAdd %31, %15 : i32
%50 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %49] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %50, %48 : vector<4xf16>
spirv.Return
}
// GLCompute entry point with a 32x1x1 local size.
spirv.EntryPoint "GLCompute" @forward_dispatch_34_generic_64x10x9216, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_34_generic_64x10x9216 "LocalSize", 32, 1, 1
}
}
}
}
hal.executable private @forward_dispatch_35 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
hal.executable.export public @forward_dispatch_35_generic_2x320x96x96 ordinal(0) layout(#pipeline_layout6) attributes {translation_info = #translation1, workgroup_size = [8 : index, 4 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index):
%c3 = arith.constant 3 : index
%c24 = arith.constant 24 : index
%c640 = arith.constant 640 : index
hal.return %c3, %c24, %c640 : index, index, index
}
// Device module for dispatch 35. The spirv.target_env attribute below records
// the Vulkan/NVIDIA target description (capabilities, extensions, resource
// limits, cooperative-matrix shapes) this module was compiled against.
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
// This shader itself only requires 16-bit storage access and Float16.
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
// Built-in inputs: invocation id within the workgroup, and workgroup id within the grid.
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
// Four i32 push constants; the function floor-divides each one and adds the
// result to a buffer index.
// NOTE(review): presumably per-binding byte offsets supplied by the host — confirm.
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>
// Binding 0 is read as vector<4xf16> elements (stride 8); bindings 1 and 2 are
// addressed as scalar f16 elements (stride 2).
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// Per invocation: load one vector<4xf16> from binding 0, multiply it by a
// scalar and add a second scalar (both read from binding 1), then store the
// four result lanes to binding 2 at element offsets +0, +320, +640 and +960.
// NOTE(review): the strides (2304 = 96*96/4, 737280 = 320*2304,
// 2949120 = 96*96*320) look like a per-channel scale+bias fused with a
// channel-first -> channel-last layout change of a 2x320x96x96 tensor —
// inferred from the constants and the function name; confirm against the
// originating linalg.generic.
spirv.func @forward_dispatch_35_generic_2x320x96x96() "None" {
// Integer constants used below as indices, divisors and strides.
%cst3_i32 = spirv.Constant 3 : i32
%cst1_i32 = spirv.Constant 1 : i32
%cst960_i32 = spirv.Constant 960 : i32
%cst640_i32 = spirv.Constant 640 : i32
%cst2949120_i32 = spirv.Constant 2949120 : i32
%cst1280_i32 = spirv.Constant 1280 : i32
%cst10240_i32 = spirv.Constant 10240 : i32
%cst122880_i32 = spirv.Constant 122880 : i32
%cst30720_i32 = spirv.Constant 30720 : i32
%cst2304_i32 = spirv.Constant 2304 : i32
%cst737280_i32 = spirv.Constant 737280 : i32
%cst96_i32 = spirv.Constant 96 : i32
%cst24_i32 = spirv.Constant 24 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst0_i32 = spirv.Constant 0 : i32
%cst320_i32 = spirv.Constant 320 : i32
// Load the four push-constant words into %1, %3, %5 and %7.
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>
%0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32
%1 = spirv.Load "PushConstant" %0 : i32
%2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32
%3 = spirv.Load "PushConstant" %2 : i32
%4 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32
%5 = spirv.Load "PushConstant" %4 : i32
%6 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst3_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32
%7 = spirv.Load "PushConstant" %6 : i32
// %13 = floor(%1 / 8). Floor division is expanded as: for a negative x,
// -1 - ((-1 - x) / d); otherwise x / d. The divisor 8 equals the element
// stride declared for binding 0.
%8 = spirv.SLessThan %1, %cst0_i32 : i32
%9 = spirv.ISub %cst-1_i32, %1 : i32
%10 = spirv.Select %8, %9, %1 : i1, i32
%11 = spirv.SDiv %10, %cst8_i32 : i32
%12 = spirv.ISub %cst-1_i32, %11 : i32
%13 = spirv.Select %8, %12, %11 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
// %19 = floor(%3 / 2): same expansion; 2 equals the f16 element stride.
%14 = spirv.SLessThan %3, %cst0_i32 : i32
%15 = spirv.ISub %cst-1_i32, %3 : i32
%16 = spirv.Select %14, %15, %3 : i1, i32
%17 = spirv.SDiv %16, %cst2_i32 : i32
%18 = spirv.ISub %cst-1_i32, %17 : i32
%19 = spirv.Select %14, %18, %17 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// %25 = floor(%5 / 2).
%20 = spirv.SLessThan %5, %cst0_i32 : i32
%21 = spirv.ISub %cst-1_i32, %5 : i32
%22 = spirv.Select %20, %21, %5 : i1, i32
%23 = spirv.SDiv %22, %cst2_i32 : i32
%24 = spirv.ISub %cst-1_i32, %23 : i32
%25 = spirv.Select %20, %24, %23 : i1, i32
// %31 = floor(%7 / 2).
%26 = spirv.SLessThan %7, %cst0_i32 : i32
%27 = spirv.ISub %cst-1_i32, %7 : i32
%28 = spirv.Select %26, %27, %7 : i1, i32
%29 = spirv.SDiv %28, %cst2_i32 : i32
%30 = spirv.ISub %cst-1_i32, %29 : i32
%31 = spirv.Select %26, %30, %29 : i1, i32
%__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// Workgroup id: the z component is split into %34 = z / 320 and %39 = z % 320;
// %36 is the y component and %38 the x component. Each component is extracted
// from its own load of the built-in.
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%32 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%33 = spirv.CompositeExtract %32[2 : i32] : vector<3xi32>
%34 = spirv.UDiv %33, %cst320_i32 : i32
%35 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%36 = spirv.CompositeExtract %35[1 : i32] : vector<3xi32>
%37 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%38 = spirv.CompositeExtract %37[0 : i32] : vector<3xi32>
%39 = spirv.UMod %33, %cst320_i32 : i32
// Local invocation id: %41 is the y component, %43 the x component.
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%40 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%41 = spirv.CompositeExtract %40[1 : i32] : vector<3xi32>
%42 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%43 = spirv.CompositeExtract %42[0 : i32] : vector<3xi32>
// Input index into binding 0, in vector<4xf16> elements:
//   %54 = 24*%41 + 96*%36 + 8*%38 + %43 + 737280*%34 + 2304*%39 + %13
%44 = spirv.IMul %41, %cst24_i32 : i32
%45 = spirv.IMul %36, %cst96_i32 : i32
%46 = spirv.IAdd %44, %45 : i32
%47 = spirv.IMul %38, %cst8_i32 : i32
%48 = spirv.IAdd %46, %47 : i32
%49 = spirv.IAdd %48, %43 : i32
%50 = spirv.IMul %34, %cst737280_i32 : i32
%51 = spirv.IAdd %49, %50 : i32
%52 = spirv.IMul %39, %cst2304_i32 : i32
%53 = spirv.IAdd %51, %52 : i32
%54 = spirv.IAdd %53, %13 : i32
%55 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %54] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
%56 = spirv.Load "StorageBuffer" %55 : vector<4xf16>
// Two scalar f16 loads from binding 1, both indexed by %39 (z % 320) but with
// different base offsets: %59 at %39 + %19 and %62 at %39 + %25.
%57 = spirv.IAdd %39, %19 : i32
%58 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %57] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%59 = spirv.Load "StorageBuffer" %58 : f16
%60 = spirv.IAdd %39, %25 : i32
%61 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %60] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%62 = spirv.Load "StorageBuffer" %61 : f16
// %66 = %56 * splat(%59) + splat(%62), computed on all four lanes.
%63 = spirv.CompositeConstruct %59, %59, %59, %59 : (f16, f16, f16, f16) -> vector<4xf16>
%64 = spirv.FMul %56, %63 : vector<4xf16>
%65 = spirv.CompositeConstruct %62, %62, %62, %62 : (f16, f16, f16, f16) -> vector<4xf16>
%66 = spirv.FAdd %64, %65 : vector<4xf16>
// Split the result into scalar lanes; each lane is stored separately below.
%67 = spirv.CompositeExtract %66[0 : i32] : vector<4xf16>
%68 = spirv.CompositeExtract %66[1 : i32] : vector<4xf16>
%69 = spirv.CompositeExtract %66[2 : i32] : vector<4xf16>
%70 = spirv.CompositeExtract %66[3 : i32] : vector<4xf16>
// Output index into binding 2, in f16 elements:
//   %81 = 30720*%41 + 122880*%36 + 10240*%38 + 1280*%43 + 2949120*%34 + %39 + %31
%71 = spirv.IMul %41, %cst30720_i32 : i32
%72 = spirv.IMul %36, %cst122880_i32 : i32
%73 = spirv.IAdd %71, %72 : i32
%74 = spirv.IMul %38, %cst10240_i32 : i32
%75 = spirv.IAdd %73, %74 : i32
%76 = spirv.IMul %43, %cst1280_i32 : i32
%77 = spirv.IAdd %75, %76 : i32
%78 = spirv.IMul %34, %cst2949120_i32 : i32
%79 = spirv.IAdd %77, %78 : i32
%80 = spirv.IAdd %79, %39 : i32
%81 = spirv.IAdd %80, %31 : i32
// Lanes 0..3 are written 320 elements apart: %81, %81+320, %81+640, %81+960.
%82 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %81] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %82, %67 : f16
%83 = spirv.IAdd %81, %cst320_i32 : i32
%84 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %83] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %84, %68 : f16
%85 = spirv.IAdd %81, %cst640_i32 : i32
%86 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %85] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %86, %69 : f16
%87 = spirv.IAdd %81, %cst960_i32 : i32
%88 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %87] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %88, %70 : f16
spirv.Return
}
// GLCompute entry point whose interface lists the two built-in inputs; the
// workgroup is 8 x 4 x 1 = 32 invocations.
spirv.EntryPoint "GLCompute" @forward_dispatch_35_generic_2x320x96x96, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_35_generic_2x320x96x96 "LocalSize", 8, 4, 1
}
}
}
}
hal.executable private @forward_dispatch_36 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
// Export record for the dispatch-36 matmul entry point (ordinal 0). The
// attached region yields the workgroup count for the dispatch; here it is the
// fixed triple (5, 288, 1) and none of the region arguments are used.
hal.executable.export public @forward_dispatch_36_matmul_18432x320x320 ordinal(0) layout(#pipeline_layout6) attributes {subgroup_size = 32 : index, translation_info = #translation4, workgroup_size = [64 : index, 2 : index, 1 : index]} {
^bb0(%device: !hal.device, %workload0: index, %workload1: index):
  // Constants are independent of each other; only the hal.return operand
  // order determines which value lands in which position.
  %count_z = arith.constant 1 : index
  %count_y = arith.constant 288 : index
  %count_x = arith.constant 5 : index
  hal.return %count_x, %count_y, %count_z : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, Float16, CooperativeMatrixNV], [SPV_KHR_storage_buffer_storage_class, SPV_NV_cooperative_matrix]> {
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>
spirv.GlobalVariable @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>
spirv.GlobalVariable @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.func @forward_dispatch_36_matmul_18432x320x320() "None" attributes {spirv.entry_point_abi = #spirv.entry_point_abi<subgroup_size = 32>} {
%cst9_i32 = spirv.Constant 9 : i32
%false = spirv.Constant false
%cst5_i32 = spirv.Constant 5 : i32
%cst3_i32 = spirv.Constant 3 : i32
%cst1_i32 = spirv.Constant 1 : i32
%cst-33_i32 = spirv.Constant -33 : i32
%cst642_i32 = spirv.Constant 642 : i32
%cst-576_i32 = spirv.Constant -576 : i32
%cst-640_i32 = spirv.Constant -640 : i32
%cst146_i32 = spirv.Constant 146 : i32
%cst82_i32 = spirv.Constant 82 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst144_i32 = spirv.Constant 144 : i32
%cst72_i32 = spirv.Constant 72 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst320_i32 = spirv.Constant 320 : i32
%cst160_i32 = spirv.Constant 160 : i32
%cst80_i32 = spirv.Constant 80 : i32
%cst36_i32 = spirv.Constant 36 : i32
%cst4_i32 = spirv.Constant 4 : i32
%cst1280_i32 = spirv.Constant 1280 : i32
%cst640_i32 = spirv.Constant 640 : i32
%cst2560_i32 = spirv.Constant 2560 : i32
%cst40_i32 = spirv.Constant 40 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst16_i32 = spirv.Constant 16 : i32
%cst0_i32 = spirv.Constant 0 : i32
%cst288_i32 = spirv.Constant 288 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst_f16 = spirv.Constant 0.000000e+00 : f16
%0 = spirv.CompositeConstruct %cst_f16 : (f16) -> !spirv.coopmatrix<16x16xf16, Subgroup>
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%1 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%2 = spirv.CompositeExtract %1[0 : i32] : vector<3xi32>
%3 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%4 = spirv.CompositeExtract %3[1 : i32] : vector<3xi32>
%5 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%6 = spirv.CompositeExtract %5[2 : i32] : vector<3xi32>
%__workgroup_mem__4_addr = spirv.mlir.addressof @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>
%__workgroup_mem__5_addr = spirv.mlir.addressof @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>
%7 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32
%8 = spirv.Load "PushConstant" %7 : i32
%9 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32
%10 = spirv.Load "PushConstant" %9 : i32
%11 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32
%12 = spirv.Load "PushConstant" %11 : i32
%13 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst3_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x i32, stride=4> [0])>, PushConstant>, i32, i32
%14 = spirv.Load "PushConstant" %13 : i32
%15 = spirv.SLessThan %8, %cst0_i32 : i32
%16 = spirv.ISub %cst-1_i32, %8 : i32
%17 = spirv.Select %15, %16, %8 : i1, i32
%18 = spirv.SDiv %17, %cst16_i32 : i32
%19 = spirv.ISub %cst-1_i32, %18 : i32
%20 = spirv.Select %15, %19, %18 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%21 = spirv.SLessThan %10, %cst0_i32 : i32
%22 = spirv.ISub %cst-1_i32, %10 : i32
%23 = spirv.Select %21, %22, %10 : i1, i32
%24 = spirv.SDiv %23, %cst16_i32 : i32
%25 = spirv.ISub %cst-1_i32, %24 : i32
%26 = spirv.Select %21, %25, %24 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%27 = spirv.SLessThan %12, %cst0_i32 : i32
%28 = spirv.ISub %cst-1_i32, %12 : i32
%29 = spirv.Select %27, %28, %12 : i1, i32
%30 = spirv.SDiv %29, %cst16_i32 : i32
%31 = spirv.ISub %cst-1_i32, %30 : i32
%32 = spirv.Select %27, %31, %30 : i1, i32
%33 = spirv.SLessThan %14, %cst0_i32 : i32
%34 = spirv.ISub %cst-1_i32, %14 : i32
%35 = spirv.Select %33, %34, %14 : i1, i32
%36 = spirv.SDiv %35, %cst16_i32 : i32
%37 = spirv.ISub %cst-1_i32, %36 : i32
%38 = spirv.Select %33, %37, %36 : i1, i32
%__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%39 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%40 = spirv.CompositeExtract %39[1 : i32] : vector<3xi32>
%41 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%42 = spirv.CompositeExtract %41[0 : i32] : vector<3xi32>
%43 = spirv.IMul %40, %cst2560_i32 : i32
%44 = spirv.IAdd %43, %2 : i32
%45 = spirv.IMul %4, %cst640_i32 : i32
%46 = spirv.IAdd %44, %45 : i32
%47 = spirv.IMul %6, %cst1280_i32 : i32
%48 = spirv.IAdd %46, %47 : i32
%49 = spirv.IAdd %48, %20 : i32
%50 = spirv.SLessThan %2, %cst0_i32 : i32
%51 = spirv.ISub %cst-1_i32, %2 : i32
%52 = spirv.Select %50, %51, %2 : i1, i32
%53 = spirv.SDiv %52, %cst4_i32 : i32
%54 = spirv.ISub %cst-1_i32, %53 : i32
%55 = spirv.Select %50, %54, %53 : i1, i32
%56 = spirv.IMul %55, %cst36_i32 : i32
%57 = spirv.IAdd %49, %56 : i32
%58 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %57] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%59 = spirv.Load "StorageBuffer" %58 : vector<4xf32>
%60 = spirv.IMul %4, %cst80_i32 : i32
%61 = spirv.IAdd %2, %60 : i32
%62 = spirv.IMul %6, %cst160_i32 : i32
%63 = spirv.IAdd %61, %62 : i32
%64 = spirv.IAdd %63, %55 : i32
%65 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %64] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %65, %59 : vector<4xf32>
%66 = spirv.IAdd %57, %cst1280_i32 : i32
%67 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %66] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%68 = spirv.Load "StorageBuffer" %67 : vector<4xf32>
%69 = spirv.IAdd %64, %cst160_i32 : i32
%70 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %69] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %70, %68 : vector<4xf32>
%71 = spirv.IMul %4, %cst320_i32 : i32
%72 = spirv.IAdd %2, %71 : i32
%73 = spirv.IMul %6, %cst640_i32 : i32
%74 = spirv.IAdd %72, %73 : i32
%75 = spirv.IMul %42, %cst8_i32 : i32
%76 = spirv.IAdd %74, %75 : i32
%77 = spirv.IAdd %76, %26 : i32
%78 = spirv.SDiv %52, %cst8_i32 : i32
%79 = spirv.ISub %cst-1_i32, %78 : i32
%80 = spirv.Select %50, %79, %78 : i1, i32
%81 = spirv.IMul %80, %cst32_i32 : i32
%82 = spirv.IAdd %77, %81 : i32
%83 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %82] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%84 = spirv.Load "StorageBuffer" %83 : vector<4xf32>
%85 = spirv.IMul %4, %cst72_i32 : i32
%86 = spirv.IAdd %2, %85 : i32
%87 = spirv.IMul %6, %cst144_i32 : i32
%88 = spirv.IAdd %86, %87 : i32
%89 = spirv.IAdd %88, %80 : i32
%90 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %89] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %90, %84 : vector<4xf32>
%91 = spirv.IAdd %82, %cst640_i32 : i32
%92 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %91] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%93 = spirv.Load "StorageBuffer" %92 : vector<4xf32>
%94 = spirv.IAdd %89, %cst144_i32 : i32
%95 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %94] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %95, %93 : vector<4xf32>
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
%96 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%97 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%98 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%99 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%100 = spirv.Variable : !spirv.ptr<i32, Function>
spirv.mlir.loop {
spirv.Branch ^bb1(%cst0_i32, %0, %0, %0, %0, %cst0_i32 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32)
^bb1(%168: i32, %169: !spirv.coopmatrix<16x16xf16, Subgroup>, %170: !spirv.coopmatrix<16x16xf16, Subgroup>, %171: !spirv.coopmatrix<16x16xf16, Subgroup>, %172: !spirv.coopmatrix<16x16xf16, Subgroup>, %173: i32): // 2 preds: ^bb0, ^bb2
%174 = spirv.SLessThan %168, %cst288_i32 : i32
spirv.BranchConditional %174, ^bb2, ^bb3
^bb2: // pred: ^bb1
%175 = spirv.IMul %173, %cst320_i32 : i32
%176 = spirv.IMul %4, %cst160_i32 : i32
%177 = spirv.IAdd %175, %176 : i32
%178 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %177] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%179 = spirv.NV.CooperativeMatrixLoad %178, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%180 = spirv.IAdd %177, %cst2_i32 : i32
%181 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %180] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%182 = spirv.NV.CooperativeMatrixLoad %181, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%183 = spirv.IAdd %177, %cst80_i32 : i32
%184 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %183] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%185 = spirv.NV.CooperativeMatrixLoad %184, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%186 = spirv.IAdd %177, %cst82_i32 : i32
%187 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %186] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%188 = spirv.NV.CooperativeMatrixLoad %187, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%189 = spirv.IMul %173, %cst288_i32 : i32
%190 = spirv.SDiv %52, %cst32_i32 : i32
%191 = spirv.ISub %cst-1_i32, %190 : i32
%192 = spirv.Select %50, %191, %190 : i1, i32
%193 = spirv.IMul %192, %cst4_i32 : i32
%194 = spirv.IAdd %189, %193 : i32
%195 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %194] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%196 = spirv.NV.CooperativeMatrixLoad %195, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%197 = spirv.IAdd %194, %cst2_i32 : i32
%198 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %197] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%199 = spirv.NV.CooperativeMatrixLoad %198, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%200 = spirv.IAdd %194, %cst144_i32 : i32
%201 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %200] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%202 = spirv.NV.CooperativeMatrixLoad %201, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%203 = spirv.IAdd %194, %cst146_i32 : i32
%204 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %203] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%205 = spirv.NV.CooperativeMatrixLoad %204, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%206 = spirv.NV.CooperativeMatrixMulAdd %179, %196, %169 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%207 = spirv.NV.CooperativeMatrixMulAdd %182, %202, %206 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%208 = spirv.NV.CooperativeMatrixMulAdd %179, %199, %170 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%209 = spirv.NV.CooperativeMatrixMulAdd %182, %205, %208 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%210 = spirv.NV.CooperativeMatrixMulAdd %185, %196, %171 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%211 = spirv.NV.CooperativeMatrixMulAdd %188, %202, %210 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%212 = spirv.NV.CooperativeMatrixMulAdd %185, %199, %172 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%213 = spirv.NV.CooperativeMatrixMulAdd %188, %205, %212 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%214 = spirv.IAdd %168, %cst32_i32 : i32
%215 = spirv.SLessThan %214, %cst0_i32 : i32
%216 = spirv.ISub %cst-33_i32, %168 : i32
%217 = spirv.Select %215, %216, %214 : i1, i32
%218 = spirv.SDiv %217, %cst8_i32 : i32
%219 = spirv.ISub %cst-1_i32, %218 : i32
%220 = spirv.Select %215, %219, %218 : i1, i32
%221 = spirv.IAdd %49, %220 : i32
%222 = spirv.IAdd %221, %56 : i32
%223 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %222] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%224 = spirv.Load "StorageBuffer" %223 : vector<4xf32>
%225 = spirv.SDiv %217, %cst32_i32 : i32
%226 = spirv.ISub %cst-1_i32, %225 : i32
%227 = spirv.Select %215, %226, %225 : i1, i32
%228 = spirv.GL.SAbs %227 : i32
%229 = spirv.GL.SAbs %cst2_i32 : i32
%230 = spirv.UMod %228, %229 : i32
%231 = spirv.IEqual %227, %228 : i32
%232 = spirv.SNegate %230 : i32
%233 = spirv.Select %231, %230, %232 : i1, i32
%234 = spirv.SLessThan %233, %cst0_i32 : i32
%235 = spirv.IAdd %233, %cst2_i32 : i32
%236 = spirv.Select %234, %235, %233 : i1, i32
%237 = spirv.IMul %227, %cst320_i32 : i32
%238 = spirv.IAdd %237, %63 : i32
%239 = spirv.SLessThan %227, %cst0_i32 : i32
%240 = spirv.ISub %cst-1_i32, %227 : i32
%241 = spirv.Select %239, %240, %227 : i1, i32
%242 = spirv.SDiv %241, %cst2_i32 : i32
%243 = spirv.ISub %cst-1_i32, %242 : i32
%244 = spirv.Select %239, %243, %242 : i1, i32
%245 = spirv.IMul %244, %cst-640_i32 : i32
%246 = spirv.IAdd %238, %245 : i32
%247 = spirv.IAdd %246, %55 : i32
%248 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %247] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %248, %224 : vector<4xf32>
%249 = spirv.IAdd %222, %cst1280_i32 : i32
%250 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %249] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%251 = spirv.Load "StorageBuffer" %250 : vector<4xf32>
%252 = spirv.IAdd %247, %cst160_i32 : i32
%253 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %252] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %253, %251 : vector<4xf32>
%254 = spirv.IMul %214, %cst40_i32 : i32
%255 = spirv.IAdd %254, %2 : i32
%256 = spirv.IAdd %255, %71 : i32
%257 = spirv.IAdd %256, %73 : i32
%258 = spirv.IAdd %257, %75 : i32
%259 = spirv.IAdd %258, %26 : i32
%260 = spirv.IAdd %259, %81 : i32
%261 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %260] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%262 = spirv.Load "StorageBuffer" %261 : vector<4xf32>
%263 = spirv.IMul %227, %cst288_i32 : i32
%264 = spirv.IAdd %263, %88 : i32
%265 = spirv.IMul %244, %cst-576_i32 : i32
%266 = spirv.IAdd %264, %265 : i32
%267 = spirv.IAdd %266, %80 : i32
%268 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %267] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %268, %262 : vector<4xf32>
%269 = spirv.IAdd %260, %cst640_i32 : i32
%270 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %269] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%271 = spirv.Load "StorageBuffer" %270 : vector<4xf32>
%272 = spirv.IAdd %267, %cst144_i32 : i32
%273 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %272] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %273, %271 : vector<4xf32>
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
spirv.Store "Function" %96, %207 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %97, %209 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %98, %211 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %99, %213 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %100, %236 : i32
spirv.Branch ^bb1(%214, %207, %209, %211, %213, %236 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
%101 = spirv.Load "Function" %100 : i32
%102 = spirv.Load "Function" %99 : !spirv.coopmatrix<16x16xf16, Subgroup>
%103 = spirv.Load "Function" %98 : !spirv.coopmatrix<16x16xf16, Subgroup>
%104 = spirv.Load "Function" %97 : !spirv.coopmatrix<16x16xf16, Subgroup>
%105 = spirv.Load "Function" %96 : !spirv.coopmatrix<16x16xf16, Subgroup>
%106 = spirv.IMul %4, %cst160_i32 : i32
%107 = spirv.IMul %101, %cst320_i32 : i32
%108 = spirv.IAdd %106, %107 : i32
%109 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %108] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%110 = spirv.NV.CooperativeMatrixLoad %109, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%111 = spirv.IAdd %108, %cst2_i32 : i32
%112 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %111] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%113 = spirv.NV.CooperativeMatrixLoad %112, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%114 = spirv.IAdd %108, %cst80_i32 : i32
%115 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %114] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%116 = spirv.NV.CooperativeMatrixLoad %115, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%117 = spirv.IAdd %108, %cst82_i32 : i32
%118 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %117] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%119 = spirv.NV.CooperativeMatrixLoad %118, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%120 = spirv.IMul %101, %cst288_i32 : i32
%121 = spirv.SDiv %52, %cst32_i32 : i32
%122 = spirv.ISub %cst-1_i32, %121 : i32
%123 = spirv.Select %50, %122, %121 : i1, i32
%124 = spirv.IMul %123, %cst4_i32 : i32
%125 = spirv.IAdd %120, %124 : i32
%126 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %125] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%127 = spirv.NV.CooperativeMatrixLoad %126, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%128 = spirv.IAdd %125, %cst2_i32 : i32
%129 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %128] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%130 = spirv.NV.CooperativeMatrixLoad %129, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%131 = spirv.IAdd %125, %cst144_i32 : i32
%132 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %131] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%133 = spirv.NV.CooperativeMatrixLoad %132, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%134 = spirv.IAdd %125, %cst146_i32 : i32
%135 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %134] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%136 = spirv.NV.CooperativeMatrixLoad %135, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%137 = spirv.NV.CooperativeMatrixMulAdd %110, %127, %105 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%138 = spirv.NV.CooperativeMatrixMulAdd %113, %133, %137 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%139 = spirv.NV.CooperativeMatrixMulAdd %110, %130, %104 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%140 = spirv.NV.CooperativeMatrixMulAdd %113, %136, %139 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%141 = spirv.NV.CooperativeMatrixMulAdd %116, %127, %103 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%142 = spirv.NV.CooperativeMatrixMulAdd %119, %133, %141 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%143 = spirv.NV.CooperativeMatrixMulAdd %116, %130, %102 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%144 = spirv.NV.CooperativeMatrixMulAdd %119, %136, %143 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%145 = spirv.IAdd %75, %32 : i32
%146 = spirv.IAdd %145, %124 : i32
%147 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %146] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%148 = spirv.NV.CooperativeMatrixLoad %147, %cst0_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer> as !spirv.coopmatrix<16x16xf16, Subgroup>
%149 = spirv.IAdd %146, %cst2_i32 : i32
%150 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %149] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%151 = spirv.NV.CooperativeMatrixLoad %150, %cst0_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer> as !spirv.coopmatrix<16x16xf16, Subgroup>
%152 = spirv.FAdd %138, %148 : !spirv.coopmatrix<16x16xf16, Subgroup>
%153 = spirv.FAdd %140, %151 : !spirv.coopmatrix<16x16xf16, Subgroup>
%154 = spirv.FAdd %142, %148 : !spirv.coopmatrix<16x16xf16, Subgroup>
%155 = spirv.FAdd %144, %151 : !spirv.coopmatrix<16x16xf16, Subgroup>
%156 = spirv.IMul %4, %cst1280_i32 : i32
%157 = spirv.IAdd %43, %156 : i32
%158 = spirv.IAdd %157, %75 : i32
%159 = spirv.IAdd %158, %38 : i32
%160 = spirv.IAdd %159, %124 : i32
%161 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %160] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %161, %152, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
%162 = spirv.IAdd %160, %cst2_i32 : i32
%163 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %162] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %163, %153, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
%164 = spirv.IAdd %160, %cst640_i32 : i32
%165 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %164] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %165, %154, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
%166 = spirv.IAdd %160, %cst642_i32 : i32
%167 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %166] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %167, %155, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_36_matmul_18432x320x320, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__
spirv.ExecutionMode @forward_dispatch_36_matmul_18432x320x320 "LocalSize", 64, 2, 1
}
}
}
}
// forward_dispatch_38: per-workgroup sum reduction followed by a divide by 320.
// Each workgroup of 160 invocations loads 160 vector<2xf32> values (320 floats),
// sums them, divides by 320.0 and writes one f32 result indexed by WorkgroupId.x.
// NOTE(review): presumably this is the "mean" half of a normalization over rows
// of length 320 (see the 18432x320 in the entry-point name) — confirm against the
// producing model.
hal.executable private @forward_dispatch_38 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
hal.executable.export public @forward_dispatch_38_generic_18432x320 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation3, workgroup_size = [160 : index, 1 : index, 1 : index]} {
// This region yields the constant triple (18432, 1, 1); in IREE the export
// region's results are the workgroup counts along x, y, z.
^bb0(%arg0: !hal.device, %arg1: index):
%c18432 = arith.constant 18432 : index
%c1 = arith.constant 1 : index
hal.return %c18432, %c1, %c1 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.3, [Shader, GroupNonUniformShuffle], [SPV_KHR_storage_buffer_storage_class]> {
// Workgroup scratch: one f32 slot per 32-lane subgroup (160 / 32 = 5 slots).
spirv.GlobalVariable @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<5 x f32>)>, Workgroup>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
// Two i32 push constants; each is turned into an element offset below.
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
// Binding (0, 0): input, read as vector<2xf32>. Binding (0, 1): output, one f32 per workgroup.
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf32>, stride=8> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>
spirv.func @forward_dispatch_38_generic_18432x320() "None" {
%cst160_i32 = spirv.Constant 160 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst0_i32 = spirv.Constant 0 : i32
%cst1_i32 = spirv.Constant 1 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst4_i32 = spirv.Constant 4 : i32
%cst16_i32 = spirv.Constant 16 : i32
%cst5_i32 = spirv.Constant 5 : i32
%cst_f32 = spirv.Constant 0.000000e+00 : f32
%cst_f32_0 = spirv.Constant 3.200000e+02 : f32
// %1 = LocalInvocationId.x
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%0 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%1 = spirv.CompositeExtract %0[0 : i32] : vector<3xi32>
// %3 / %5 = push constants 0 and 1.
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
%2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%3 = spirv.Load "PushConstant" %2 : i32
%4 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%5 = spirv.Load "PushConstant" %4 : i32
// %11 = floor(%3 / 8): negative inputs are mapped through -1 - x before and
// after the truncating SDiv so the result rounds toward negative infinity.
// The divisor 8 equals the vector<2xf32> array stride, so %3 is presumably a
// byte offset into binding (0, 0) — confirm.
%6 = spirv.SLessThan %3, %cst0_i32 : i32
%7 = spirv.ISub %cst-1_i32, %3 : i32
%8 = spirv.Select %6, %7, %3 : i1, i32
%9 = spirv.SDiv %8, %cst8_i32 : i32
%10 = spirv.ISub %cst-1_i32, %9 : i32
%11 = spirv.Select %6, %10, %9 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf32>, stride=8> [0])>, StorageBuffer>
// %17 = floor(%5 / 4), same pattern; 4 equals the f32 stride of binding (0, 1).
%12 = spirv.SLessThan %5, %cst0_i32 : i32
%13 = spirv.ISub %cst-1_i32, %5 : i32
%14 = spirv.Select %12, %13, %5 : i1, i32
%15 = spirv.SDiv %14, %cst4_i32 : i32
%16 = spirv.ISub %cst-1_i32, %15 : i32
%17 = spirv.Select %12, %16, %15 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>
// %19 = WorkgroupId.x
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%18 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%19 = spirv.CompositeExtract %18[0 : i32] : vector<3xi32>
// Input element index = WorkgroupId.x * 160 + LocalInvocationId.x + offset0.
%20 = spirv.IMul %19, %cst160_i32 : i32
%21 = spirv.IAdd %20, %1 : i32
%22 = spirv.IAdd %21, %11 : i32
%23 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %22] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf32>, stride=8> [0])>, StorageBuffer>, i32, i32
%24 = spirv.Load "StorageBuffer" %23 : vector<2xf32>
// Per-invocation partial sum of the two loaded components.
%25 = spirv.CompositeExtract %24[0 : i32] : vector<2xf32>
%26 = spirv.CompositeExtract %24[1 : i32] : vector<2xf32>
%27 = spirv.FAdd %25, %26 : f32
// Stage 1: xor-shuffle add with masks 1, 2, 4, 8, 16. With the 32-wide subgroup
// declared in the target env, %37 ends up holding the sum over the subgroup.
%28 = spirv.GroupNonUniformShuffleXor <Subgroup> %27, %cst1_i32 : f32, i32
%29 = spirv.FAdd %27, %28 : f32
%30 = spirv.GroupNonUniformShuffleXor <Subgroup> %29, %cst2_i32 : f32, i32
%31 = spirv.FAdd %29, %30 : f32
%32 = spirv.GroupNonUniformShuffleXor <Subgroup> %31, %cst4_i32 : f32, i32
%33 = spirv.FAdd %31, %32 : f32
%34 = spirv.GroupNonUniformShuffleXor <Subgroup> %33, %cst8_i32 : f32, i32
%35 = spirv.FAdd %33, %34 : f32
%36 = spirv.GroupNonUniformShuffleXor <Subgroup> %35, %cst16_i32 : f32, i32
%37 = spirv.FAdd %35, %36 : f32
%__workgroup_mem__5_addr = spirv.mlir.addressof @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<5 x f32>)>, Workgroup>
// %38 = LocalInvocationId.x / 32, %39 = LocalInvocationId.x % 32.
%38 = spirv.UDiv %1, %cst32_i32 : i32
%39 = spirv.UMod %1, %cst32_i32 : i32
%40 = spirv.IEqual %39, %cst0_i32 : i32
// Invocations with %39 == 0 publish their stage-1 sum to workgroup slot %38.
spirv.mlir.selection {
spirv.BranchConditional %40, ^bb1, ^bb2
^bb1: // pred: ^bb0
%56 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %38] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x f32>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %56, %37 : f32
spirv.Branch ^bb2
^bb2: // 2 preds: ^bb0, ^bb1
spirv.mlir.merge
}
// Workgroup-scope barrier with workgroup-memory semantics: the stores above
// must be visible before any invocation reads the scratch array below.
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
// Stage 2: every invocation reads slot min(%39, 4) so the index stays inside
// the 5-element array; invocations with %39 >= 5 contribute 0.0 instead.
%41 = spirv.GL.UMin %39, %cst4_i32 : i32
%42 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %41] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x f32>)>, Workgroup>, i32, i32
%43 = spirv.Load "Workgroup" %42 : f32
%44 = spirv.SGreaterThanEqual %39, %cst5_i32 : i32
%45 = spirv.Select %44, %cst_f32, %43 : i1, f32
// Masks 1, 2, 4 combine groups of 8 lanes, which covers the 5 live slots.
%46 = spirv.GroupNonUniformShuffleXor <Subgroup> %45, %cst1_i32 : f32, i32
%47 = spirv.FAdd %45, %46 : f32
%48 = spirv.GroupNonUniformShuffleXor <Subgroup> %47, %cst2_i32 : f32, i32
%49 = spirv.FAdd %47, %48 : f32
%50 = spirv.GroupNonUniformShuffleXor <Subgroup> %49, %cst4_i32 : f32, i32
%51 = spirv.FAdd %49, %50 : f32
// Take the value held by subgroup invocation 0 (the group that saw slots 0..4).
%52 = spirv.GroupNonUniformShuffle <Subgroup> %51, %cst0_i32 : f32, i32
// Adds the constant 0.0 (presumably the reduction's initial value), then
// divides the total by 320.0.
%53 = spirv.FAdd %52, %cst_f32 : f32
%54 = spirv.FDiv %53, %cst_f32_0 : f32
%55 = spirv.IEqual %1, %cst0_i32 : i32
// Only LocalInvocationId.x == 0 writes the result, at index WorkgroupId.x + offset1.
spirv.mlir.selection {
spirv.BranchConditional %55, ^bb1, ^bb2
^bb1: // pred: ^bb0
%56 = spirv.IAdd %19, %17 : i32
%57 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %56] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %57, %54 : f32
spirv.Branch ^bb2
^bb2: // 2 preds: ^bb0, ^bb1
spirv.mlir.merge
}
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_38_generic_18432x320, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__
spirv.ExecutionMode @forward_dispatch_38_generic_18432x320 "LocalSize", 160, 1, 1
}
}
}
}
// forward_dispatch_39: per-workgroup sum of squared differences followed by an
// element-wise normalize / scale / shift in f16.
// For each workgroup (160 invocations, 2 elements each = 320 elements) it
// computes v = sum((x - m)^2) with m a per-workgroup f32, then stores
//   (x16 - f16(m)) * rsqrt(f16(v / 320) + eps) * a + b
// where x16, a, b are vector<2xf16> loads and eps is %cst_f32_0.
// NOTE(review): this has the shape of a layer normalization whose mean m is
// produced by an earlier dispatch — confirm against the producing model.
hal.executable private @forward_dispatch_39 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
hal.executable.export public @forward_dispatch_39_generic_18432x320 ordinal(0) layout(#pipeline_layout9) attributes {translation_info = #translation3, workgroup_size = [160 : index, 1 : index, 1 : index]} {
// This region yields the constant triple (18432, 1, 1); in IREE the export
// region's results are the workgroup counts along x, y, z.
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
%c18432 = arith.constant 18432 : index
%c1 = arith.constant 1 : index
hal.return %c18432, %c1, %c1 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.3, [Shader, StorageBuffer16BitAccess, GroupNonUniformShuffle, Float16], [SPV_KHR_storage_buffer_storage_class, SPV_KHR_16bit_storage]> {
// Workgroup scratch: one f32 slot per 32-lane subgroup (160 / 32 = 5 slots).
spirv.GlobalVariable @__workgroup_mem__8 : !spirv.ptr<!spirv.struct<(!spirv.array<5 x f32>)>, Workgroup>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
// Six i32 push constants; each is turned into an element offset below.
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<6 x i32, stride=4> [0])>, PushConstant>
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
// Binding (0, 0) is declared three times, marked {aliased}, with three element
// types: vector<2xf32>, f32 and vector<2xf16>. Each view is indexed with its
// own push-constant-derived offset.
spirv.GlobalVariable @__resource_var_0_0__1 bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf32>, stride=8> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_0__0 bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf16>, stride=4> [0])>, StorageBuffer>
// Binding (0, 1): read twice at different offsets (multiplier and addend).
// Binding (0, 2): output.
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf16>, stride=4> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf16>, stride=4> [0])>, StorageBuffer>
spirv.func @forward_dispatch_39_generic_18432x320() "None" {
%cst3_i32 = spirv.Constant 3 : i32
%cst160_i32 = spirv.Constant 160 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst1_i32 = spirv.Constant 1 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst4_i32 = spirv.Constant 4 : i32
%cst16_i32 = spirv.Constant 16 : i32
%cst0_i32 = spirv.Constant 0 : i32
%cst5_i32 = spirv.Constant 5 : i32
%cst_f32 = spirv.Constant 0.000000e+00 : f32
%cst_vec_2xf32 = spirv.Constant dense<3.200000e+02> : vector<2xf32>
// Epsilon added before the inverse square root (about 1e-5).
%cst_f32_0 = spirv.Constant 9.99999974E-6 : f32
// %1 = LocalInvocationId.x
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%0 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%1 = spirv.CompositeExtract %0[0 : i32] : vector<3xi32>
// %3, %5, %7, %9, %11, %13 = push constants 0..5.
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<6 x i32, stride=4> [0])>, PushConstant>
%2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<6 x i32, stride=4> [0])>, PushConstant>, i32, i32
%3 = spirv.Load "PushConstant" %2 : i32
%4 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<6 x i32, stride=4> [0])>, PushConstant>, i32, i32
%5 = spirv.Load "PushConstant" %4 : i32
%6 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<6 x i32, stride=4> [0])>, PushConstant>, i32, i32
%7 = spirv.Load "PushConstant" %6 : i32
%8 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst3_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<6 x i32, stride=4> [0])>, PushConstant>, i32, i32
%9 = spirv.Load "PushConstant" %8 : i32
%10 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst4_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<6 x i32, stride=4> [0])>, PushConstant>, i32, i32
%11 = spirv.Load "PushConstant" %10 : i32
%12 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst5_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<6 x i32, stride=4> [0])>, PushConstant>, i32, i32
%13 = spirv.Load "PushConstant" %12 : i32
// Each block of six ops below computes floor(pc / stride): negative inputs are
// mapped through -1 - x before and after the truncating SDiv so the result
// rounds toward negative infinity. The divisors match the array strides, so the
// push constants are presumably byte offsets — confirm.
// %19 = floor(%3 / 8): offset into the vector<2xf32> view of binding (0, 0).
%14 = spirv.SLessThan %3, %cst0_i32 : i32
%15 = spirv.ISub %cst-1_i32, %3 : i32
%16 = spirv.Select %14, %15, %3 : i1, i32
%17 = spirv.SDiv %16, %cst8_i32 : i32
%18 = spirv.ISub %cst-1_i32, %17 : i32
%19 = spirv.Select %14, %18, %17 : i1, i32
%__resource_var_0_0__1_addr = spirv.mlir.addressof @__resource_var_0_0__1 : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf32>, stride=8> [0])>, StorageBuffer>
// %25 = floor(%5 / 4): offset into the f32 view of binding (0, 0).
%20 = spirv.SLessThan %5, %cst0_i32 : i32
%21 = spirv.ISub %cst-1_i32, %5 : i32
%22 = spirv.Select %20, %21, %5 : i1, i32
%23 = spirv.SDiv %22, %cst4_i32 : i32
%24 = spirv.ISub %cst-1_i32, %23 : i32
%25 = spirv.Select %20, %24, %23 : i1, i32
%__resource_var_0_0__0_addr = spirv.mlir.addressof @__resource_var_0_0__0 : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>
// %31 = floor(%7 / 4): offset into the vector<2xf16> view of binding (0, 0).
%26 = spirv.SLessThan %7, %cst0_i32 : i32
%27 = spirv.ISub %cst-1_i32, %7 : i32
%28 = spirv.Select %26, %27, %7 : i1, i32
%29 = spirv.SDiv %28, %cst4_i32 : i32
%30 = spirv.ISub %cst-1_i32, %29 : i32
%31 = spirv.Select %26, %30, %29 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf16>, stride=4> [0])>, StorageBuffer>
// %37 = floor(%9 / 4): first offset into binding (0, 1).
%32 = spirv.SLessThan %9, %cst0_i32 : i32
%33 = spirv.ISub %cst-1_i32, %9 : i32
%34 = spirv.Select %32, %33, %9 : i1, i32
%35 = spirv.SDiv %34, %cst4_i32 : i32
%36 = spirv.ISub %cst-1_i32, %35 : i32
%37 = spirv.Select %32, %36, %35 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf16>, stride=4> [0])>, StorageBuffer>
// %43 = floor(%11 / 4): second offset into binding (0, 1).
%38 = spirv.SLessThan %11, %cst0_i32 : i32
%39 = spirv.ISub %cst-1_i32, %11 : i32
%40 = spirv.Select %38, %39, %11 : i1, i32
%41 = spirv.SDiv %40, %cst4_i32 : i32
%42 = spirv.ISub %cst-1_i32, %41 : i32
%43 = spirv.Select %38, %42, %41 : i1, i32
// %49 = floor(%13 / 4): offset into binding (0, 2), the output.
%44 = spirv.SLessThan %13, %cst0_i32 : i32
%45 = spirv.ISub %cst-1_i32, %13 : i32
%46 = spirv.Select %44, %45, %13 : i1, i32
%47 = spirv.SDiv %46, %cst4_i32 : i32
%48 = spirv.ISub %cst-1_i32, %47 : i32
%49 = spirv.Select %44, %48, %47 : i1, i32
%__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf16>, stride=4> [0])>, StorageBuffer>
// %51 = WorkgroupId.x
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%50 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%51 = spirv.CompositeExtract %50[0 : i32] : vector<3xi32>
// %53 = WorkgroupId.x * 160 + LocalInvocationId.x: this invocation's 2-element
// slot, reused for the f32 load, the f16 load and the final store.
%52 = spirv.IMul %51, %cst160_i32 : i32
%53 = spirv.IAdd %52, %1 : i32
%54 = spirv.IAdd %53, %19 : i32
%55 = spirv.AccessChain %__resource_var_0_0__1_addr[%cst0_i32, %54] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf32>, stride=8> [0])>, StorageBuffer>, i32, i32
%56 = spirv.Load "StorageBuffer" %55 : vector<2xf32>
// %59 = one f32 per workgroup, read at index WorkgroupId.x + offset1.
%57 = spirv.IAdd %51, %25 : i32
%58 = spirv.AccessChain %__resource_var_0_0__0_addr[%cst0_i32, %57] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>, i32, i32
%59 = spirv.Load "StorageBuffer" %58 : f32
// Squared difference against the broadcast %59, then sum of both components.
%60 = spirv.CompositeConstruct %59, %59 : (f32, f32) -> vector<2xf32>
%61 = spirv.FSub %56, %60 : vector<2xf32>
%62 = spirv.FMul %61, %61 : vector<2xf32>
%63 = spirv.CompositeExtract %62[0 : i32] : vector<2xf32>
%64 = spirv.CompositeExtract %62[1 : i32] : vector<2xf32>
%65 = spirv.FAdd %63, %64 : f32
// Stage 1: xor-shuffle add with masks 1, 2, 4, 8, 16. With the 32-wide subgroup
// declared in the target env, %75 ends up holding the sum over the subgroup.
%66 = spirv.GroupNonUniformShuffleXor <Subgroup> %65, %cst1_i32 : f32, i32
%67 = spirv.FAdd %65, %66 : f32
%68 = spirv.GroupNonUniformShuffleXor <Subgroup> %67, %cst2_i32 : f32, i32
%69 = spirv.FAdd %67, %68 : f32
%70 = spirv.GroupNonUniformShuffleXor <Subgroup> %69, %cst4_i32 : f32, i32
%71 = spirv.FAdd %69, %70 : f32
%72 = spirv.GroupNonUniformShuffleXor <Subgroup> %71, %cst8_i32 : f32, i32
%73 = spirv.FAdd %71, %72 : f32
%74 = spirv.GroupNonUniformShuffleXor <Subgroup> %73, %cst16_i32 : f32, i32
%75 = spirv.FAdd %73, %74 : f32
%__workgroup_mem__8_addr = spirv.mlir.addressof @__workgroup_mem__8 : !spirv.ptr<!spirv.struct<(!spirv.array<5 x f32>)>, Workgroup>
// %76 = LocalInvocationId.x / 32, %77 = LocalInvocationId.x % 32.
%76 = spirv.UDiv %1, %cst32_i32 : i32
%77 = spirv.UMod %1, %cst32_i32 : i32
%78 = spirv.IEqual %77, %cst0_i32 : i32
// Invocations with %77 == 0 publish their stage-1 sum to workgroup slot %76.
spirv.mlir.selection {
spirv.BranchConditional %78, ^bb1, ^bb2
^bb1: // pred: ^bb0
%116 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %76] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x f32>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %116, %75 : f32
spirv.Branch ^bb2
^bb2: // 2 preds: ^bb0, ^bb1
spirv.mlir.merge
}
// Workgroup-scope barrier with workgroup-memory semantics: the stores above
// must be visible before any invocation reads the scratch array below.
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
// Stage 2: every invocation reads slot min(%77, 4) so the index stays inside
// the 5-element array; invocations with %77 >= 5 contribute 0.0 instead.
%79 = spirv.GL.UMin %77, %cst4_i32 : i32
%80 = spirv.AccessChain %__workgroup_mem__8_addr[%cst0_i32, %79] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x f32>)>, Workgroup>, i32, i32
%81 = spirv.Load "Workgroup" %80 : f32
%82 = spirv.SGreaterThanEqual %77, %cst5_i32 : i32
%83 = spirv.Select %82, %cst_f32, %81 : i1, f32
// Masks 1, 2, 4 combine groups of 8 lanes, which covers the 5 live slots.
%84 = spirv.GroupNonUniformShuffleXor <Subgroup> %83, %cst1_i32 : f32, i32
%85 = spirv.FAdd %83, %84 : f32
%86 = spirv.GroupNonUniformShuffleXor <Subgroup> %85, %cst2_i32 : f32, i32
%87 = spirv.FAdd %85, %86 : f32
%88 = spirv.GroupNonUniformShuffleXor <Subgroup> %87, %cst4_i32 : f32, i32
%89 = spirv.FAdd %87, %88 : f32
// Take the value held by subgroup invocation 0, then add the constant 0.0
// (presumably the reduction's initial value). %91 is the workgroup total.
%90 = spirv.GroupNonUniformShuffle <Subgroup> %89, %cst0_i32 : f32, i32
%91 = spirv.FAdd %90, %cst_f32 : f32
// %94 = f16 pair at slot %53 in the vector<2xf16> view of binding (0, 0).
%92 = spirv.IAdd %53, %31 : i32
%93 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %92] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf16>, stride=4> [0])>, StorageBuffer>, i32, i32
%94 = spirv.Load "StorageBuffer" %93 : vector<2xf16>
// %97 and %100 are both read from binding (0, 1), indexed by LocalInvocationId.x
// only (no workgroup term), at offsets %37 and %43 respectively. %97 is the
// multiplier and %100 the addend in the final expression.
%95 = spirv.IAdd %1, %37 : i32
%96 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %95] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf16>, stride=4> [0])>, StorageBuffer>, i32, i32
%97 = spirv.Load "StorageBuffer" %96 : vector<2xf16>
%98 = spirv.IAdd %1, %43 : i32
%99 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %98] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf16>, stride=4> [0])>, StorageBuffer>, i32, i32
%100 = spirv.Load "StorageBuffer" %99 : vector<2xf16>
// %106 = f16(total / 320) + f16(eps); the division is done in f32, the add in f16.
// NOTE(review): eps (~1e-5) is below the smallest normal f16 value (~6.1e-5),
// so its effect after FConvert depends on f16 subnormal handling — confirm this
// precision is intended.
%101 = spirv.CompositeConstruct %91, %91 : (f32, f32) -> vector<2xf32>
%102 = spirv.FConvert %cst_f32_0 : f32 to f16
%103 = spirv.CompositeConstruct %102, %102 : (f16, f16) -> vector<2xf16>
%104 = spirv.FDiv %101, %cst_vec_2xf32 : vector<2xf32>
%105 = spirv.FConvert %104 : vector<2xf32> to vector<2xf16>
%106 = spirv.FAdd %105, %103 : vector<2xf16>
// Result = (%94 - f16(%59)) * rsqrt(%106) * %97 + %100, all in f16.
%107 = spirv.FConvert %59 : f32 to f16
%108 = spirv.CompositeConstruct %107, %107 : (f16, f16) -> vector<2xf16>
%109 = spirv.GL.InverseSqrt %106 : vector<2xf16>
%110 = spirv.FSub %94, %108 : vector<2xf16>
%111 = spirv.FMul %110, %109 : vector<2xf16>
%112 = spirv.FMul %111, %97 : vector<2xf16>
%113 = spirv.FAdd %112, %100 : vector<2xf16>
// Every invocation stores its f16 pair to binding (0, 2) at slot %53 + offset5.
%114 = spirv.IAdd %53, %49 : i32
%115 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %114] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<2xf16>, stride=4> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %115, %113 : vector<2xf16>
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_39_generic_18432x320, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__
spirv.ExecutionMode @forward_dispatch_39_generic_18432x320 "LocalSize", 160, 1, 1
}
}
}
}
hal.executable private @forward_dispatch_40 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
hal.executable.export public @forward_dispatch_40_matmul_18432x320x320 ordinal(0) layout(#pipeline_layout3) attributes {subgroup_size = 32 : index, translation_info = #translation4, workgroup_size = [64 : index, 2 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
%c5 = arith.constant 5 : index
%c288 = arith.constant 288 : index
%c1 = arith.constant 1 : index
hal.return %c5, %c288, %c1 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, Float16, CooperativeMatrixNV], [SPV_KHR_storage_buffer_storage_class, SPV_NV_cooperative_matrix]> {
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>
spirv.GlobalVariable @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>
spirv.GlobalVariable @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.func @forward_dispatch_40_matmul_18432x320x320() "None" attributes {spirv.entry_point_abi = #spirv.entry_point_abi<subgroup_size = 32>} {
%cst9_i32 = spirv.Constant 9 : i32
%false = spirv.Constant false
%cst5_i32 = spirv.Constant 5 : i32
%cst1_i32 = spirv.Constant 1 : i32
%cst-33_i32 = spirv.Constant -33 : i32
%cst642_i32 = spirv.Constant 642 : i32
%cst-576_i32 = spirv.Constant -576 : i32
%cst40_i32 = spirv.Constant 40 : i32
%cst-640_i32 = spirv.Constant -640 : i32
%cst146_i32 = spirv.Constant 146 : i32
%cst82_i32 = spirv.Constant 82 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst144_i32 = spirv.Constant 144 : i32
%cst72_i32 = spirv.Constant 72 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst320_i32 = spirv.Constant 320 : i32
%cst160_i32 = spirv.Constant 160 : i32
%cst80_i32 = spirv.Constant 80 : i32
%cst36_i32 = spirv.Constant 36 : i32
%cst4_i32 = spirv.Constant 4 : i32
%cst1280_i32 = spirv.Constant 1280 : i32
%cst640_i32 = spirv.Constant 640 : i32
%cst2560_i32 = spirv.Constant 2560 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst16_i32 = spirv.Constant 16 : i32
%cst0_i32 = spirv.Constant 0 : i32
%cst288_i32 = spirv.Constant 288 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst_f16 = spirv.Constant 0.000000e+00 : f16
%0 = spirv.CompositeConstruct %cst_f16 : (f16) -> !spirv.coopmatrix<16x16xf16, Subgroup>
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%1 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%2 = spirv.CompositeExtract %1[0 : i32] : vector<3xi32>
%3 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%4 = spirv.CompositeExtract %3[1 : i32] : vector<3xi32>
%5 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%6 = spirv.CompositeExtract %5[2 : i32] : vector<3xi32>
%__workgroup_mem__4_addr = spirv.mlir.addressof @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>
%__workgroup_mem__5_addr = spirv.mlir.addressof @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>
%7 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32
%8 = spirv.Load "PushConstant" %7 : i32
%9 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32
%10 = spirv.Load "PushConstant" %9 : i32
%11 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32
%12 = spirv.Load "PushConstant" %11 : i32
%13 = spirv.SLessThan %8, %cst0_i32 : i32
%14 = spirv.ISub %cst-1_i32, %8 : i32
%15 = spirv.Select %13, %14, %8 : i1, i32
%16 = spirv.SDiv %15, %cst16_i32 : i32
%17 = spirv.ISub %cst-1_i32, %16 : i32
%18 = spirv.Select %13, %17, %16 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%19 = spirv.SLessThan %10, %cst0_i32 : i32
%20 = spirv.ISub %cst-1_i32, %10 : i32
%21 = spirv.Select %19, %20, %10 : i1, i32
%22 = spirv.SDiv %21, %cst16_i32 : i32
%23 = spirv.ISub %cst-1_i32, %22 : i32
%24 = spirv.Select %19, %23, %22 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%25 = spirv.SLessThan %12, %cst0_i32 : i32
%26 = spirv.ISub %cst-1_i32, %12 : i32
%27 = spirv.Select %25, %26, %12 : i1, i32
%28 = spirv.SDiv %27, %cst16_i32 : i32
%29 = spirv.ISub %cst-1_i32, %28 : i32
%30 = spirv.Select %25, %29, %28 : i1, i32
%__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%31 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%32 = spirv.CompositeExtract %31[1 : i32] : vector<3xi32>
%33 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%34 = spirv.CompositeExtract %33[0 : i32] : vector<3xi32>
%35 = spirv.IMul %32, %cst2560_i32 : i32
%36 = spirv.IAdd %35, %2 : i32
%37 = spirv.IMul %4, %cst640_i32 : i32
%38 = spirv.IAdd %36, %37 : i32
%39 = spirv.IMul %6, %cst1280_i32 : i32
%40 = spirv.IAdd %38, %39 : i32
%41 = spirv.IAdd %40, %18 : i32
%42 = spirv.SLessThan %2, %cst0_i32 : i32
%43 = spirv.ISub %cst-1_i32, %2 : i32
%44 = spirv.Select %42, %43, %2 : i1, i32
%45 = spirv.SDiv %44, %cst4_i32 : i32
%46 = spirv.ISub %cst-1_i32, %45 : i32
%47 = spirv.Select %42, %46, %45 : i1, i32
%48 = spirv.IMul %47, %cst36_i32 : i32
%49 = spirv.IAdd %41, %48 : i32
%50 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %49] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%51 = spirv.Load "StorageBuffer" %50 : vector<4xf32>
%52 = spirv.IMul %4, %cst80_i32 : i32
%53 = spirv.IAdd %2, %52 : i32
%54 = spirv.IMul %6, %cst160_i32 : i32
%55 = spirv.IAdd %53, %54 : i32
%56 = spirv.IAdd %55, %47 : i32
%57 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %56] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %57, %51 : vector<4xf32>
%58 = spirv.IAdd %49, %cst1280_i32 : i32
%59 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %58] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%60 = spirv.Load "StorageBuffer" %59 : vector<4xf32>
%61 = spirv.IAdd %56, %cst160_i32 : i32
%62 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %61] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %62, %60 : vector<4xf32>
%63 = spirv.IMul %4, %cst320_i32 : i32
%64 = spirv.IAdd %2, %63 : i32
%65 = spirv.IMul %6, %cst640_i32 : i32
%66 = spirv.IAdd %64, %65 : i32
%67 = spirv.IMul %34, %cst8_i32 : i32
%68 = spirv.IAdd %66, %67 : i32
%69 = spirv.IAdd %68, %24 : i32
%70 = spirv.SDiv %44, %cst8_i32 : i32
%71 = spirv.ISub %cst-1_i32, %70 : i32
%72 = spirv.Select %42, %71, %70 : i1, i32
%73 = spirv.IMul %72, %cst32_i32 : i32
%74 = spirv.IAdd %69, %73 : i32
%75 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %74] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%76 = spirv.Load "StorageBuffer" %75 : vector<4xf32>
%77 = spirv.IMul %4, %cst72_i32 : i32
%78 = spirv.IAdd %2, %77 : i32
%79 = spirv.IMul %6, %cst144_i32 : i32
%80 = spirv.IAdd %78, %79 : i32
%81 = spirv.IAdd %80, %72 : i32
%82 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %81] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %82, %76 : vector<4xf32>
%83 = spirv.IAdd %74, %cst640_i32 : i32
%84 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %83] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%85 = spirv.Load "StorageBuffer" %84 : vector<4xf32>
%86 = spirv.IAdd %81, %cst144_i32 : i32
%87 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %86] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %87, %85 : vector<4xf32>
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
%88 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%89 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%90 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%91 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%92 = spirv.Variable : !spirv.ptr<i32, Function>
spirv.mlir.loop {
spirv.Branch ^bb1(%cst0_i32, %0, %0, %0, %0, %cst0_i32 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32)
^bb1(%149: i32, %150: !spirv.coopmatrix<16x16xf16, Subgroup>, %151: !spirv.coopmatrix<16x16xf16, Subgroup>, %152: !spirv.coopmatrix<16x16xf16, Subgroup>, %153: !spirv.coopmatrix<16x16xf16, Subgroup>, %154: i32): // 2 preds: ^bb0, ^bb2
%155 = spirv.SLessThan %149, %cst288_i32 : i32
spirv.BranchConditional %155, ^bb2, ^bb3
^bb2: // pred: ^bb1
%156 = spirv.IMul %154, %cst320_i32 : i32
%157 = spirv.IMul %4, %cst160_i32 : i32
%158 = spirv.IAdd %156, %157 : i32
%159 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %158] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%160 = spirv.NV.CooperativeMatrixLoad %159, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%161 = spirv.IAdd %158, %cst2_i32 : i32
%162 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %161] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%163 = spirv.NV.CooperativeMatrixLoad %162, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%164 = spirv.IAdd %158, %cst80_i32 : i32
%165 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %164] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%166 = spirv.NV.CooperativeMatrixLoad %165, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%167 = spirv.IAdd %158, %cst82_i32 : i32
%168 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %167] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%169 = spirv.NV.CooperativeMatrixLoad %168, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%170 = spirv.IMul %154, %cst288_i32 : i32
%171 = spirv.SDiv %44, %cst32_i32 : i32
%172 = spirv.ISub %cst-1_i32, %171 : i32
%173 = spirv.Select %42, %172, %171 : i1, i32
%174 = spirv.IMul %173, %cst4_i32 : i32
%175 = spirv.IAdd %170, %174 : i32
%176 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %175] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%177 = spirv.NV.CooperativeMatrixLoad %176, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%178 = spirv.IAdd %175, %cst2_i32 : i32
%179 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %178] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%180 = spirv.NV.CooperativeMatrixLoad %179, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%181 = spirv.IAdd %175, %cst144_i32 : i32
%182 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %181] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%183 = spirv.NV.CooperativeMatrixLoad %182, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%184 = spirv.IAdd %175, %cst146_i32 : i32
%185 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %184] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%186 = spirv.NV.CooperativeMatrixLoad %185, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%187 = spirv.NV.CooperativeMatrixMulAdd %160, %177, %150 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%188 = spirv.NV.CooperativeMatrixMulAdd %163, %183, %187 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%189 = spirv.NV.CooperativeMatrixMulAdd %160, %180, %151 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%190 = spirv.NV.CooperativeMatrixMulAdd %163, %186, %189 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%191 = spirv.NV.CooperativeMatrixMulAdd %166, %177, %152 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%192 = spirv.NV.CooperativeMatrixMulAdd %169, %183, %191 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%193 = spirv.NV.CooperativeMatrixMulAdd %166, %180, %153 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%194 = spirv.NV.CooperativeMatrixMulAdd %169, %186, %193 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%195 = spirv.IAdd %149, %cst32_i32 : i32
%196 = spirv.SLessThan %195, %cst0_i32 : i32
%197 = spirv.ISub %cst-33_i32, %149 : i32
%198 = spirv.Select %196, %197, %195 : i1, i32
%199 = spirv.SDiv %198, %cst8_i32 : i32
%200 = spirv.ISub %cst-1_i32, %199 : i32
%201 = spirv.Select %196, %200, %199 : i1, i32
%202 = spirv.IAdd %41, %201 : i32
%203 = spirv.IAdd %202, %48 : i32
%204 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %203] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%205 = spirv.Load "StorageBuffer" %204 : vector<4xf32>
%206 = spirv.SDiv %198, %cst32_i32 : i32
%207 = spirv.ISub %cst-1_i32, %206 : i32
%208 = spirv.Select %196, %207, %206 : i1, i32
%209 = spirv.GL.SAbs %208 : i32
%210 = spirv.GL.SAbs %cst2_i32 : i32
%211 = spirv.UMod %209, %210 : i32
%212 = spirv.IEqual %208, %209 : i32
%213 = spirv.SNegate %211 : i32
%214 = spirv.Select %212, %211, %213 : i1, i32
%215 = spirv.SLessThan %214, %cst0_i32 : i32
%216 = spirv.IAdd %214, %cst2_i32 : i32
%217 = spirv.Select %215, %216, %214 : i1, i32
%218 = spirv.IMul %208, %cst320_i32 : i32
%219 = spirv.IAdd %218, %55 : i32
%220 = spirv.SLessThan %208, %cst0_i32 : i32
%221 = spirv.ISub %cst-1_i32, %208 : i32
%222 = spirv.Select %220, %221, %208 : i1, i32
%223 = spirv.SDiv %222, %cst2_i32 : i32
%224 = spirv.ISub %cst-1_i32, %223 : i32
%225 = spirv.Select %220, %224, %223 : i1, i32
%226 = spirv.IMul %225, %cst-640_i32 : i32
%227 = spirv.IAdd %219, %226 : i32
%228 = spirv.IAdd %227, %47 : i32
%229 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %228] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %229, %205 : vector<4xf32>
%230 = spirv.IAdd %203, %cst1280_i32 : i32
%231 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %230] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%232 = spirv.Load "StorageBuffer" %231 : vector<4xf32>
%233 = spirv.IAdd %228, %cst160_i32 : i32
%234 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %233] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %234, %232 : vector<4xf32>
%235 = spirv.IMul %195, %cst40_i32 : i32
%236 = spirv.IAdd %235, %2 : i32
%237 = spirv.IAdd %236, %63 : i32
%238 = spirv.IAdd %237, %65 : i32
%239 = spirv.IAdd %238, %67 : i32
%240 = spirv.IAdd %239, %24 : i32
%241 = spirv.IAdd %240, %73 : i32
%242 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %241] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%243 = spirv.Load "StorageBuffer" %242 : vector<4xf32>
%244 = spirv.IMul %208, %cst288_i32 : i32
%245 = spirv.IAdd %244, %80 : i32
%246 = spirv.IMul %225, %cst-576_i32 : i32
%247 = spirv.IAdd %245, %246 : i32
%248 = spirv.IAdd %247, %72 : i32
%249 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %248] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %249, %243 : vector<4xf32>
%250 = spirv.IAdd %241, %cst640_i32 : i32
%251 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %250] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%252 = spirv.Load "StorageBuffer" %251 : vector<4xf32>
%253 = spirv.IAdd %248, %cst144_i32 : i32
%254 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %253] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %254, %252 : vector<4xf32>
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
spirv.Store "Function" %88, %188 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %89, %190 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %90, %192 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %91, %194 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %92, %217 : i32
spirv.Branch ^bb1(%195, %188, %190, %192, %194, %217 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
%93 = spirv.Load "Function" %92 : i32
%94 = spirv.Load "Function" %91 : !spirv.coopmatrix<16x16xf16, Subgroup>
%95 = spirv.Load "Function" %90 : !spirv.coopmatrix<16x16xf16, Subgroup>
%96 = spirv.Load "Function" %89 : !spirv.coopmatrix<16x16xf16, Subgroup>
%97 = spirv.Load "Function" %88 : !spirv.coopmatrix<16x16xf16, Subgroup>
%98 = spirv.IMul %4, %cst160_i32 : i32
%99 = spirv.IMul %93, %cst320_i32 : i32
%100 = spirv.IAdd %98, %99 : i32
%101 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %100] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%102 = spirv.NV.CooperativeMatrixLoad %101, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%103 = spirv.IAdd %100, %cst2_i32 : i32
%104 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %103] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%105 = spirv.NV.CooperativeMatrixLoad %104, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%106 = spirv.IAdd %100, %cst80_i32 : i32
%107 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %106] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%108 = spirv.NV.CooperativeMatrixLoad %107, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%109 = spirv.IAdd %100, %cst82_i32 : i32
%110 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %109] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%111 = spirv.NV.CooperativeMatrixLoad %110, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%112 = spirv.IMul %93, %cst288_i32 : i32
%113 = spirv.SDiv %44, %cst32_i32 : i32
%114 = spirv.ISub %cst-1_i32, %113 : i32
%115 = spirv.Select %42, %114, %113 : i1, i32
%116 = spirv.IMul %115, %cst4_i32 : i32
%117 = spirv.IAdd %112, %116 : i32
%118 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %117] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%119 = spirv.NV.CooperativeMatrixLoad %118, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%120 = spirv.IAdd %117, %cst2_i32 : i32
%121 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %120] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%122 = spirv.NV.CooperativeMatrixLoad %121, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%123 = spirv.IAdd %117, %cst144_i32 : i32
%124 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %123] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%125 = spirv.NV.CooperativeMatrixLoad %124, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%126 = spirv.IAdd %117, %cst146_i32 : i32
%127 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %126] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%128 = spirv.NV.CooperativeMatrixLoad %127, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%129 = spirv.NV.CooperativeMatrixMulAdd %102, %119, %97 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%130 = spirv.NV.CooperativeMatrixMulAdd %105, %125, %129 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%131 = spirv.NV.CooperativeMatrixMulAdd %102, %122, %96 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%132 = spirv.NV.CooperativeMatrixMulAdd %105, %128, %131 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%133 = spirv.NV.CooperativeMatrixMulAdd %108, %119, %95 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%134 = spirv.NV.CooperativeMatrixMulAdd %111, %125, %133 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%135 = spirv.NV.CooperativeMatrixMulAdd %108, %122, %94 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%136 = spirv.NV.CooperativeMatrixMulAdd %111, %128, %135 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%137 = spirv.IMul %4, %cst1280_i32 : i32
%138 = spirv.IAdd %35, %137 : i32
%139 = spirv.IAdd %138, %67 : i32
%140 = spirv.IAdd %139, %30 : i32
%141 = spirv.IAdd %140, %116 : i32
%142 = spirv.IAdd %141, %cst642_i32 : i32
%143 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %142] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %143, %136, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
%144 = spirv.IAdd %141, %cst640_i32 : i32
%145 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %144] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %145, %134, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
%146 = spirv.IAdd %141, %cst2_i32 : i32
%147 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %146] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %147, %132, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
%148 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %141] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %148, %130, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_40_matmul_18432x320x320, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__
spirv.ExecutionMode @forward_dispatch_40_matmul_18432x320x320 "LocalSize", 64, 2, 1
}
}
}
}
// Executable for dispatch 43: a pure data-movement kernel. Every invocation
// loads vector<4xf16> values from binding (0, 0) and stores them, unmodified,
// to binding (0, 1) at a separately computed element index.
// NOTE(review): the load-side strides (80 / 320 / 737280 / 16) versus the
// store-side strides (16 / 64 / 737280 / 147456) look like a
// 2x9216x5x64 -> 2x5x9216x64 transpose expressed in units of 4 x f16; this is
// inferred from the constants and the entry-point name -- confirm against the
// originating op.
hal.executable private @forward_dispatch_43 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
// Export region: returns the three constants (2, 5, 4608); none of the block
// arguments are used. Presumably these are the x/y/z workgroup counts -- the
// kernel below splits WorkgroupId[2] with UDiv/UMod by 2304, and
// 4608 = 2 * 2304.
hal.executable.export public @forward_dispatch_43_generic_2x9216x5x64 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation1, workgroup_size = [8 : index, 1 : index, 4 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index):
%c2 = arith.constant 2 : index
%c5 = arith.constant 5 : index
%c4608 = arith.constant 4608 : index
hal.return %c2, %c5, %c4608 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
// Module-level interface: two built-in inputs, a push-constant block holding
// two i32 values, and two storage buffers of vector<4xf16> with an 8-byte
// array stride (source at binding 0, destination at binding 1).
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
// Kernel entry point (no arguments, no results); all inputs come from the
// global variables above.
spirv.func @forward_dispatch_43_generic_2x9216x5x64() "None" {
// Integer constants used by the index arithmetic below.
%cst1_i32 = spirv.Constant 1 : i32
%cst147456_i32 = spirv.Constant 147456 : i32
%cst64_i32 = spirv.Constant 64 : i32
%cst16_i32 = spirv.Constant 16 : i32
%cst737280_i32 = spirv.Constant 737280 : i32
%cst320_i32 = spirv.Constant 320 : i32
%cst80_i32 = spirv.Constant 80 : i32
%cst4_i32 = spirv.Constant 4 : i32
%cst9216_i32 = spirv.Constant 9216 : i32
%cst-4_i32 = spirv.Constant -4 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst0_i32 = spirv.Constant 0 : i32
%cst2304_i32 = spirv.Constant 2304 : i32
// Read the two i32 push constants: %1 = element 0, %3 = element 1.
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
%0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%1 = spirv.Load "PushConstant" %0 : i32
%2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%3 = spirv.Load "PushConstant" %2 : i32
// %9 = floor(%1 / 8), written so that the dividend handed to SDiv is never
// negative: for %1 < 0 it computes -1 - ((-1 - %1) / 8), otherwise %1 / 8.
// %9 is added to the source element index in the loop.
// NOTE(review): the divisor 8 equals the buffers' 8-byte stride, so %1 is
// presumably a byte offset being converted to an element offset -- confirm.
%4 = spirv.SLessThan %1, %cst0_i32 : i32
%5 = spirv.ISub %cst-1_i32, %1 : i32
%6 = spirv.Select %4, %5, %1 : i1, i32
%7 = spirv.SDiv %6, %cst8_i32 : i32
%8 = spirv.ISub %cst-1_i32, %7 : i32
%9 = spirv.Select %4, %8, %7 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
// %15 = floor(%3 / 8), same floor-division pattern as above; it is added to
// the destination element index in the loop.
%10 = spirv.SLessThan %3, %cst0_i32 : i32
%11 = spirv.ISub %cst-1_i32, %3 : i32
%12 = spirv.Select %10, %11, %3 : i1, i32
%13 = spirv.SDiv %12, %cst8_i32 : i32
%14 = spirv.ISub %cst-1_i32, %13 : i32
%15 = spirv.Select %10, %14, %13 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
// Decompose the workgroup id. WorkgroupId is loaded three times from the same
// variable; only the extracted component differs.
//   %17 = WorkgroupId[2]
//   %18 = %17 / 2304 (unsigned)   %21 = %17 % 2304 (unsigned)
//   %20 = WorkgroupId[1]          %23 = WorkgroupId[0]
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%16 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%17 = spirv.CompositeExtract %16[2 : i32] : vector<3xi32>
%18 = spirv.UDiv %17, %cst2304_i32 : i32
%19 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%20 = spirv.CompositeExtract %19[1 : i32] : vector<3xi32>
%21 = spirv.UMod %17, %cst2304_i32 : i32
%22 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%23 = spirv.CompositeExtract %22[0 : i32] : vector<3xi32>
// Loop upper bound: %27 = min(9216 - 4 * %21, 4). Because %21 is the result of
// an unsigned modulo by 2304 it is at most 2303, so 9216 - 4 * %21 >= 4 and the
// bound always evaluates to 4.
%24 = spirv.IMul %21, %cst-4_i32 : i32
%25 = spirv.IAdd %24, %cst9216_i32 : i32
%26 = spirv.SLessThan %25, %cst4_i32 : i32
%27 = spirv.Select %26, %25, %cst4_i32 : i1, i32
// %29 = LocalInvocationId[2] (loop start), %31 = LocalInvocationId[0].
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%28 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%29 = spirv.CompositeExtract %28[2 : i32] : vector<3xi32>
%30 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%31 = spirv.CompositeExtract %30[0 : i32] : vector<3xi32>
// Loop over %32 = %29, %29 + 4, ... while %32 < %27. With the LocalSize z
// extent of 4 declared below and a bound of 4, the body runs once per
// invocation.
spirv.mlir.loop {
spirv.Branch ^bb1(%29 : i32)
^bb1(%32: i32): // 2 preds: ^bb0, ^bb2
%33 = spirv.SLessThan %32, %27 : i32
spirv.BranchConditional %33, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Source element index:
//   %44 = %32*80 + %21*320 + %23*8 + %31 + %18*737280 + %20*16 + %9
%34 = spirv.IMul %32, %cst80_i32 : i32
%35 = spirv.IMul %21, %cst320_i32 : i32
%36 = spirv.IAdd %34, %35 : i32
%37 = spirv.IMul %23, %cst8_i32 : i32
%38 = spirv.IAdd %36, %37 : i32
%39 = spirv.IAdd %38, %31 : i32
%40 = spirv.IMul %18, %cst737280_i32 : i32
%41 = spirv.IAdd %39, %40 : i32
%42 = spirv.IMul %20, %cst16_i32 : i32
%43 = spirv.IAdd %41, %42 : i32
%44 = spirv.IAdd %43, %9 : i32
%45 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %44] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
%46 = spirv.Load "StorageBuffer" %45 : vector<4xf16>
// Destination element index (reuses %37 and %40 from the source index):
//   %55 = %32*16 + %21*64 + %23*8 + %31 + %18*737280 + %20*147456 + %15
%47 = spirv.IMul %32, %cst16_i32 : i32
%48 = spirv.IMul %21, %cst64_i32 : i32
%49 = spirv.IAdd %47, %48 : i32
%50 = spirv.IAdd %49, %37 : i32
%51 = spirv.IAdd %50, %31 : i32
%52 = spirv.IAdd %51, %40 : i32
%53 = spirv.IMul %20, %cst147456_i32 : i32
%54 = spirv.IAdd %52, %53 : i32
%55 = spirv.IAdd %54, %15 : i32
%56 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %55] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
// Copy the loaded vector to the destination without modification.
spirv.Store "StorageBuffer" %56, %46 : vector<4xf16>
// Advance the induction variable by 4 and re-test in ^bb1.
%57 = spirv.IAdd %32, %cst4_i32 : i32
spirv.Branch ^bb1(%57 : i32)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
spirv.Return
}
// Entry-point declaration: GLCompute execution model, local size 8 x 1 x 4
// (the same values as the workgroup_size attribute on the export above).
spirv.EntryPoint "GLCompute" @forward_dispatch_43_generic_2x9216x5x64, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_43_generic_2x9216x5x64 "LocalSize", 8, 1, 4
}
}
}
}
hal.executable private @forward_dispatch_44 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
hal.executable.export public @forward_dispatch_44_generic_2x5x9216x64 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation1, workgroup_size = [8 : index, 4 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index):
%c2 = arith.constant 2 : index
%c2304 = arith.constant 2304 : index
%c10 = arith.constant 10 : index
hal.return %c2, %c2304, %c10 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
// Compute entry for dispatch 44. Each invocation loads one vector<4xf16> from
// binding (0, 0), multiplies every lane by a constant, and stores the result to
// binding (0, 1) at an index built from the same IDs but with different strides.
spirv.func @forward_dispatch_44_generic_2x5x9216x64() "None" {
// Integer constants used below as index multipliers and divisors.
%cst1_i32 = spirv.Constant 1 : i32
%cst147456_i32 = spirv.Constant 147456 : i32
%cst64_i32 = spirv.Constant 64 : i32
%cst16_i32 = spirv.Constant 16 : i32
%cst737280_i32 = spirv.Constant 737280 : i32
%cst320_i32 = spirv.Constant 320 : i32
%cst80_i32 = spirv.Constant 80 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst0_i32 = spirv.Constant 0 : i32
%cst5_i32 = spirv.Constant 5 : i32
// Scale factor applied to every loaded lane; numerically ~= 1/sqrt(8).
%cst_f32 = spirv.Constant 0.353553385 : f32
// Read the two i32 push constants: %1 = element [0], %3 = element [1].
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
%0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%1 = spirv.Load "PushConstant" %0 : i32
%2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%3 = spirv.Load "PushConstant" %2 : i32
// %9 = floor(%1 / 8): for negative %1 the value is mapped to (-1 - %1), divided,
// and mapped back, so the quotient rounds toward negative infinity.
// NOTE(review): presumably converts a byte offset into a vector<4xf16> element
// index (the buffer stride is 8) -- confirm against the host-side dispatch code.
%4 = spirv.SLessThan %1, %cst0_i32 : i32
%5 = spirv.ISub %cst-1_i32, %1 : i32
%6 = spirv.Select %4, %5, %1 : i1, i32
%7 = spirv.SDiv %6, %cst8_i32 : i32
%8 = spirv.ISub %cst-1_i32, %7 : i32
%9 = spirv.Select %4, %8, %7 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
// %15 = floor(%3 / 8), same pattern as above, applied to the second push constant.
%10 = spirv.SLessThan %3, %cst0_i32 : i32
%11 = spirv.ISub %cst-1_i32, %3 : i32
%12 = spirv.Select %10, %11, %3 : i1, i32
%13 = spirv.SDiv %12, %cst8_i32 : i32
%14 = spirv.ISub %cst-1_i32, %13 : i32
%15 = spirv.Select %10, %14, %13 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
// Workgroup ID: %17 = z, split into %18 = z / 5 and %19 = z % 5 (unsigned);
// %21 = y, %23 = x. The builtin is re-loaded for each component.
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%16 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%17 = spirv.CompositeExtract %16[2 : i32] : vector<3xi32>
%18 = spirv.UDiv %17, %cst5_i32 : i32
%19 = spirv.UMod %17, %cst5_i32 : i32
%20 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%21 = spirv.CompositeExtract %20[1 : i32] : vector<3xi32>
%22 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%23 = spirv.CompositeExtract %22[0 : i32] : vector<3xi32>
// Narrow the scale constant to f16 and replicate it into all four lanes.
%24 = spirv.FConvert %cst_f32 : f32 to f16
%25 = spirv.CompositeConstruct %24, %24, %24, %24 : (f16, f16, f16, f16) -> vector<4xf16>
// Local invocation ID: %27 = y, %29 = x.
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%26 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%27 = spirv.CompositeExtract %26[1 : i32] : vector<3xi32>
%28 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%29 = spirv.CompositeExtract %28[0 : i32] : vector<3xi32>
// Source element index:
//   %40 = lid.y*80 + wg.y*320 + wg.x*8 + lid.x + (wg.z/5)*737280 + (wg.z%5)*16 + %9
%30 = spirv.IMul %27, %cst80_i32 : i32
%31 = spirv.IMul %21, %cst320_i32 : i32
%32 = spirv.IAdd %30, %31 : i32
%33 = spirv.IMul %23, %cst8_i32 : i32
%34 = spirv.IAdd %32, %33 : i32
%35 = spirv.IAdd %34, %29 : i32
%36 = spirv.IMul %18, %cst737280_i32 : i32
%37 = spirv.IAdd %35, %36 : i32
%38 = spirv.IMul %19, %cst16_i32 : i32
%39 = spirv.IAdd %37, %38 : i32
%40 = spirv.IAdd %39, %9 : i32
// Load one vector<4xf16> from binding (0, 0) and scale all lanes.
%41 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %40] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
%42 = spirv.Load "StorageBuffer" %41 : vector<4xf16>
%43 = spirv.FMul %42, %25 : vector<4xf16>
// Destination element index (reuses %33 = wg.x*8 and %36 = (wg.z/5)*737280):
//   %52 = lid.y*16 + wg.y*64 + wg.x*8 + lid.x + (wg.z/5)*737280 + (wg.z%5)*147456 + %15
// Relative to the source index, the (wg.z%5) term moves from stride 16 to stride
// 147456 while the lid.y / wg.y strides shrink by 5x.
// NOTE(review): this looks like a layout permutation of the "5" dimension -- confirm.
%44 = spirv.IMul %27, %cst16_i32 : i32
%45 = spirv.IMul %21, %cst64_i32 : i32
%46 = spirv.IAdd %44, %45 : i32
%47 = spirv.IAdd %46, %33 : i32
%48 = spirv.IAdd %47, %29 : i32
%49 = spirv.IAdd %48, %36 : i32
%50 = spirv.IMul %19, %cst147456_i32 : i32
%51 = spirv.IAdd %49, %50 : i32
%52 = spirv.IAdd %51, %15 : i32
// Store the scaled vector to binding (0, 1).
%53 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %52] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %53, %43 : vector<4xf16>
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_44_generic_2x5x9216x64, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_44_generic_2x5x9216x64 "LocalSize", 8, 4, 1
}
}
}
}
// Executable for dispatch 45. Contains a single Vulkan SPIR-V variant whose
// kernel gathers four scalar f16 values spaced 320 elements apart from binding
// (0, 0), packs them into a vector<4xf16>, scales every lane by a constant, and
// stores the vector to binding (0, 1).
hal.executable private @forward_dispatch_45 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
// Export with a 32x1x1 workgroup size. The region below returns the constant
// triple (72, 320, 2).
// NOTE(review): presumably the workgroup counts in x, y, z order; the strides in
// the kernel (wg.x*32, wg.y*2304 = 72*32, wg.z*737280 = 320*2304) are consistent
// with that -- confirm.
hal.executable.export public @forward_dispatch_45_generic_2x320x9216 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation1, workgroup_size = [32 : index, 1 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
%c72 = arith.constant 72 : index
%c320 = arith.constant 320 : index
%c2 = arith.constant 2 : index
hal.return %c72, %c320, %c2 : index, index, index
}
// Module carrying the SPIR-V target environment (capabilities, extensions and
// resource limits) as an attribute.
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
// Interface variables: builtin IDs, a 2 x i32 push-constant block, and two
// storage buffers. Binding (0, 0) is viewed as scalar f16 (stride 2), binding
// (0, 1) as vector<4xf16> (stride 8).
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
spirv.func @forward_dispatch_45_generic_2x320x9216() "None" {
// Integer constants used below as index multipliers, offsets and divisors.
%cst1_i32 = spirv.Constant 1 : i32
%cst2304_i32 = spirv.Constant 2304 : i32
%cst737280_i32 = spirv.Constant 737280 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst960_i32 = spirv.Constant 960 : i32
%cst640_i32 = spirv.Constant 640 : i32
%cst320_i32 = spirv.Constant 320 : i32
%cst2949120_i32 = spirv.Constant 2949120 : i32
%cst1280_i32 = spirv.Constant 1280 : i32
%cst40960_i32 = spirv.Constant 40960 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst0_i32 = spirv.Constant 0 : i32
// Scale factor applied to every lane; numerically ~= 1/sqrt(8).
%cst_f32 = spirv.Constant 0.353553385 : f32
// Read the two i32 push constants: %1 = element [0], %3 = element [1].
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
%0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%1 = spirv.Load "PushConstant" %0 : i32
%2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%3 = spirv.Load "PushConstant" %2 : i32
// %9 = floor(%1 / 2): negative inputs are mapped to (-1 - x), divided, and mapped
// back so the quotient rounds toward negative infinity.
// NOTE(review): presumably a byte offset converted to an f16 element index
// (stride 2) -- confirm against the host-side dispatch code.
%4 = spirv.SLessThan %1, %cst0_i32 : i32
%5 = spirv.ISub %cst-1_i32, %1 : i32
%6 = spirv.Select %4, %5, %1 : i1, i32
%7 = spirv.SDiv %6, %cst2_i32 : i32
%8 = spirv.ISub %cst-1_i32, %7 : i32
%9 = spirv.Select %4, %8, %7 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// %15 = floor(%3 / 8), same pattern, matching the stride-8 output element type.
%10 = spirv.SLessThan %3, %cst0_i32 : i32
%11 = spirv.ISub %cst-1_i32, %3 : i32
%12 = spirv.Select %10, %11, %3 : i1, i32
%13 = spirv.SDiv %12, %cst8_i32 : i32
%14 = spirv.ISub %cst-1_i32, %13 : i32
%15 = spirv.Select %10, %14, %13 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
// Workgroup ID components: %17 = z, %19 = y, %21 = x.
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%16 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%17 = spirv.CompositeExtract %16[2 : i32] : vector<3xi32>
%18 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%19 = spirv.CompositeExtract %18[1 : i32] : vector<3xi32>
%20 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%21 = spirv.CompositeExtract %20[0 : i32] : vector<3xi32>
// Narrow the scale constant to f16 and replicate it into all four lanes.
%22 = spirv.FConvert %cst_f32 : f32 to f16
%23 = spirv.CompositeConstruct %22, %22, %22, %22 : (f16, f16, f16, f16) -> vector<4xf16>
// Local invocation ID: only the x component (%25) is used.
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%24 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%25 = spirv.CompositeExtract %24[0 : i32] : vector<3xi32>
// Base source index (in f16 elements):
//   %32 = wg.x*40960 + lid.x*1280 + wg.z*2949120 + wg.y + %9
%26 = spirv.IMul %21, %cst40960_i32 : i32
%27 = spirv.IMul %25, %cst1280_i32 : i32
%28 = spirv.IAdd %26, %27 : i32
%29 = spirv.IMul %17, %cst2949120_i32 : i32
%30 = spirv.IAdd %28, %29 : i32
%31 = spirv.IAdd %30, %19 : i32
%32 = spirv.IAdd %31, %9 : i32
// Gather four scalar f16 values at %32, %32+320, %32+640 and %32+960.
%33 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %32] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%34 = spirv.Load "StorageBuffer" %33 : f16
%35 = spirv.IAdd %32, %cst320_i32 : i32
%36 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %35] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%37 = spirv.Load "StorageBuffer" %36 : f16
%38 = spirv.IAdd %32, %cst640_i32 : i32
%39 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %38] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%40 = spirv.Load "StorageBuffer" %39 : f16
%41 = spirv.IAdd %32, %cst960_i32 : i32
%42 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %41] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%43 = spirv.Load "StorageBuffer" %42 : f16
// Pack the four scalars into one vector (in load order) and scale all lanes.
%44 = spirv.CompositeConstruct %34, %37, %40, %43 : (f16, f16, f16, f16) -> vector<4xf16>
%45 = spirv.FMul %44, %23 : vector<4xf16>
// Destination index (in vector<4xf16> elements):
//   %52 = wg.x*32 + lid.x + wg.z*737280 + wg.y*2304 + %15
// NOTE(review): the strided gather followed by a contiguous vector store looks
// like a transpose of the two inner dimensions -- confirm.
%46 = spirv.IMul %21, %cst32_i32 : i32
%47 = spirv.IAdd %46, %25 : i32
%48 = spirv.IMul %17, %cst737280_i32 : i32
%49 = spirv.IAdd %47, %48 : i32
%50 = spirv.IMul %19, %cst2304_i32 : i32
%51 = spirv.IAdd %49, %50 : i32
%52 = spirv.IAdd %51, %15 : i32
// Store the scaled vector to binding (0, 1).
%53 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %52] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %53, %45 : vector<4xf16>
spirv.Return
}
// Entry-point declaration; the LocalSize execution mode (32, 1, 1) matches the
// workgroup_size attribute on the export above.
spirv.EntryPoint "GLCompute" @forward_dispatch_45_generic_2x320x9216, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_45_generic_2x320x9216 "LocalSize", 32, 1, 1
}
}
}
}
hal.executable private @forward_dispatch_46 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
hal.executable.export public @forward_dispatch_46_batch_matmul_10x9216x9216x64 ordinal(0) layout(#pipeline_layout4) attributes {subgroup_size = 32 : index, translation_info = #translation4, workgroup_size = [64 : index, 2 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index):
%c144 = arith.constant 144 : index
%c10 = arith.constant 10 : index
hal.return %c144, %c144, %c10 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, Float16, CooperativeMatrixNV], [SPV_KHR_storage_buffer_storage_class, SPV_NV_cooperative_matrix]> {
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>
spirv.GlobalVariable @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>
spirv.GlobalVariable @__workgroup_mem__3 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
// Compute entry for dispatch 46 (subgroup size 32). Structure:
//   1. copy tiles of both operands from binding (0, 0) into two workgroup-memory
//      arrays, barrier, load 16x16xf16 cooperative matrices, multiply-accumulate;
//   2. repeat with a second set of tiles written to different workgroup-memory
//      offsets, accumulating onto the results of step 1;
//   3. store the four accumulated cooperative matrices to binding (0, 1).
// Both operands are read from @__resource_var_0_0_ at different offsets.
// NOTE(review): buffers and workgroup arrays are typed vector<4xf32> while the
// cooperative matrices are f16; presumably each 16-byte element carries eight
// f16 values -- confirm.
spirv.func @forward_dispatch_46_batch_matmul_10x9216x9216x64() "None" attributes {spirv.entry_point_abi = #spirv.entry_point_abi<subgroup_size = 32>} {
// Integer/boolean constants: index multipliers, tile offsets, cooperative-matrix
// stride operands (5, 9, 1152) and the column-major flag (%false).
%cst1152_i32 = spirv.Constant 1152 : i32
%cst9_i32 = spirv.Constant 9 : i32
%false = spirv.Constant false
%cst5_i32 = spirv.Constant 5 : i32
%cst1_i32 = spirv.Constant 1 : i32
%cst18434_i32 = spirv.Constant 18434 : i32
%cst10616832_i32 = spirv.Constant 10616832 : i32
%cst434_i32 = spirv.Constant 434 : i32
%cst290_i32 = spirv.Constant 290 : i32
%cst402_i32 = spirv.Constant 402 : i32
%cst400_i32 = spirv.Constant 400 : i32
%cst322_i32 = spirv.Constant 322 : i32
%cst432_i32 = spirv.Constant 432 : i32
%cst55296_i32 = spirv.Constant 55296 : i32
%cst36864_i32 = spirv.Constant 36864 : i32
%cst480_i32 = spirv.Constant 480 : i32
%cst260_i32 = spirv.Constant 260 : i32
%cst146_i32 = spirv.Constant 146 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst288_i32 = spirv.Constant 288 : i32
%cst82_i32 = spirv.Constant 82 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst320_i32 = spirv.Constant 320 : i32
%cst144_i32 = spirv.Constant 144 : i32
%cst72_i32 = spirv.Constant 72 : i32
%cst1144_i32 = spirv.Constant 1144 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst18432_i32 = spirv.Constant 18432 : i32
%cst9216_i32 = spirv.Constant 9216 : i32
%cst160_i32 = spirv.Constant 160 : i32
%cst80_i32 = spirv.Constant 80 : i32
%cst4_i32 = spirv.Constant 4 : i32
%cst73728_i32 = spirv.Constant 73728 : i32
%cst256_i32 = spirv.Constant 256 : i32
%cst128_i32 = spirv.Constant 128 : i32
%cst512_i32 = spirv.Constant 512 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst16_i32 = spirv.Constant 16 : i32
%cst0_i32 = spirv.Constant 0 : i32
// %0: 16x16 f16 cooperative matrix filled with 0.0, used as the initial accumulator.
%cst_f16 = spirv.Constant 0.000000e+00 : f16
%0 = spirv.CompositeConstruct %cst_f16 : (f16) -> !spirv.coopmatrix<16x16xf16, Subgroup>
// Local invocation ID components: %2 = x, %4 = y, %6 = z.
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%1 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%2 = spirv.CompositeExtract %1[0 : i32] : vector<3xi32>
%3 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%4 = spirv.CompositeExtract %3[1 : i32] : vector<3xi32>
%5 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%6 = spirv.CompositeExtract %5[2 : i32] : vector<3xi32>
// Workgroup-memory staging arrays: mem_3 (640 elements) and mem_4 (576 elements).
%__workgroup_mem__3_addr = spirv.mlir.addressof @__workgroup_mem__3 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>
%__workgroup_mem__4_addr = spirv.mlir.addressof @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>
// Read the three i32 push constants: %8 = [0], %10 = [1], %12 = [2].
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>
%7 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32
%8 = spirv.Load "PushConstant" %7 : i32
%9 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32
%10 = spirv.Load "PushConstant" %9 : i32
%11 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32
%12 = spirv.Load "PushConstant" %11 : i32
// %18 = floor(%8 / 16): negative inputs are mapped to (-1 - x), divided, and
// mapped back so the quotient rounds toward negative infinity.
// NOTE(review): presumably byte offsets converted to 16-byte element indices
// (buffer stride is 16) -- confirm against the host-side dispatch code.
%13 = spirv.SLessThan %8, %cst0_i32 : i32
%14 = spirv.ISub %cst-1_i32, %8 : i32
%15 = spirv.Select %13, %14, %8 : i1, i32
%16 = spirv.SDiv %15, %cst16_i32 : i32
%17 = spirv.ISub %cst-1_i32, %16 : i32
%18 = spirv.Select %13, %17, %16 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
// %24 = floor(%10 / 16), same pattern.
%19 = spirv.SLessThan %10, %cst0_i32 : i32
%20 = spirv.ISub %cst-1_i32, %10 : i32
%21 = spirv.Select %19, %20, %10 : i1, i32
%22 = spirv.SDiv %21, %cst16_i32 : i32
%23 = spirv.ISub %cst-1_i32, %22 : i32
%24 = spirv.Select %19, %23, %22 : i1, i32
// %30 = floor(%12 / 16), same pattern; used only in the output index.
%25 = spirv.SLessThan %12, %cst0_i32 : i32
%26 = spirv.ISub %cst-1_i32, %12 : i32
%27 = spirv.Select %25, %26, %12 : i1, i32
%28 = spirv.SDiv %27, %cst16_i32 : i32
%29 = spirv.ISub %cst-1_i32, %28 : i32
%30 = spirv.Select %25, %29, %28 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
// Workgroup ID components: %32 = z, %34 = y, %36 = x.
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%31 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%32 = spirv.CompositeExtract %31[2 : i32] : vector<3xi32>
%33 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%34 = spirv.CompositeExtract %33[1 : i32] : vector<3xi32>
%35 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%36 = spirv.CompositeExtract %35[0 : i32] : vector<3xi32>
// ---- Stage 1, first operand: global -> mem_3 --------------------------------
// Global index:
//   %53 = wg.y*512 + lid.x + lid.y*128 + lid.z*256 + wg.z*73728 + %18
//         + floor(lid.x / 4) * 4
%37 = spirv.IMul %34, %cst512_i32 : i32
%38 = spirv.IAdd %37, %2 : i32
%39 = spirv.IMul %4, %cst128_i32 : i32
%40 = spirv.IAdd %38, %39 : i32
%41 = spirv.IMul %6, %cst256_i32 : i32
%42 = spirv.IAdd %40, %41 : i32
%43 = spirv.IMul %32, %cst73728_i32 : i32
%44 = spirv.IAdd %42, %43 : i32
%45 = spirv.IAdd %44, %18 : i32
// %48 = (lid.x < 0) ? -1 - lid.x : lid.x; reused by every floor-division of
// lid.x below. %51 = floor(lid.x / 4).
%46 = spirv.SLessThan %2, %cst0_i32 : i32
%47 = spirv.ISub %cst-1_i32, %2 : i32
%48 = spirv.Select %46, %47, %2 : i1, i32
%49 = spirv.SDiv %48, %cst4_i32 : i32
%50 = spirv.ISub %cst-1_i32, %49 : i32
%51 = spirv.Select %46, %50, %49 : i1, i32
%52 = spirv.IMul %51, %cst4_i32 : i32
%53 = spirv.IAdd %45, %52 : i32
%54 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %53] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%55 = spirv.Load "StorageBuffer" %54 : vector<4xf32>
// mem_3 index: %60 = lid.x + lid.y*80 + lid.z*160 + floor(lid.x / 4).
// NOTE(review): the extra floor(lid.x/4) term adds one element per four, which
// looks like row padding -- confirm.
%56 = spirv.IMul %4, %cst80_i32 : i32
%57 = spirv.IAdd %2, %56 : i32
%58 = spirv.IMul %6, %cst160_i32 : i32
%59 = spirv.IAdd %57, %58 : i32
%60 = spirv.IAdd %59, %51 : i32
%61 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %60] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %61, %55 : vector<4xf32>
// Second copy of this invocation: global %53+256 -> mem_3 %60+160.
%62 = spirv.IAdd %53, %cst256_i32 : i32
%63 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %62] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%64 = spirv.Load "StorageBuffer" %63 : vector<4xf32>
%65 = spirv.IAdd %60, %cst160_i32 : i32
%66 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %65] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %66, %64 : vector<4xf32>
// ---- Stage 1, second operand: global -> mem_4 -------------------------------
// Global index:
//   %79 = lid.x + lid.y*9216 + lid.z*18432 + wg.x*8 + wg.z*73728 + %24
//         + floor(lid.x / 8) * 1144
%67 = spirv.IMul %4, %cst9216_i32 : i32
%68 = spirv.IAdd %2, %67 : i32
%69 = spirv.IMul %6, %cst18432_i32 : i32
%70 = spirv.IAdd %68, %69 : i32
%71 = spirv.IMul %36, %cst8_i32 : i32
%72 = spirv.IAdd %70, %71 : i32
%73 = spirv.IAdd %72, %43 : i32
%74 = spirv.IAdd %73, %24 : i32
// %77 = floor(lid.x / 8).
%75 = spirv.SDiv %48, %cst8_i32 : i32
%76 = spirv.ISub %cst-1_i32, %75 : i32
%77 = spirv.Select %46, %76, %75 : i1, i32
%78 = spirv.IMul %77, %cst1144_i32 : i32
%79 = spirv.IAdd %74, %78 : i32
%80 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %79] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%81 = spirv.Load "StorageBuffer" %80 : vector<4xf32>
// mem_4 index: %86 = lid.x + lid.y*72 + lid.z*144 + floor(lid.x / 8).
%82 = spirv.IMul %4, %cst72_i32 : i32
%83 = spirv.IAdd %2, %82 : i32
%84 = spirv.IMul %6, %cst144_i32 : i32
%85 = spirv.IAdd %83, %84 : i32
%86 = spirv.IAdd %85, %77 : i32
%87 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %86] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %87, %81 : vector<4xf32>
// Second copy of this invocation: global %79+18432 -> mem_4 %86+144.
%88 = spirv.IAdd %79, %cst18432_i32 : i32
%89 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %88] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%90 = spirv.Load "StorageBuffer" %89 : vector<4xf32>
%91 = spirv.IAdd %86, %cst144_i32 : i32
%92 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %91] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %92, %90 : vector<4xf32>
// Workgroup-scope barrier before the staged tiles are read back.
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
// ---- Stage 1: cooperative-matrix loads --------------------------------------
// mem_3 base %95 = lid.y*160 + lid.z*320; four matrices at +0, +2, +80, +82,
// each loaded with stride operand 5 and column-major = false.
%93 = spirv.IMul %4, %cst160_i32 : i32
%94 = spirv.IMul %6, %cst320_i32 : i32
%95 = spirv.IAdd %93, %94 : i32
%96 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %95] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%97 = spirv.NV.CooperativeMatrixLoad %96, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%98 = spirv.IAdd %95, %cst2_i32 : i32
%99 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %98] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%100 = spirv.NV.CooperativeMatrixLoad %99, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%101 = spirv.IAdd %95, %cst80_i32 : i32
%102 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %101] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%103 = spirv.NV.CooperativeMatrixLoad %102, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%104 = spirv.IAdd %95, %cst82_i32 : i32
%105 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %104] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%106 = spirv.NV.CooperativeMatrixLoad %105, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
// mem_4 base %112 = lid.z*288 + floor(lid.x / 32)*4; four matrices at +0, +2,
// +144, +146, each loaded with stride operand 9 and column-major = false.
// %111 = floor(lid.x / 32)*4 is reused later in the output index.
%107 = spirv.IMul %6, %cst288_i32 : i32
%108 = spirv.SDiv %48, %cst32_i32 : i32
%109 = spirv.ISub %cst-1_i32, %108 : i32
%110 = spirv.Select %46, %109, %108 : i1, i32
%111 = spirv.IMul %110, %cst4_i32 : i32
%112 = spirv.IAdd %107, %111 : i32
%113 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %112] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%114 = spirv.NV.CooperativeMatrixLoad %113, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%115 = spirv.IAdd %112, %cst2_i32 : i32
%116 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %115] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%117 = spirv.NV.CooperativeMatrixLoad %116, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%118 = spirv.IAdd %112, %cst144_i32 : i32
%119 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %118] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%120 = spirv.NV.CooperativeMatrixLoad %119, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%121 = spirv.IAdd %112, %cst146_i32 : i32
%122 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %121] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%123 = spirv.NV.CooperativeMatrixLoad %122, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
// ---- Stage 1: multiply-accumulate -------------------------------------------
// Four accumulators, each the sum of two products starting from the zero matrix:
//   %125 = %97*%114  + %100*%120      %127 = %97*%117  + %100*%123
//   %129 = %103*%114 + %106*%120      %131 = %103*%117 + %106*%123
%124 = spirv.NV.CooperativeMatrixMulAdd %97, %114, %0 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%125 = spirv.NV.CooperativeMatrixMulAdd %100, %120, %124 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%126 = spirv.NV.CooperativeMatrixMulAdd %97, %117, %0 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%127 = spirv.NV.CooperativeMatrixMulAdd %100, %123, %126 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%128 = spirv.NV.CooperativeMatrixMulAdd %103, %114, %0 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%129 = spirv.NV.CooperativeMatrixMulAdd %106, %120, %128 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%130 = spirv.NV.CooperativeMatrixMulAdd %103, %117, %0 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%131 = spirv.NV.CooperativeMatrixMulAdd %106, %123, %130 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
// ---- Stage 2: global -> workgroup memory ------------------------------------
// Same per-invocation base indices as stage 1, shifted:
//   first operand : global %53+4,     %53+260   -> mem_3 %60+320, %60+480
//   second operand: global %79+36864, %79+55296 -> mem_4 %86+288, %86+432
// The workgroup-memory destinations differ from the stage-1 offsets
// (mem_3: +0/+160, mem_4: +0/+144).
%132 = spirv.IAdd %53, %cst4_i32 : i32
%133 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %132] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%134 = spirv.Load "StorageBuffer" %133 : vector<4xf32>
%135 = spirv.IAdd %60, %cst320_i32 : i32
%136 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %135] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %136, %134 : vector<4xf32>
%137 = spirv.IAdd %53, %cst260_i32 : i32
%138 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %137] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%139 = spirv.Load "StorageBuffer" %138 : vector<4xf32>
%140 = spirv.IAdd %60, %cst480_i32 : i32
%141 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %140] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %141, %139 : vector<4xf32>
%142 = spirv.IAdd %79, %cst36864_i32 : i32
%143 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %142] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%144 = spirv.Load "StorageBuffer" %143 : vector<4xf32>
%145 = spirv.IAdd %86, %cst288_i32 : i32
%146 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %145] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %146, %144 : vector<4xf32>
%147 = spirv.IAdd %79, %cst55296_i32 : i32
%148 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %147] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%149 = spirv.Load "StorageBuffer" %148 : vector<4xf32>
%150 = spirv.IAdd %86, %cst432_i32 : i32
%151 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %150] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %151, %149 : vector<4xf32>
// Workgroup-scope barrier before the stage-2 tiles are read back.
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
// ---- Stage 2: cooperative-matrix loads --------------------------------------
// mem_3: %95 + {320, 322, 400, 402} (stage-1 offsets shifted by 320), stride operand 5.
%152 = spirv.IAdd %95, %cst320_i32 : i32
%153 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %152] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%154 = spirv.NV.CooperativeMatrixLoad %153, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%155 = spirv.IAdd %95, %cst322_i32 : i32
%156 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %155] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%157 = spirv.NV.CooperativeMatrixLoad %156, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%158 = spirv.IAdd %95, %cst400_i32 : i32
%159 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %158] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%160 = spirv.NV.CooperativeMatrixLoad %159, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%161 = spirv.IAdd %95, %cst402_i32 : i32
%162 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %161] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%163 = spirv.NV.CooperativeMatrixLoad %162, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
// mem_4: %112 + {288, 290, 432, 434} (stage-1 offsets shifted by 288), stride operand 9.
%164 = spirv.IAdd %112, %cst288_i32 : i32
%165 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %164] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%166 = spirv.NV.CooperativeMatrixLoad %165, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%167 = spirv.IAdd %112, %cst290_i32 : i32
%168 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %167] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%169 = spirv.NV.CooperativeMatrixLoad %168, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%170 = spirv.IAdd %112, %cst432_i32 : i32
%171 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %170] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%172 = spirv.NV.CooperativeMatrixLoad %171, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%173 = spirv.IAdd %112, %cst434_i32 : i32
%174 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %173] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%175 = spirv.NV.CooperativeMatrixLoad %174, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
// ---- Stage 2: multiply-accumulate onto the stage-1 accumulators -------------
//   %177 = %125 + %154*%166 + %157*%172      %179 = %127 + %154*%169 + %157*%175
//   %181 = %129 + %160*%166 + %163*%172      %183 = %131 + %160*%169 + %163*%175
%176 = spirv.NV.CooperativeMatrixMulAdd %154, %166, %125 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%177 = spirv.NV.CooperativeMatrixMulAdd %157, %172, %176 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%178 = spirv.NV.CooperativeMatrixMulAdd %154, %169, %127 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%179 = spirv.NV.CooperativeMatrixMulAdd %157, %175, %178 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%180 = spirv.NV.CooperativeMatrixMulAdd %160, %166, %129 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%181 = spirv.NV.CooperativeMatrixMulAdd %163, %172, %180 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%182 = spirv.NV.CooperativeMatrixMulAdd %160, %169, %131 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%183 = spirv.NV.CooperativeMatrixMulAdd %163, %175, %182 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
// ---- Write-back to binding (0, 1) -------------------------------------------
// Output base index (reuses %71 = wg.x*8 and %111 = floor(lid.x/32)*4):
//   %193 = wg.z*10616832 + lid.z*10616832 + wg.y*73728 + lid.y*36864
//          + wg.x*8 + %30 + floor(lid.x / 32)*4
%184 = spirv.IMul %32, %cst10616832_i32 : i32
%185 = spirv.IMul %6, %cst10616832_i32 : i32
%186 = spirv.IAdd %184, %185 : i32
%187 = spirv.IMul %34, %cst73728_i32 : i32
%188 = spirv.IAdd %186, %187 : i32
%189 = spirv.IMul %4, %cst36864_i32 : i32
%190 = spirv.IAdd %188, %189 : i32
%191 = spirv.IAdd %190, %71 : i32
%192 = spirv.IAdd %191, %30 : i32
%193 = spirv.IAdd %192, %111 : i32
// Store the four accumulators with stride operand 1152, column-major = false:
//   %183 -> %193+18434, %181 -> %193+18432, %179 -> %193+2, %177 -> %193.
%194 = spirv.IAdd %193, %cst18434_i32 : i32
%195 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %194] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %195, %183, %cst1152_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
%196 = spirv.IAdd %193, %cst18432_i32 : i32
%197 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %196] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %197, %181, %cst1152_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
%198 = spirv.IAdd %193, %cst2_i32 : i32
%199 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %198] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %199, %179, %cst1152_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
%200 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %193] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %200, %177, %cst1152_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_46_batch_matmul_10x9216x9216x64, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__
spirv.ExecutionMode @forward_dispatch_46_batch_matmul_10x9216x9216x64 "LocalSize", 64, 2, 1
}
}
}
}
hal.executable private @forward_dispatch_47 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
// Export record for entry point @forward_dispatch_47 (ordinal 0).
// The attribute dictionary declares a workgroup size of 128 x 1 x 1.
hal.executable.export public @forward_dispatch_47 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation3, workgroup_size = [128 : index, 1 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index):
// None of the block arguments are read: the region returns the constant triple
// (9216, 5, 2). In IREE this region yields the workgroup count along x, y, z.
%c9216 = arith.constant 9216 : index
%c5 = arith.constant 5 : index
%c2 = arith.constant 2 : index
hal.return %c9216, %c5, %c2 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.3, [Float16, Shader, GroupNonUniformShuffle], [SPV_KHR_storage_buffer_storage_class]> {
// Workgroup-storage scratch arrays, each 4 x vector<2xf16>. The function below
// indexes every one of them with (local-id.x / 32) on store and with
// min(local-id.x % 32, 3) on load.
spirv.GlobalVariable @__workgroup_mem__7 : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<2xf16>>)>, Workgroup>
spirv.GlobalVariable @__workgroup_mem__6 : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<2xf16>>)>, Workgroup>
spirv.GlobalVariable @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<2xf16>>)>, Workgroup>
// Built-in workgroup id (all three components are read by the function).
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
// Two 32-bit push constants; both are floor-divided by 16 in the function prologue.
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
// Built-in local invocation id (only component 0 is read by the function).
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
// Storage buffers at set 0, bindings 0 and 1, both viewed as runtime arrays of
// vector<4xf32> with a 16-byte stride. The visible loops bitcast each loaded
// vector<4xf32> into 8 f16 values.
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.func @forward_dispatch_47() "None" {
// Multipliers used when forming the vector<4xf32> element index:
// workgroup-id.x * 1152, workgroup-id.y * 10616832, workgroup-id.z * 53084160.
%cst1152_i32 = spirv.Constant 1152 : i32
%cst10616832_i32 = spirv.Constant 10616832 : i32
%cst53084160_i32 = spirv.Constant 53084160 : i32
// 8: divisor applied to the loop counter (8 f16 per loaded vector<4xf32>); also a shuffle-xor mask.
%cst8_i32 = spirv.Constant 8 : i32
// -1: used by the "-1 - x" steps of the signed floor-division sequences.
%cst-1_i32 = spirv.Constant -1 : i32
// 16: divisor applied to the push constants; also a shuffle-xor mask.
%cst16_i32 = spirv.Constant 16 : i32
// f32 value whose bit pattern is 0xFC00FC00, i.e. every f32 lane packs two
// f16 -inf values. Used as the initial value of the running maximum.
%cst_vec_4xf32 = spirv.Constant dense<-2.67890066E+36> : vector<4xf32>
// All-zero vector: initial value of the running sum, filler for vector shuffles,
// and the result selected when the exp input equals -inf.
%cst_vec_4xf32_0 = spirv.Constant dense<0.000000e+00> : vector<4xf32>
// Constants of the range-reduced exponential expansion: ln(2) and log2(e).
%cst_vec_4xf32_1 = spirv.Constant dense<0.693147182> : vector<4xf32>
%cst_vec_4xf32_2 = spirv.Constant dense<1.44269502> : vector<4xf32>
// Polynomial coefficients, from lowest order (1.0) upwards.
%cst_vec_4xf32_3 = spirv.Constant dense<1.000000e+00> : vector<4xf32>
%cst_vec_4xf32_4 = spirv.Constant dense<0.499705136> : vector<4xf32>
%cst_vec_4xf32_5 = spirv.Constant dense<0.168738902> : vector<4xf32>
%cst_vec_4xf32_6 = spirv.Constant dense<0.0366896503> : vector<4xf32>
%cst_vec_4xf32_7 = spirv.Constant dense<1.314350e-02> : vector<4xf32>
// 23: left-shift amount that moves a biased exponent into the f32 exponent field.
%cst_vec_4xi32 = spirv.Constant dense<23> : vector<4xi32>
// +inf, -inf and the smallest normal f32, used by the special-case selects.
%cst_vec_4xf32_8 = spirv.Constant dense<0x7F800000> : vector<4xf32>
%cst_vec_4xf32_9 = spirv.Constant dense<0xFF800000> : vector<4xf32>
%cst_vec_4xf32_10 = spirv.Constant dense<1.17549435E-38> : vector<4xf32>
// 127: f32 exponent bias and upper bound of the accepted exponent; -127: lower bound.
%cst_vec_4xi32_11 = spirv.Constant dense<127> : vector<4xi32>
%cst_vec_4xi32_12 = spirv.Constant dense<-127> : vector<4xi32>
// Shuffle masks (1, 2, 4), the divisor 32 used to split local-id.x, and small indices.
%cst1_i32 = spirv.Constant 1 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst4_i32 = spirv.Constant 4 : i32
%cst0_i32 = spirv.Constant 0 : i32
%cst3_i32 = spirv.Constant 3 : i32
// Loop step (1024) and loop bound (9216) of the three reduction/elementwise loops.
%cst1024_i32 = spirv.Constant 1024 : i32
%cst9216_i32 = spirv.Constant 9216 : i32
// f16 zero, added to the final sum.
%cst_f16 = spirv.Constant 0.000000e+00 : f16
// %1 = local invocation id, x component.
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%0 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%1 = spirv.CompositeExtract %0[0 : i32] : vector<3xi32>
// %3 and %5 = push constants 0 and 1.
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
%2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%3 = spirv.Load "PushConstant" %2 : i32
%4 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%5 = spirv.Load "PushConstant" %4 : i32
// %11 = floor(%3 / 16), computed as a signed floor division:
// for negative %3 it evaluates -1 - ((-1 - %3) / 16), otherwise %3 / 16.
// The divisor equals the 16-byte stride of the buffer elements, so %3 is
// presumably a byte offset into binding 0 -- confirm against the host code.
%6 = spirv.SLessThan %3, %cst0_i32 : i32
%7 = spirv.ISub %cst-1_i32, %3 : i32
%8 = spirv.Select %6, %7, %3 : i1, i32
%9 = spirv.SDiv %8, %cst16_i32 : i32
%10 = spirv.ISub %cst-1_i32, %9 : i32
%11 = spirv.Select %6, %10, %9 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
// %17 = floor(%5 / 16), same sequence applied to the second push constant.
%12 = spirv.SLessThan %5, %cst0_i32 : i32
%13 = spirv.ISub %cst-1_i32, %5 : i32
%14 = spirv.Select %12, %13, %5 : i1, i32
%15 = spirv.SDiv %14, %cst16_i32 : i32
%16 = spirv.ISub %cst-1_i32, %15 : i32
%17 = spirv.Select %12, %16, %15 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
// Workgroup id components: %19 = z, %21 = y, %23 = x (the built-in is loaded three times).
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%18 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%19 = spirv.CompositeExtract %18[2 : i32] : vector<3xi32>
%20 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%21 = spirv.CompositeExtract %20[1 : i32] : vector<3xi32>
%22 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%23 = spirv.CompositeExtract %22[0 : i32] : vector<3xi32>
// Function-local slot that receives the result of the first loop on every iteration.
%24 = spirv.Variable : !spirv.ptr<vector<4xf32>, Function>
// Loop 1: per-invocation running maximum over 8 packed f16 lanes.
// Counter %206 runs 0, 1024, ... while < 9216; %207 carries the accumulator,
// a vector<4xf32> reinterpreted as 8 f16 values, seeded with packed f16 -inf.
spirv.mlir.loop {
spirv.Branch ^bb1(%cst0_i32, %cst_vec_4xf32 : i32, vector<4xf32>)
^bb1(%206: i32, %207: vector<4xf32>): // 2 preds: ^bb0, ^bb2
%208 = spirv.SLessThan %206, %cst9216_i32 : i32
spirv.BranchConditional %208, ^bb2, ^bb3
^bb2: // pred: ^bb1
// %214 = floor(%206 / 8): the counter converted to vector<4xf32> element units.
%209 = spirv.SLessThan %206, %cst0_i32 : i32
%210 = spirv.ISub %cst-1_i32, %206 : i32
%211 = spirv.Select %209, %210, %206 : i1, i32
%212 = spirv.SDiv %211, %cst8_i32 : i32
%213 = spirv.ISub %cst-1_i32, %212 : i32
%214 = spirv.Select %209, %213, %212 : i1, i32
// Element index %222 = %214 + local-id.x + wg.z * 53084160 + wg.y * 10616832
//                      + wg.x * 1152 + %11 (first push constant / 16).
%215 = spirv.IMul %19, %cst53084160_i32 : i32
%216 = spirv.IAdd %1, %215 : i32
%217 = spirv.IMul %21, %cst10616832_i32 : i32
%218 = spirv.IAdd %216, %217 : i32
%219 = spirv.IMul %23, %cst1152_i32 : i32
%220 = spirv.IAdd %218, %219 : i32
%221 = spirv.IAdd %214, %220 : i32
%222 = spirv.IAdd %221, %11 : i32
%223 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %222] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%224 = spirv.Load "StorageBuffer" %223 : vector<4xf32>
// Low half: f32 lanes 0-1 of the loaded value become 4 f16 values (%226);
// f32 lanes 0-1 of the accumulator are unpacked the same way (%235).
%225 = spirv.VectorShuffle [0 : i32, 1 : i32] %224 : vector<4xf32>, %224 : vector<4xf32> -> vector<2xf32>
%226 = spirv.Bitcast %225 : vector<2xf32> to vector<4xf16>
%227 = spirv.CompositeExtract %207[0 : i32] : vector<4xf32>
%228 = spirv.Bitcast %227 : f32 to vector<2xf16>
%229 = spirv.CompositeExtract %228[0 : i32] : vector<2xf16>
%230 = spirv.CompositeExtract %228[1 : i32] : vector<2xf16>
%231 = spirv.CompositeExtract %207[1 : i32] : vector<4xf32>
%232 = spirv.Bitcast %231 : f32 to vector<2xf16>
%233 = spirv.CompositeExtract %232[0 : i32] : vector<2xf16>
%234 = spirv.CompositeExtract %232[1 : i32] : vector<2xf16>
%235 = spirv.CompositeConstruct %229, %230, %233, %234 : (f16, f16, f16, f16) -> vector<4xf16>
// Element-wise max; the two selects then force a NaN operand through to the result
// (the accumulator's NaN wins over the loaded value's NaN).
%236 = spirv.GL.FMax %226, %235 : vector<4xf16>
%237 = spirv.IsNan %226 : vector<4xf16>
%238 = spirv.IsNan %235 : vector<4xf16>
%239 = spirv.Select %237, %226, %236 : vector<4xi1>, vector<4xf16>
%240 = spirv.Select %238, %235, %239 : vector<4xi1>, vector<4xf16>
// High half: the same steps for f32 lanes 2-3.
%241 = spirv.VectorShuffle [2 : i32, 3 : i32] %224 : vector<4xf32>, %224 : vector<4xf32> -> vector<2xf32>
%242 = spirv.Bitcast %241 : vector<2xf32> to vector<4xf16>
%243 = spirv.CompositeExtract %207[2 : i32] : vector<4xf32>
%244 = spirv.Bitcast %243 : f32 to vector<2xf16>
%245 = spirv.CompositeExtract %244[0 : i32] : vector<2xf16>
%246 = spirv.CompositeExtract %244[1 : i32] : vector<2xf16>
%247 = spirv.CompositeExtract %207[3 : i32] : vector<4xf32>
%248 = spirv.Bitcast %247 : f32 to vector<2xf16>
%249 = spirv.CompositeExtract %248[0 : i32] : vector<2xf16>
%250 = spirv.CompositeExtract %248[1 : i32] : vector<2xf16>
%251 = spirv.CompositeConstruct %245, %246, %249, %250 : (f16, f16, f16, f16) -> vector<4xf16>
%252 = spirv.GL.FMax %242, %251 : vector<4xf16>
%253 = spirv.IsNan %242 : vector<4xf16>
%254 = spirv.IsNan %251 : vector<4xf16>
%255 = spirv.Select %253, %242, %252 : vector<4xi1>, vector<4xf16>
%256 = spirv.Select %254, %251, %255 : vector<4xi1>, vector<4xf16>
// Repack: %260 = (low-half result as 2 x f32, high-half result as 2 x f32).
// The zero vector only supplies placeholder lanes that the second shuffle overwrites.
%257 = spirv.Bitcast %256 : vector<4xf16> to vector<2xf32>
%258 = spirv.Bitcast %240 : vector<4xf16> to vector<2xf32>
%259 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32_0 : vector<4xf32>, %258 : vector<2xf32> -> vector<4xf32>
%260 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %259 : vector<4xf32>, %257 : vector<2xf32> -> vector<4xf32>
// Written to %24 every iteration so the final accumulator can be loaded after the loop.
spirv.Store "Function" %24, %260 : vector<4xf32>
%261 = spirv.IAdd %206, %cst1024_i32 : i32
spirv.Branch ^bb1(%261, %260 : i32, vector<4xf32>)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
// Reduce the per-invocation maximum (8 f16 lanes in %24) to one f16 shared by
// the workgroup, splatted into %117.
// Step 1: fold the 8 lanes into two f16 values (%36 = max of lanes 0-3,
// %47 = max of lanes 4-7). These scalar FMax ops have no NaN-propagating selects.
%25 = spirv.Load "Function" %24 : vector<4xf32>
%26 = spirv.CompositeExtract %25[0 : i32] : vector<4xf32>
%27 = spirv.Bitcast %26 : f32 to vector<2xf16>
%28 = spirv.CompositeExtract %27[0 : i32] : vector<2xf16>
%29 = spirv.CompositeExtract %27[1 : i32] : vector<2xf16>
%30 = spirv.CompositeExtract %25[1 : i32] : vector<4xf32>
%31 = spirv.Bitcast %30 : f32 to vector<2xf16>
%32 = spirv.CompositeExtract %31[0 : i32] : vector<2xf16>
%33 = spirv.CompositeExtract %31[1 : i32] : vector<2xf16>
%34 = spirv.GL.FMax %28, %29 : f16
%35 = spirv.GL.FMax %34, %32 : f16
%36 = spirv.GL.FMax %35, %33 : f16
%37 = spirv.CompositeExtract %25[2 : i32] : vector<4xf32>
%38 = spirv.Bitcast %37 : f32 to vector<2xf16>
%39 = spirv.CompositeExtract %38[0 : i32] : vector<2xf16>
%40 = spirv.CompositeExtract %38[1 : i32] : vector<2xf16>
%41 = spirv.CompositeExtract %25[3 : i32] : vector<4xf32>
%42 = spirv.Bitcast %41 : f32 to vector<2xf16>
%43 = spirv.CompositeExtract %42[0 : i32] : vector<2xf16>
%44 = spirv.CompositeExtract %42[1 : i32] : vector<2xf16>
%45 = spirv.GL.FMax %39, %40 : f16
%46 = spirv.GL.FMax %45, %43 : f16
%47 = spirv.GL.FMax %46, %44 : f16
%48 = spirv.CompositeConstruct %36, %47 : (f16, f16) -> vector<2xf16>
// Step 2: subgroup butterfly. The vector<2xf16> is bitcast to i32, exchanged with
// shuffle-xor masks 1, 2, 4, 8, 16 and combined with a NaN-propagating max each time.
%49 = spirv.Bitcast %48 : vector<2xf16> to i32
%50 = spirv.GroupNonUniformShuffleXor <Subgroup> %49, %cst1_i32 : i32, i32
%51 = spirv.Bitcast %50 : i32 to vector<2xf16>
%52 = spirv.GL.FMax %48, %51 : vector<2xf16>
%53 = spirv.IsNan %48 : vector<2xf16>
%54 = spirv.IsNan %51 : vector<2xf16>
%55 = spirv.Select %53, %48, %52 : vector<2xi1>, vector<2xf16>
%56 = spirv.Select %54, %51, %55 : vector<2xi1>, vector<2xf16>
%57 = spirv.Bitcast %56 : vector<2xf16> to i32
%58 = spirv.GroupNonUniformShuffleXor <Subgroup> %57, %cst2_i32 : i32, i32
%59 = spirv.Bitcast %58 : i32 to vector<2xf16>
%60 = spirv.GL.FMax %56, %59 : vector<2xf16>
%61 = spirv.IsNan %56 : vector<2xf16>
%62 = spirv.IsNan %59 : vector<2xf16>
%63 = spirv.Select %61, %56, %60 : vector<2xi1>, vector<2xf16>
%64 = spirv.Select %62, %59, %63 : vector<2xi1>, vector<2xf16>
%65 = spirv.Bitcast %64 : vector<2xf16> to i32
%66 = spirv.GroupNonUniformShuffleXor <Subgroup> %65, %cst4_i32 : i32, i32
%67 = spirv.Bitcast %66 : i32 to vector<2xf16>
%68 = spirv.GL.FMax %64, %67 : vector<2xf16>
%69 = spirv.IsNan %64 : vector<2xf16>
%70 = spirv.IsNan %67 : vector<2xf16>
%71 = spirv.Select %69, %64, %68 : vector<2xi1>, vector<2xf16>
%72 = spirv.Select %70, %67, %71 : vector<2xi1>, vector<2xf16>
%73 = spirv.Bitcast %72 : vector<2xf16> to i32
%74 = spirv.GroupNonUniformShuffleXor <Subgroup> %73, %cst8_i32 : i32, i32
%75 = spirv.Bitcast %74 : i32 to vector<2xf16>
%76 = spirv.GL.FMax %72, %75 : vector<2xf16>
%77 = spirv.IsNan %72 : vector<2xf16>
%78 = spirv.IsNan %75 : vector<2xf16>
%79 = spirv.Select %77, %72, %76 : vector<2xi1>, vector<2xf16>
%80 = spirv.Select %78, %75, %79 : vector<2xi1>, vector<2xf16>
%81 = spirv.Bitcast %80 : vector<2xf16> to i32
%82 = spirv.GroupNonUniformShuffleXor <Subgroup> %81, %cst16_i32 : i32, i32
%83 = spirv.Bitcast %82 : i32 to vector<2xf16>
%84 = spirv.GL.FMax %80, %83 : vector<2xf16>
%85 = spirv.IsNan %80 : vector<2xf16>
%86 = spirv.IsNan %83 : vector<2xf16>
%87 = spirv.Select %85, %80, %84 : vector<2xi1>, vector<2xf16>
%88 = spirv.Select %86, %83, %87 : vector<2xi1>, vector<2xf16>
// Step 3: cross-subgroup exchange through workgroup memory.
// %89 = local-id.x / 32 and %90 = local-id.x % 32; invocations with %90 == 0
// write their %88 to slot %89. NOTE(review): this treats local-id.x % 32 as the
// subgroup lane index (the target env declares subgroup size 32) -- confirm.
%__workgroup_mem__5_addr = spirv.mlir.addressof @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<2xf16>>)>, Workgroup>
%89 = spirv.UDiv %1, %cst32_i32 : i32
%90 = spirv.UMod %1, %cst32_i32 : i32
%91 = spirv.IEqual %90, %cst0_i32 : i32
spirv.mlir.selection {
spirv.BranchConditional %91, ^bb1, ^bb2
^bb1: // pred: ^bb0
%206 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %89] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<2xf16>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %206, %88 : vector<2xf16>
spirv.Branch ^bb2
^bb2: // 2 preds: ^bb0, ^bb1
spirv.mlir.merge
}
// Barrier between the slot writes above and the slot reads below.
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
// Every invocation reads slot min(%90, 3), which keeps the index inside the 4-entry array.
%92 = spirv.GL.UMin %90, %cst3_i32 : i32
%93 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %92] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<2xf16>>)>, Workgroup>, i32, i32
%94 = spirv.Load "Workgroup" %93 : vector<2xf16>
// Two more butterfly steps (masks 1 and 2) combine the four slots.
%95 = spirv.Bitcast %94 : vector<2xf16> to i32
%96 = spirv.GroupNonUniformShuffleXor <Subgroup> %95, %cst1_i32 : i32, i32
%97 = spirv.Bitcast %96 : i32 to vector<2xf16>
%98 = spirv.GL.FMax %94, %97 : vector<2xf16>
%99 = spirv.IsNan %94 : vector<2xf16>
%100 = spirv.IsNan %97 : vector<2xf16>
%101 = spirv.Select %99, %94, %98 : vector<2xi1>, vector<2xf16>
%102 = spirv.Select %100, %97, %101 : vector<2xi1>, vector<2xf16>
%103 = spirv.Bitcast %102 : vector<2xf16> to i32
%104 = spirv.GroupNonUniformShuffleXor <Subgroup> %103, %cst2_i32 : i32, i32
%105 = spirv.Bitcast %104 : i32 to vector<2xf16>
%106 = spirv.GL.FMax %102, %105 : vector<2xf16>
%107 = spirv.IsNan %102 : vector<2xf16>
%108 = spirv.IsNan %105 : vector<2xf16>
%109 = spirv.Select %107, %102, %106 : vector<2xi1>, vector<2xf16>
%110 = spirv.Select %108, %105, %109 : vector<2xi1>, vector<2xf16>
// Take the value held by subgroup invocation 0, fold its two f16 halves with a
// final max (%116), and splat the scalar into a vector<4xf16> (%117).
%111 = spirv.Bitcast %110 : vector<2xf16> to i32
%112 = spirv.GroupNonUniformShuffle <Subgroup> %111, %cst0_i32 : i32, i32
%113 = spirv.Bitcast %112 : i32 to vector<2xf16>
%114 = spirv.CompositeExtract %113[0 : i32] : vector<2xf16>
%115 = spirv.CompositeExtract %113[1 : i32] : vector<2xf16>
%116 = spirv.GL.FMax %114, %115 : f16
%117 = spirv.CompositeConstruct %116, %116, %116, %116 : (f16, f16, f16, f16) -> vector<4xf16>
// Loop 2: per-invocation running sum of exp(x - max) over 8 packed f16 lanes.
// %118 receives the accumulator on every iteration; the accumulator starts at zero.
// The input is re-read from binding 0 with the same index formula as the first loop.
%118 = spirv.Variable : !spirv.ptr<vector<4xf32>, Function>
spirv.mlir.loop {
spirv.Branch ^bb1(%cst0_i32, %cst_vec_4xf32_0 : i32, vector<4xf32>)
^bb1(%206: i32, %207: vector<4xf32>): // 2 preds: ^bb0, ^bb2
%208 = spirv.SLessThan %206, %cst9216_i32 : i32
spirv.BranchConditional %208, ^bb2, ^bb3
^bb2: // pred: ^bb1
// %214 = floor(%206 / 8); %222 = element index (see the formula in the first loop).
%209 = spirv.SLessThan %206, %cst0_i32 : i32
%210 = spirv.ISub %cst-1_i32, %206 : i32
%211 = spirv.Select %209, %210, %206 : i1, i32
%212 = spirv.SDiv %211, %cst8_i32 : i32
%213 = spirv.ISub %cst-1_i32, %212 : i32
%214 = spirv.Select %209, %213, %212 : i1, i32
%215 = spirv.IMul %19, %cst53084160_i32 : i32
%216 = spirv.IAdd %1, %215 : i32
%217 = spirv.IMul %21, %cst10616832_i32 : i32
%218 = spirv.IAdd %216, %217 : i32
%219 = spirv.IMul %23, %cst1152_i32 : i32
%220 = spirv.IAdd %218, %219 : i32
%221 = spirv.IAdd %214, %220 : i32
%222 = spirv.IAdd %221, %11 : i32
%223 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %222] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%224 = spirv.Load "StorageBuffer" %223 : vector<4xf32>
// Subtract the splatted maximum %117 from both f16 halves (%227 low, %230 high).
%225 = spirv.VectorShuffle [0 : i32, 1 : i32] %224 : vector<4xf32>, %224 : vector<4xf32> -> vector<2xf32>
%226 = spirv.Bitcast %225 : vector<2xf32> to vector<4xf16>
%227 = spirv.FSub %226, %117 : vector<4xf16>
%228 = spirv.VectorShuffle [2 : i32, 3 : i32] %224 : vector<4xf32>, %224 : vector<4xf32> -> vector<2xf32>
%229 = spirv.Bitcast %228 : vector<2xf32> to vector<4xf16>
%230 = spirv.FSub %229, %117 : vector<4xf16>
// Exponential of the low half, evaluated in f32 (x = %231):
//   k = floor(x * log2(e)); r = x - k * ln(2)
//   p = (1 + r) + (c4 + c5*r) * r^2 + (c6 + c7*r) * r^4
//   result = p * 2^k, with 2^k built by bitcasting (k + 127) << 23.
%231 = spirv.FConvert %227 : vector<4xf16> to vector<4xf32>
%232 = spirv.IsNan %231 : vector<4xf32>
// OR of the NaN mask with itself: equal to %232.
%233 = spirv.LogicalOr %232, %232 : vector<4xi1>
%234 = spirv.FMul %231, %cst_vec_4xf32_2 : vector<4xf32>
%235 = spirv.GL.Floor %234 : vector<4xf32>
%236 = spirv.FMul %235, %cst_vec_4xf32_1 : vector<4xf32>
%237 = spirv.FSub %231, %236 : vector<4xf32>
%238 = spirv.FMul %237, %237 : vector<4xf32>
%239 = spirv.FMul %238, %238 : vector<4xf32>
%240 = spirv.GL.Fma %cst_vec_4xf32_3, %237, %cst_vec_4xf32_3 : vector<4xf32>
%241 = spirv.GL.Fma %cst_vec_4xf32_5, %237, %cst_vec_4xf32_4 : vector<4xf32>
%242 = spirv.GL.Fma %cst_vec_4xf32_7, %237, %cst_vec_4xf32_6 : vector<4xf32>
%243 = spirv.GL.Fma %241, %238, %240 : vector<4xf32>
%244 = spirv.GL.Fma %242, %239, %243 : vector<4xf32>
%245 = spirv.ConvertFToS %235 : vector<4xf32> to vector<4xi32>
%246 = spirv.IAdd %245, %cst_vec_4xi32_11 : vector<4xi32>
%247 = spirv.ShiftLeftLogical %246, %cst_vec_4xi32 : vector<4xi32>, vector<4xi32>
%248 = spirv.Bitcast %247 : vector<4xi32> to vector<4xf32>
%249 = spirv.FMul %244, %248 : vector<4xf32>
// Special cases, applied from lowest to highest priority:
//   k outside [-127, 127] -> +inf if x > 0, else the smallest normal f32
//   x == +inf -> +inf;  x == -inf -> 0;  x is NaN -> x.
%250 = spirv.SLessThanEqual %245, %cst_vec_4xi32_11 : vector<4xi32>
%251 = spirv.SGreaterThanEqual %245, %cst_vec_4xi32_12 : vector<4xi32>
%252 = spirv.FOrdEqual %231, %cst_vec_4xf32_9 : vector<4xf32>
%253 = spirv.FOrdEqual %231, %cst_vec_4xf32_8 : vector<4xf32>
%254 = spirv.FOrdGreaterThan %231, %cst_vec_4xf32_0 : vector<4xf32>
%255 = spirv.LogicalAnd %250, %251 : vector<4xi1>
%256 = spirv.Select %254, %cst_vec_4xf32_8, %cst_vec_4xf32_10 : vector<4xi1>, vector<4xf32>
%257 = spirv.Select %255, %249, %256 : vector<4xi1>, vector<4xf32>
%258 = spirv.Select %253, %cst_vec_4xf32_8, %257 : vector<4xi1>, vector<4xf32>
%259 = spirv.Select %252, %cst_vec_4xf32_0, %258 : vector<4xi1>, vector<4xf32>
%260 = spirv.Select %233, %231, %259 : vector<4xi1>, vector<4xf32>
%261 = spirv.FConvert %260 : vector<4xf32> to vector<4xf16>
// Exponential of the high half (x = %262): the same sequence as above.
%262 = spirv.FConvert %230 : vector<4xf16> to vector<4xf32>
%263 = spirv.IsNan %262 : vector<4xf32>
%264 = spirv.LogicalOr %263, %263 : vector<4xi1>
%265 = spirv.FMul %262, %cst_vec_4xf32_2 : vector<4xf32>
%266 = spirv.GL.Floor %265 : vector<4xf32>
%267 = spirv.FMul %266, %cst_vec_4xf32_1 : vector<4xf32>
%268 = spirv.FSub %262, %267 : vector<4xf32>
%269 = spirv.FMul %268, %268 : vector<4xf32>
%270 = spirv.FMul %269, %269 : vector<4xf32>
%271 = spirv.GL.Fma %cst_vec_4xf32_3, %268, %cst_vec_4xf32_3 : vector<4xf32>
%272 = spirv.GL.Fma %cst_vec_4xf32_5, %268, %cst_vec_4xf32_4 : vector<4xf32>
%273 = spirv.GL.Fma %cst_vec_4xf32_7, %268, %cst_vec_4xf32_6 : vector<4xf32>
%274 = spirv.GL.Fma %272, %269, %271 : vector<4xf32>
%275 = spirv.GL.Fma %273, %270, %274 : vector<4xf32>
%276 = spirv.ConvertFToS %266 : vector<4xf32> to vector<4xi32>
%277 = spirv.IAdd %276, %cst_vec_4xi32_11 : vector<4xi32>
%278 = spirv.ShiftLeftLogical %277, %cst_vec_4xi32 : vector<4xi32>, vector<4xi32>
%279 = spirv.Bitcast %278 : vector<4xi32> to vector<4xf32>
%280 = spirv.FMul %275, %279 : vector<4xf32>
%281 = spirv.SLessThanEqual %276, %cst_vec_4xi32_11 : vector<4xi32>
%282 = spirv.SGreaterThanEqual %276, %cst_vec_4xi32_12 : vector<4xi32>
%283 = spirv.FOrdEqual %262, %cst_vec_4xf32_9 : vector<4xf32>
%284 = spirv.FOrdEqual %262, %cst_vec_4xf32_8 : vector<4xf32>
%285 = spirv.FOrdGreaterThan %262, %cst_vec_4xf32_0 : vector<4xf32>
%286 = spirv.LogicalAnd %281, %282 : vector<4xi1>
%287 = spirv.Select %285, %cst_vec_4xf32_8, %cst_vec_4xf32_10 : vector<4xi1>, vector<4xf32>
%288 = spirv.Select %286, %280, %287 : vector<4xi1>, vector<4xf32>
%289 = spirv.Select %284, %cst_vec_4xf32_8, %288 : vector<4xi1>, vector<4xf32>
%290 = spirv.Select %283, %cst_vec_4xf32_0, %289 : vector<4xi1>, vector<4xf32>
%291 = spirv.Select %264, %262, %290 : vector<4xi1>, vector<4xf32>
%292 = spirv.FConvert %291 : vector<4xf32> to vector<4xf16>
// Accumulate in f16: unpack the low half of the carried sum and add %261.
%293 = spirv.CompositeExtract %207[0 : i32] : vector<4xf32>
%294 = spirv.Bitcast %293 : f32 to vector<2xf16>
%295 = spirv.CompositeExtract %294[0 : i32] : vector<2xf16>
%296 = spirv.CompositeExtract %294[1 : i32] : vector<2xf16>
%297 = spirv.CompositeExtract %207[1 : i32] : vector<4xf32>
%298 = spirv.Bitcast %297 : f32 to vector<2xf16>
%299 = spirv.CompositeExtract %298[0 : i32] : vector<2xf16>
%300 = spirv.CompositeExtract %298[1 : i32] : vector<2xf16>
%301 = spirv.CompositeConstruct %295, %296, %299, %300 : (f16, f16, f16, f16) -> vector<4xf16>
%302 = spirv.FAdd %261, %301 : vector<4xf16>
// Same for the high half with %292.
%303 = spirv.CompositeExtract %207[2 : i32] : vector<4xf32>
%304 = spirv.Bitcast %303 : f32 to vector<2xf16>
%305 = spirv.CompositeExtract %304[0 : i32] : vector<2xf16>
%306 = spirv.CompositeExtract %304[1 : i32] : vector<2xf16>
%307 = spirv.CompositeExtract %207[3 : i32] : vector<4xf32>
%308 = spirv.Bitcast %307 : f32 to vector<2xf16>
%309 = spirv.CompositeExtract %308[0 : i32] : vector<2xf16>
%310 = spirv.CompositeExtract %308[1 : i32] : vector<2xf16>
%311 = spirv.CompositeConstruct %305, %306, %309, %310 : (f16, f16, f16, f16) -> vector<4xf16>
%312 = spirv.FAdd %292, %311 : vector<4xf16>
// Repack both halves into a vector<4xf32>, publish it through %118, and advance by 1024.
%313 = spirv.Bitcast %312 : vector<4xf16> to vector<2xf32>
%314 = spirv.Bitcast %302 : vector<4xf16> to vector<2xf32>
%315 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32_0 : vector<4xf32>, %314 : vector<2xf32> -> vector<4xf32>
%316 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %315 : vector<4xf32>, %313 : vector<2xf32> -> vector<4xf32>
spirv.Store "Function" %118, %316 : vector<4xf32>
%317 = spirv.IAdd %206, %cst1024_i32 : i32
spirv.Branch ^bb1(%317, %316 : i32, vector<4xf32>)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
// Reduce the per-invocation sum (8 f16 lanes in %118) to a single f16 (%179).
// Step 1: add lanes 0-3 into %130 and lanes 4-7 into %141.
%119 = spirv.Load "Function" %118 : vector<4xf32>
%120 = spirv.CompositeExtract %119[0 : i32] : vector<4xf32>
%121 = spirv.Bitcast %120 : f32 to vector<2xf16>
%122 = spirv.CompositeExtract %121[0 : i32] : vector<2xf16>
%123 = spirv.CompositeExtract %121[1 : i32] : vector<2xf16>
%124 = spirv.CompositeExtract %119[1 : i32] : vector<4xf32>
%125 = spirv.Bitcast %124 : f32 to vector<2xf16>
%126 = spirv.CompositeExtract %125[0 : i32] : vector<2xf16>
%127 = spirv.CompositeExtract %125[1 : i32] : vector<2xf16>
%128 = spirv.FAdd %122, %123 : f16
%129 = spirv.FAdd %128, %126 : f16
%130 = spirv.FAdd %129, %127 : f16
%131 = spirv.CompositeExtract %119[2 : i32] : vector<4xf32>
%132 = spirv.Bitcast %131 : f32 to vector<2xf16>
%133 = spirv.CompositeExtract %132[0 : i32] : vector<2xf16>
%134 = spirv.CompositeExtract %132[1 : i32] : vector<2xf16>
%135 = spirv.CompositeExtract %119[3 : i32] : vector<4xf32>
%136 = spirv.Bitcast %135 : f32 to vector<2xf16>
%137 = spirv.CompositeExtract %136[0 : i32] : vector<2xf16>
%138 = spirv.CompositeExtract %136[1 : i32] : vector<2xf16>
%139 = spirv.FAdd %133, %134 : f16
%140 = spirv.FAdd %139, %137 : f16
%141 = spirv.FAdd %140, %138 : f16
%142 = spirv.CompositeConstruct %130, %141 : (f16, f16) -> vector<2xf16>
// Step 2: subgroup butterfly sum with shuffle-xor masks 1, 2, 4, 8, 16.
%143 = spirv.Bitcast %142 : vector<2xf16> to i32
%144 = spirv.GroupNonUniformShuffleXor <Subgroup> %143, %cst1_i32 : i32, i32
%145 = spirv.Bitcast %144 : i32 to vector<2xf16>
%146 = spirv.FAdd %142, %145 : vector<2xf16>
%147 = spirv.Bitcast %146 : vector<2xf16> to i32
%148 = spirv.GroupNonUniformShuffleXor <Subgroup> %147, %cst2_i32 : i32, i32
%149 = spirv.Bitcast %148 : i32 to vector<2xf16>
%150 = spirv.FAdd %146, %149 : vector<2xf16>
%151 = spirv.Bitcast %150 : vector<2xf16> to i32
%152 = spirv.GroupNonUniformShuffleXor <Subgroup> %151, %cst4_i32 : i32, i32
%153 = spirv.Bitcast %152 : i32 to vector<2xf16>
%154 = spirv.FAdd %150, %153 : vector<2xf16>
%155 = spirv.Bitcast %154 : vector<2xf16> to i32
%156 = spirv.GroupNonUniformShuffleXor <Subgroup> %155, %cst8_i32 : i32, i32
%157 = spirv.Bitcast %156 : i32 to vector<2xf16>
%158 = spirv.FAdd %154, %157 : vector<2xf16>
%159 = spirv.Bitcast %158 : vector<2xf16> to i32
%160 = spirv.GroupNonUniformShuffleXor <Subgroup> %159, %cst16_i32 : i32, i32
%161 = spirv.Bitcast %160 : i32 to vector<2xf16>
%162 = spirv.FAdd %158, %161 : vector<2xf16>
// Step 3: invocations with local-id.x % 32 == 0 (%91) write %162 to slot
// local-id.x / 32 (%89) of @__workgroup_mem__6.
%__workgroup_mem__6_addr = spirv.mlir.addressof @__workgroup_mem__6 : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<2xf16>>)>, Workgroup>
spirv.mlir.selection {
spirv.BranchConditional %91, ^bb1, ^bb2
^bb1: // pred: ^bb0
%206 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %89] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<2xf16>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %206, %162 : vector<2xf16>
spirv.Branch ^bb2
^bb2: // 2 preds: ^bb0, ^bb1
spirv.mlir.merge
}
// Barrier between the slot writes above and the slot reads below.
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
// Every invocation reads slot %92 = min(local-id.x % 32, 3); two butterfly steps
// (masks 1 and 2) then add the four slots together.
%163 = spirv.AccessChain %__workgroup_mem__6_addr[%cst0_i32, %92] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<2xf16>>)>, Workgroup>, i32, i32
%164 = spirv.Load "Workgroup" %163 : vector<2xf16>
%165 = spirv.Bitcast %164 : vector<2xf16> to i32
%166 = spirv.GroupNonUniformShuffleXor <Subgroup> %165, %cst1_i32 : i32, i32
%167 = spirv.Bitcast %166 : i32 to vector<2xf16>
%168 = spirv.FAdd %164, %167 : vector<2xf16>
%169 = spirv.Bitcast %168 : vector<2xf16> to i32
%170 = spirv.GroupNonUniformShuffleXor <Subgroup> %169, %cst2_i32 : i32, i32
%171 = spirv.Bitcast %170 : i32 to vector<2xf16>
%172 = spirv.FAdd %168, %171 : vector<2xf16>
// Take the value held by subgroup invocation 0 and add its two f16 halves.
%173 = spirv.Bitcast %172 : vector<2xf16> to i32
%174 = spirv.GroupNonUniformShuffle <Subgroup> %173, %cst0_i32 : i32, i32
%175 = spirv.Bitcast %174 : i32 to vector<2xf16>
%176 = spirv.CompositeExtract %175[0 : i32] : vector<2xf16>
%177 = spirv.CompositeExtract %175[1 : i32] : vector<2xf16>
%178 = spirv.FAdd %176, %177 : f16
// Adds the f16 zero constant to the total.
%179 = spirv.FAdd %178, %cst_f16 : f16
// Second cross-subgroup max reduction. It stores the same per-subgroup value %88
// that was reduced through @__workgroup_mem__5 earlier, this time through
// @__workgroup_mem__7, and repeats the same op sequence to produce %203 / %204.
// NOTE(review): this looks like a recomputation of %116 / %117 that was not
// deduplicated by the compiler -- confirm before relying on it.
%__workgroup_mem__7_addr = spirv.mlir.addressof @__workgroup_mem__7 : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<2xf16>>)>, Workgroup>
spirv.mlir.selection {
spirv.BranchConditional %91, ^bb1, ^bb2
^bb1: // pred: ^bb0
%206 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %89] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<2xf16>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %206, %88 : vector<2xf16>
spirv.Branch ^bb2
^bb2: // 2 preds: ^bb0, ^bb1
spirv.mlir.merge
}
// Barrier between the slot writes above and the slot reads below.
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
// Read slot %92 = min(local-id.x % 32, 3), then two NaN-propagating butterfly max steps.
%180 = spirv.AccessChain %__workgroup_mem__7_addr[%cst0_i32, %92] : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<2xf16>>)>, Workgroup>, i32, i32
%181 = spirv.Load "Workgroup" %180 : vector<2xf16>
%182 = spirv.Bitcast %181 : vector<2xf16> to i32
%183 = spirv.GroupNonUniformShuffleXor <Subgroup> %182, %cst1_i32 : i32, i32
%184 = spirv.Bitcast %183 : i32 to vector<2xf16>
%185 = spirv.GL.FMax %181, %184 : vector<2xf16>
%186 = spirv.IsNan %181 : vector<2xf16>
%187 = spirv.IsNan %184 : vector<2xf16>
%188 = spirv.Select %186, %181, %185 : vector<2xi1>, vector<2xf16>
%189 = spirv.Select %187, %184, %188 : vector<2xi1>, vector<2xf16>
%190 = spirv.Bitcast %189 : vector<2xf16> to i32
%191 = spirv.GroupNonUniformShuffleXor <Subgroup> %190, %cst2_i32 : i32, i32
%192 = spirv.Bitcast %191 : i32 to vector<2xf16>
%193 = spirv.GL.FMax %189, %192 : vector<2xf16>
%194 = spirv.IsNan %189 : vector<2xf16>
%195 = spirv.IsNan %192 : vector<2xf16>
%196 = spirv.Select %194, %189, %193 : vector<2xi1>, vector<2xf16>
%197 = spirv.Select %195, %192, %196 : vector<2xi1>, vector<2xf16>
// Take the value held by subgroup invocation 0 and fold its two halves with a final max.
%198 = spirv.Bitcast %197 : vector<2xf16> to i32
%199 = spirv.GroupNonUniformShuffle <Subgroup> %198, %cst0_i32 : i32, i32
%200 = spirv.Bitcast %199 : i32 to vector<2xf16>
%201 = spirv.CompositeExtract %200[0 : i32] : vector<2xf16>
%202 = spirv.CompositeExtract %200[1 : i32] : vector<2xf16>
%203 = spirv.GL.FMax %201, %202 : f16
// Splats for the loop that follows: %204 = maximum, %205 = sum of exponentials (%179).
%204 = spirv.CompositeConstruct %203, %203, %203, %203 : (f16, f16, f16, f16) -> vector<4xf16>
%205 = spirv.CompositeConstruct %179, %179, %179, %179 : (f16, f16, f16, f16) -> vector<4xf16>
spirv.mlir.loop {
spirv.Branch ^bb1(%cst0_i32 : i32)
^bb1(%206: i32): // 2 preds: ^bb0, ^bb2
%207 = spirv.SLessThan %206, %cst9216_i32 : i32
spirv.BranchConditional %207, ^bb2, ^bb3
^bb2: // pred: ^bb1
%208 = spirv.SLessThan %206, %cst0_i32 : i32
%209 = spirv.ISub %cst-1_i32, %206 : i32
%210 = spirv.Select %208, %209, %206 : i1, i32
%211 = spirv.SDiv %210, %cst8_i32 : i32
%212 = spirv.ISub %cst-1_i32, %211 : i32
%213 = spirv.Select %208, %212, %211 : i1, i32
%214 = spirv.IMul %19, %cst53084160_i32 : i32
%215 = spirv.IAdd %1, %214 : i32
%216 = spirv.IMul %21, %cst10616832_i32 : i32
%217 = spirv.IAdd %215, %216 : i32
%218 = spirv.IMul %23, %cst1152_i32 : i32
%219 = spirv.IAdd %217, %218 : i32
%220 = spirv.IAdd %213, %219 : i32
%221 = spirv.IAdd %220, %11 : i32
%222 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %221] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%223 = spirv.Load "StorageBuffer" %222 : vector<4xf32>
%224 = spirv.VectorShuffle [0 : i32, 1 : i32] %223 : vector<4xf32>, %223 : vector<4xf32> -> vector<2xf32>
%225 = spirv.Bitcast %224 : vector<2xf32> to vector<4xf16>
%226 = spirv.FSub %225, %204 : vector<4xf16>
%227 = spirv.VectorShuffle [2 : i32, 3 : i32] %223 : vector<4xf32>, %223 : vector<4xf32> -> vector<2xf32>
%228 = spirv.Bitcast %227 : vector<2xf32> to vector<4xf16>
%229 = spirv.FSub %228, %204 : vector<4xf16>
%230 = spirv.FConvert %226 : vector<4xf16> to vector<4xf32>
%231 = spirv.IsNan %230 : vector<4xf32>
%232 = spirv.LogicalOr %231, %231 : vector<4xi1>
%233 = spirv.FMul %230, %cst_vec_4xf32_2 : vector<4xf32>
%234 = spirv.GL.Floor %233 : vector<4xf32>
%235 = spirv.FMul %234, %cst_vec_4xf32_1 : vector<4xf32>
%236 = spirv.FSub %230, %235 : vector<4xf32>
%237 = spirv.FMul %236, %236 : vector<4xf32>
%238 = spirv.FMul %237, %237 : vector<4xf32>
%239 = spirv.GL.Fma %cst_vec_4xf32_3, %236, %cst_vec_4xf32_3 : vector<4xf32>
%240 = spirv.GL.Fma %cst_vec_4xf32_5, %236, %cst_vec_4xf32_4 : vector<4xf32>
%241 = spirv.GL.Fma %cst_vec_4xf32_7, %236, %cst_vec_4xf32_6 : vector<4xf32>
%242 = spirv.GL.Fma %240, %237, %239 : vector<4xf32>
%243 = spirv.GL.Fma %241, %238, %242 : vector<4xf32>
%244 = spirv.ConvertFToS %234 : vector<4xf32> to vector<4xi32>
%245 = spirv.IAdd %244, %cst_vec_4xi32_11 : vector<4xi32>
%246 = spirv.ShiftLeftLogical %245, %cst_vec_4xi32 : vector<4xi32>, vector<4xi32>
%247 = spirv.Bitcast %246 : vector<4xi32> to vector<4xf32>
%248 = spirv.FMul %243, %247 : vector<4xf32>
%249 = spirv.SLessThanEqual %244, %cst_vec_4xi32_11 : vector<4xi32>
%250 = spirv.SGreaterThanEqual %244, %cst_vec_4xi32_12 : vector<4xi32>
%251 = spirv.FOrdEqual %230, %cst_vec_4xf32_9 : vector<4xf32>
%252 = spirv.FOrdEqual %230, %cst_vec_4xf32_8 : vector<4xf32>
%253 = spirv.FOrdGreaterThan %230, %cst_vec_4xf32_0 : vector<4xf32>
%254 = spirv.LogicalAnd %249, %250 : vector<4xi1>
%255 = spirv.Select %253, %cst_vec_4xf32_8, %cst_vec_4xf32_10 : vector<4xi1>, vector<4xf32>
%256 = spirv.Select %254, %248, %255 : vector<4xi1>, vector<4xf32>
%257 = spirv.Select %252, %cst_vec_4xf32_8, %256 : vector<4xi1>, vector<4xf32>
%258 = spirv.Select %251, %cst_vec_4xf32_0, %257 : vector<4xi1>, vector<4xf32>
%259 = spirv.Select %232, %230, %258 : vector<4xi1>, vector<4xf32>
%260 = spirv.FConvert %259 : vector<4xf32> to vector<4xf16>
%261 = spirv.FConvert %229 : vector<4xf16> to vector<4xf32>
%262 = spirv.IsNan %261 : vector<4xf32>
%263 = spirv.LogicalOr %262, %262 : vector<4xi1>
%264 = spirv.FMul %261, %cst_vec_4xf32_2 : vector<4xf32>
%265 = spirv.GL.Floor %264 : vector<4xf32>
%266 = spirv.FMul %265, %cst_vec_4xf32_1 : vector<4xf32>
%267 = spirv.FSub %261, %266 : vector<4xf32>
%268 = spirv.FMul %267, %267 : vector<4xf32>
%269 = spirv.FMul %268, %268 : vector<4xf32>
%270 = spirv.GL.Fma %cst_vec_4xf32_3, %267, %cst_vec_4xf32_3 : vector<4xf32>
%271 = spirv.GL.Fma %cst_vec_4xf32_5, %267, %cst_vec_4xf32_4 : vector<4xf32>
%272 = spirv.GL.Fma %cst_vec_4xf32_7, %267, %cst_vec_4xf32_6 : vector<4xf32>
%273 = spirv.GL.Fma %271, %268, %270 : vector<4xf32>
%274 = spirv.GL.Fma %272, %269, %273 : vector<4xf32>
%275 = spirv.ConvertFToS %265 : vector<4xf32> to vector<4xi32>
%276 = spirv.IAdd %275, %cst_vec_4xi32_11 : vector<4xi32>
%277 = spirv.ShiftLeftLogical %276, %cst_vec_4xi32 : vector<4xi32>, vector<4xi32>
%278 = spirv.Bitcast %277 : vector<4xi32> to vector<4xf32>
%279 = spirv.FMul %274, %278 : vector<4xf32>
%280 = spirv.SLessThanEqual %275, %cst_vec_4xi32_11 : vector<4xi32>
%281 = spirv.SGreaterThanEqual %275, %cst_vec_4xi32_12 : vector<4xi32>
%282 = spirv.FOrdEqual %261, %cst_vec_4xf32_9 : vector<4xf32>
%283 = spirv.FOrdEqual %261, %cst_vec_4xf32_8 : vector<4xf32>
%284 = spirv.FOrdGreaterThan %261, %cst_vec_4xf32_0 : vector<4xf32>
%285 = spirv.LogicalAnd %280, %281 : vector<4xi1>
%286 = spirv.Select %284, %cst_vec_4xf32_8, %cst_vec_4xf32_10 : vector<4xi1>, vector<4xf32>
%287 = spirv.Select %285, %279, %286 : vector<4xi1>, vector<4xf32>
%288 = spirv.Select %283, %cst_vec_4xf32_8, %287 : vector<4xi1>, vector<4xf32>
%289 = spirv.Select %282, %cst_vec_4xf32_0, %288 : vector<4xi1>, vector<4xf32>
%290 = spirv.Select %263, %261, %289 : vector<4xi1>, vector<4xf32>
%291 = spirv.FConvert %290 : vector<4xf32> to vector<4xf16>
%292 = spirv.FDiv %260, %205 : vector<4xf16>
%293 = spirv.FDiv %291, %205 : vector<4xf16>
%294 = spirv.Bitcast %293 : vector<4xf16> to vector<2xf32>
%295 = spirv.Bitcast %292 : vector<4xf16> to vector<2xf32>
%296 = spirv.VectorShuffle [4 : i32, 5 : i32, 2 : i32, 3 : i32] %cst_vec_4xf32_0 : vector<4xf32>, %295 : vector<2xf32> -> vector<4xf32>
%297 = spirv.VectorShuffle [0 : i32, 1 : i32, 4 : i32, 5 : i32] %296 : vector<4xf32>, %294 : vector<2xf32> -> vector<4xf32>
%298 = spirv.IAdd %220, %17 : i32
%299 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %298] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %299, %297 : vector<4xf32>
%300 = spirv.IAdd %206, %cst1024_i32 : i32
spirv.Branch ^bb1(%300 : i32)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_47, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__
spirv.ExecutionMode @forward_dispatch_47 "LocalSize", 128, 1, 1
}
}
}
}
hal.executable private @forward_dispatch_48 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
// Export record for dispatch 48 (ordinal 0). The attributes declare a subgroup size
// of 32 and a workgroup size of 64 x 2 x 1 invocations.
hal.executable.export public @forward_dispatch_48_batch_matmul_10x9216x64x9216 ordinal(0) layout(#pipeline_layout4) attributes {subgroup_size = 32 : index, translation_info = #translation4, workgroup_size = [64 : index, 2 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index):
// Workgroup-count region: ignores its arguments and returns the constants
// (1, 144, 10).
// NOTE(review): presumably these are the x, y, z grid dimensions, with 10 being the
// batch count from the op name — confirm against the HAL dialect docs.
%c1 = arith.constant 1 : index
%c144 = arith.constant 144 : index
%c10 = arith.constant 10 : index
hal.return %c1, %c144, %c10 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, Float16, CooperativeMatrixNV], [SPV_KHR_storage_buffer_storage_class, SPV_NV_cooperative_matrix]> {
// Module-level variables used by the dispatch 48 entry function.
// Built-in input: the 3-component workgroup id.
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
// Push-constant block holding three i32 values (stride 4).
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>
// Workgroup-storage staging array of 576 vector<4xf32> elements.
spirv.GlobalVariable @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>
// Workgroup-storage staging array of 640 vector<4xf32> elements.
spirv.GlobalVariable @__workgroup_mem__3 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>
// Built-in input: the 3-component local invocation id.
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
// Storage buffers at descriptor set 0, bindings 0 and 1, each a runtime array of
// vector<4xf32> with a 16-byte stride.
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
// Entry function for dispatch 48.
//
// Outline of what the body does:
//   1. Loads two vector<4xf32> chunks per operand from binding (0, 0) and stages them
//      in @__workgroup_mem__3 and @__workgroup_mem__4.
//   2. Runs a loop (counter 0, 32, ... while < 9184) that multiplies cooperative
//      matrices loaded from one half of the staging arrays while storing the next
//      step's data into the other half.
//   3. After the loop, performs one more multiply-accumulate step on the last staged
//      data and stores four 16x16 f16 cooperative matrices to binding (0, 1).
//
// NOTE(review): both matmul operands are read from binding (0, 0) at different
// offsets; presumably the buffers hold f16 data viewed as vector<4xf32> — confirm.
spirv.func @forward_dispatch_48_batch_matmul_10x9216x64x9216() "None" attributes {spirv.entry_point_abi = #spirv.entry_point_abi<subgroup_size = 32>} {
// Scalar constants used by the index arithmetic below.
%cst9_i32 = spirv.Constant 9 : i32
%false = spirv.Constant false
%cst5_i32 = spirv.Constant 5 : i32
%cst1_i32 = spirv.Constant 1 : i32
%cst-33_i32 = spirv.Constant -33 : i32
%cst130_i32 = spirv.Constant 130 : i32
%cst256_i32 = spirv.Constant 256 : i32
%cst512_i32 = spirv.Constant 512 : i32
%cst-576_i32 = spirv.Constant -576 : i32
%cst-640_i32 = spirv.Constant -640 : i32
%cst146_i32 = spirv.Constant 146 : i32
%cst288_i32 = spirv.Constant 288 : i32
%cst82_i32 = spirv.Constant 82 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst320_i32 = spirv.Constant 320 : i32
%cst144_i32 = spirv.Constant 144 : i32
%cst72_i32 = spirv.Constant 72 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst128_i32 = spirv.Constant 128 : i32
%cst64_i32 = spirv.Constant 64 : i32
%cst160_i32 = spirv.Constant 160 : i32
%cst80_i32 = spirv.Constant 80 : i32
%cst1148_i32 = spirv.Constant 1148 : i32
%cst4_i32 = spirv.Constant 4 : i32
%cst10616832_i32 = spirv.Constant 10616832 : i32
%cst36864_i32 = spirv.Constant 36864 : i32
%cst18432_i32 = spirv.Constant 18432 : i32
%cst73728_i32 = spirv.Constant 73728 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst16_i32 = spirv.Constant 16 : i32
%cst0_i32 = spirv.Constant 0 : i32
%cst9184_i32 = spirv.Constant 9184 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst_f16 = spirv.Constant 0.000000e+00 : f16
// %0: 16x16 f16 cooperative matrix filled with 0.0; initial value of all four accumulators.
%0 = spirv.CompositeConstruct %cst_f16 : (f16) -> !spirv.coopmatrix<16x16xf16, Subgroup>
// %2, %4, %6 = LocalInvocationId x, y, z.
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%1 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%2 = spirv.CompositeExtract %1[0 : i32] : vector<3xi32>
%3 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%4 = spirv.CompositeExtract %3[1 : i32] : vector<3xi32>
%5 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%6 = spirv.CompositeExtract %5[2 : i32] : vector<3xi32>
%__workgroup_mem__3_addr = spirv.mlir.addressof @__workgroup_mem__3 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>
%__workgroup_mem__4_addr = spirv.mlir.addressof @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>
// Load the three i32 push constants into %8, %10, %12.
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>
%7 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32
%8 = spirv.Load "PushConstant" %7 : i32
%9 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32
%10 = spirv.Load "PushConstant" %9 : i32
%11 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32
%12 = spirv.Load "PushConstant" %11 : i32
// %18 = floor(%8 / 16). The select/-1-x pattern makes SDiv round toward negative
// infinity for negative inputs. The same pattern recurs for every floor division below.
// NOTE(review): presumably converts a byte offset into 16-byte vector<4xf32> elements — confirm.
%13 = spirv.SLessThan %8, %cst0_i32 : i32
%14 = spirv.ISub %cst-1_i32, %8 : i32
%15 = spirv.Select %13, %14, %8 : i1, i32
%16 = spirv.SDiv %15, %cst16_i32 : i32
%17 = spirv.ISub %cst-1_i32, %16 : i32
%18 = spirv.Select %13, %17, %16 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
// %24 = floor(%10 / 16).
%19 = spirv.SLessThan %10, %cst0_i32 : i32
%20 = spirv.ISub %cst-1_i32, %10 : i32
%21 = spirv.Select %19, %20, %10 : i1, i32
%22 = spirv.SDiv %21, %cst16_i32 : i32
%23 = spirv.ISub %cst-1_i32, %22 : i32
%24 = spirv.Select %19, %23, %22 : i1, i32
// %30 = floor(%12 / 16).
%25 = spirv.SLessThan %12, %cst0_i32 : i32
%26 = spirv.ISub %cst-1_i32, %12 : i32
%27 = spirv.Select %25, %26, %12 : i1, i32
%28 = spirv.SDiv %27, %cst16_i32 : i32
%29 = spirv.ISub %cst-1_i32, %28 : i32
%30 = spirv.Select %25, %29, %28 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
// %32, %34, %36 = WorkgroupId z, y, x.
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%31 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%32 = spirv.CompositeExtract %31[2 : i32] : vector<3xi32>
%33 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%34 = spirv.CompositeExtract %33[1 : i32] : vector<3xi32>
%35 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%36 = spirv.CompositeExtract %35[0 : i32] : vector<3xi32>
// ---- Initial staging of the first operand into @__workgroup_mem__3 ----
// %53 = wg.y*73728 + lid.x + lid.y*18432 + lid.z*36864 + wg.z*10616832 + %18
//       + floor(lid.x / 4)*1148   (element index into binding (0, 0)).
%37 = spirv.IMul %34, %cst73728_i32 : i32
%38 = spirv.IAdd %37, %2 : i32
%39 = spirv.IMul %4, %cst18432_i32 : i32
%40 = spirv.IAdd %38, %39 : i32
%41 = spirv.IMul %6, %cst36864_i32 : i32
%42 = spirv.IAdd %40, %41 : i32
%43 = spirv.IMul %32, %cst10616832_i32 : i32
%44 = spirv.IAdd %42, %43 : i32
%45 = spirv.IAdd %44, %18 : i32
// %46/%48 (sign test and sign-adjusted lid.x) are reused by later floor divisions of lid.x.
%46 = spirv.SLessThan %2, %cst0_i32 : i32
%47 = spirv.ISub %cst-1_i32, %2 : i32
%48 = spirv.Select %46, %47, %2 : i1, i32
// %51 = floor(lid.x / 4).
%49 = spirv.SDiv %48, %cst4_i32 : i32
%50 = spirv.ISub %cst-1_i32, %49 : i32
%51 = spirv.Select %46, %50, %49 : i1, i32
%52 = spirv.IMul %51, %cst1148_i32 : i32
%53 = spirv.IAdd %45, %52 : i32
%54 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %53] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%55 = spirv.Load "StorageBuffer" %54 : vector<4xf32>
// Destination %60 = lid.x + lid.y*80 + lid.z*160 + floor(lid.x / 4).
// %59 (the sum without the floor term) is reused inside the loop.
%56 = spirv.IMul %4, %cst80_i32 : i32
%57 = spirv.IAdd %2, %56 : i32
%58 = spirv.IMul %6, %cst160_i32 : i32
%59 = spirv.IAdd %57, %58 : i32
%60 = spirv.IAdd %59, %51 : i32
%61 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %60] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %61, %55 : vector<4xf32>
// Second chunk: source index +36864, destination index +160.
%62 = spirv.IAdd %53, %cst36864_i32 : i32
%63 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %62] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%64 = spirv.Load "StorageBuffer" %63 : vector<4xf32>
%65 = spirv.IAdd %60, %cst160_i32 : i32
%66 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %65] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %66, %64 : vector<4xf32>
// ---- Initial staging of the second operand into @__workgroup_mem__4 ----
// %75 = lid.x + lid.y*64 + lid.z*128 + wg.x*8 + wg.z*73728 + %24
//       (element index into binding (0, 0)). %67, %69, %71, %73 are reused later.
%67 = spirv.IMul %4, %cst64_i32 : i32
%68 = spirv.IAdd %2, %67 : i32
%69 = spirv.IMul %6, %cst128_i32 : i32
%70 = spirv.IAdd %68, %69 : i32
%71 = spirv.IMul %36, %cst8_i32 : i32
%72 = spirv.IAdd %70, %71 : i32
%73 = spirv.IMul %32, %cst73728_i32 : i32
%74 = spirv.IAdd %72, %73 : i32
%75 = spirv.IAdd %74, %24 : i32
%76 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %75] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%77 = spirv.Load "StorageBuffer" %76 : vector<4xf32>
// Destination %85 = lid.x + lid.y*72 + lid.z*144 + floor(lid.x / 8).
// %81 (without the floor term) and %84 = floor(lid.x / 8) are reused inside the loop.
%78 = spirv.IMul %4, %cst72_i32 : i32
%79 = spirv.IAdd %2, %78 : i32
%80 = spirv.IMul %6, %cst144_i32 : i32
%81 = spirv.IAdd %79, %80 : i32
%82 = spirv.SDiv %48, %cst8_i32 : i32
%83 = spirv.ISub %cst-1_i32, %82 : i32
%84 = spirv.Select %46, %83, %82 : i1, i32
%85 = spirv.IAdd %81, %84 : i32
%86 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %85] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %86, %77 : vector<4xf32>
// Second chunk: source index +128, destination index +144.
%87 = spirv.IAdd %75, %cst128_i32 : i32
%88 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %87] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%89 = spirv.Load "StorageBuffer" %88 : vector<4xf32>
%90 = spirv.IAdd %85, %cst144_i32 : i32
%91 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %90] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %91, %89 : vector<4xf32>
// Workgroup-scope barrier with WorkgroupMemory semantics: the staging stores above
// precede the cooperative-matrix loads in the loop.
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
// Function-storage slots used to carry the loop's results (four accumulators and the
// buffer-half selector) out of the loop region; they are written at the end of every
// iteration and read back after the loop.
%92 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%93 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%94 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%95 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%96 = spirv.Variable : !spirv.ptr<i32, Function>
// ---- Main loop ----
spirv.mlir.loop {
// Initial values: counter 0, four zero accumulators, selector 0.
spirv.Branch ^bb1(%cst0_i32, %0, %0, %0, %0, %cst0_i32 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32)
// Header args: %161 = counter, %162..%165 = accumulators, %166 = selector of the
// staging-array half to read this iteration.
^bb1(%161: i32, %162: !spirv.coopmatrix<16x16xf16, Subgroup>, %163: !spirv.coopmatrix<16x16xf16, Subgroup>, %164: !spirv.coopmatrix<16x16xf16, Subgroup>, %165: !spirv.coopmatrix<16x16xf16, Subgroup>, %166: i32): // 2 preds: ^bb0, ^bb2
// Continue while counter < 9184; the counter advances by 32 per iteration.
%167 = spirv.SLessThan %161, %cst9184_i32 : i32
spirv.BranchConditional %167, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Base index into @__workgroup_mem__3: %172 = %166*320 + lid.y*160 + lid.z*320.
%168 = spirv.IMul %166, %cst320_i32 : i32
%169 = spirv.IMul %4, %cst160_i32 : i32
%170 = spirv.IAdd %168, %169 : i32
%171 = spirv.IMul %6, %cst320_i32 : i32
%172 = spirv.IAdd %170, %171 : i32
// Four cooperative-matrix loads at base offsets 0, 2, 80 and 82, each with stride
// operand 5 and column-major flag false.
// NOTE(review): stride is presumably in vector<4xf32> elements — confirm against the
// SPV_NV_cooperative_matrix specification.
%173 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %172] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%174 = spirv.NV.CooperativeMatrixLoad %173, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%175 = spirv.IAdd %172, %cst2_i32 : i32
%176 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %175] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%177 = spirv.NV.CooperativeMatrixLoad %176, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%178 = spirv.IAdd %172, %cst80_i32 : i32
%179 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %178] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%180 = spirv.NV.CooperativeMatrixLoad %179, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%181 = spirv.IAdd %172, %cst82_i32 : i32
%182 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %181] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%183 = spirv.NV.CooperativeMatrixLoad %182, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
// Base index into @__workgroup_mem__4: %191 = %166*288 + lid.z*288 + floor(lid.x / 32)*4.
// NOTE(review): floor(lid.x / 32) is presumably the subgroup index along x, given the
// declared subgroup size of 32 — confirm.
%184 = spirv.IMul %166, %cst288_i32 : i32
%185 = spirv.IMul %6, %cst288_i32 : i32
%186 = spirv.IAdd %184, %185 : i32
%187 = spirv.SDiv %48, %cst32_i32 : i32
%188 = spirv.ISub %cst-1_i32, %187 : i32
%189 = spirv.Select %46, %188, %187 : i1, i32
%190 = spirv.IMul %189, %cst4_i32 : i32
%191 = spirv.IAdd %186, %190 : i32
// Four cooperative-matrix loads at base offsets 0, 2, 144 and 146, stride operand 9.
%192 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %191] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%193 = spirv.NV.CooperativeMatrixLoad %192, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%194 = spirv.IAdd %191, %cst2_i32 : i32
%195 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %194] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%196 = spirv.NV.CooperativeMatrixLoad %195, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%197 = spirv.IAdd %191, %cst144_i32 : i32
%198 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %197] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%199 = spirv.NV.CooperativeMatrixLoad %198, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%200 = spirv.IAdd %191, %cst146_i32 : i32
%201 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %200] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%202 = spirv.NV.CooperativeMatrixLoad %201, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
// Accumulate the four output tiles; each gets two chained multiply-adds:
//   %204 = %174*%193 + %177*%199 + %162
//   %206 = %174*%196 + %177*%202 + %163
//   %208 = %180*%193 + %183*%199 + %164
//   %210 = %180*%196 + %183*%202 + %165
%203 = spirv.NV.CooperativeMatrixMulAdd %174, %193, %162 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%204 = spirv.NV.CooperativeMatrixMulAdd %177, %199, %203 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%205 = spirv.NV.CooperativeMatrixMulAdd %174, %196, %163 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%206 = spirv.NV.CooperativeMatrixMulAdd %177, %202, %205 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%207 = spirv.NV.CooperativeMatrixMulAdd %180, %193, %164 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%208 = spirv.NV.CooperativeMatrixMulAdd %183, %199, %207 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%209 = spirv.NV.CooperativeMatrixMulAdd %180, %196, %165 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%210 = spirv.NV.CooperativeMatrixMulAdd %183, %202, %209 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
// ---- Stage data for the next iteration ----
// %211 = next counter value (%161 + 32); %217 = floor(%211 / 8).
// %213 is -1 - %211 written as -33 - %161.
%211 = spirv.IAdd %161, %cst32_i32 : i32
%212 = spirv.SLessThan %211, %cst0_i32 : i32
%213 = spirv.ISub %cst-33_i32, %161 : i32
%214 = spirv.Select %212, %213, %211 : i1, i32
%215 = spirv.SDiv %214, %cst8_i32 : i32
%216 = spirv.ISub %cst-1_i32, %215 : i32
%217 = spirv.Select %212, %216, %215 : i1, i32
// First-operand source index for the next step: %53 + floor(%211 / 8).
%218 = spirv.IAdd %53, %217 : i32
%219 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %218] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%220 = spirv.Load "StorageBuffer" %219 : vector<4xf32>
// %223 = floor(%211 / 32): index of the next step.
%221 = spirv.SDiv %214, %cst32_i32 : i32
%222 = spirv.ISub %cst-1_i32, %221 : i32
%223 = spirv.Select %212, %222, %221 : i1, i32
// %232 = %223 mod 2, normalised to be non-negative (abs / UMod / restore sign / add 2
// if negative). This becomes the selector (%166) for the next iteration.
%224 = spirv.GL.SAbs %223 : i32
%225 = spirv.GL.SAbs %cst2_i32 : i32
%226 = spirv.UMod %224, %225 : i32
%227 = spirv.IEqual %223, %224 : i32
%228 = spirv.SNegate %226 : i32
%229 = spirv.Select %227, %226, %228 : i1, i32
%230 = spirv.SLessThan %229, %cst0_i32 : i32
%231 = spirv.IAdd %229, %cst2_i32 : i32
%232 = spirv.Select %230, %231, %229 : i1, i32
// Destination %243 = %223*320 + %59 - 640*floor(%223 / 2) + %51, which equals
// (%223 mod 2)*320 + %59 + %51, i.e. the same slot as the initial staging but in the
// half selected by %232.
%233 = spirv.IMul %223, %cst320_i32 : i32
%234 = spirv.IAdd %233, %59 : i32
%235 = spirv.SLessThan %223, %cst0_i32 : i32
%236 = spirv.ISub %cst-1_i32, %223 : i32
%237 = spirv.Select %235, %236, %223 : i1, i32
%238 = spirv.SDiv %237, %cst2_i32 : i32
%239 = spirv.ISub %cst-1_i32, %238 : i32
%240 = spirv.Select %235, %239, %238 : i1, i32
%241 = spirv.IMul %240, %cst-640_i32 : i32
%242 = spirv.IAdd %234, %241 : i32
%243 = spirv.IAdd %242, %51 : i32
%244 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %243] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %244, %220 : vector<4xf32>
// Second chunk of the first operand: source +36864, destination +160.
%245 = spirv.IAdd %218, %cst36864_i32 : i32
%246 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %245] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%247 = spirv.Load "StorageBuffer" %246 : vector<4xf32>
%248 = spirv.IAdd %243, %cst160_i32 : i32
%249 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %248] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %249, %247 : vector<4xf32>
// Second-operand source index for the next step: %256 = %211*8 + lid.x + %67 + %69
// + %71 + %73 + %24, i.e. the initial index %75 advanced by %211*8.
%250 = spirv.IMul %211, %cst8_i32 : i32
%251 = spirv.IAdd %250, %2 : i32
%252 = spirv.IAdd %251, %67 : i32
%253 = spirv.IAdd %252, %69 : i32
%254 = spirv.IAdd %253, %71 : i32
%255 = spirv.IAdd %254, %73 : i32
%256 = spirv.IAdd %255, %24 : i32
%257 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %256] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%258 = spirv.Load "StorageBuffer" %257 : vector<4xf32>
// Destination %263 = %223*288 + %81 - 576*floor(%223 / 2) + %84, which equals
// (%223 mod 2)*288 + %81 + %84.
%259 = spirv.IMul %223, %cst288_i32 : i32
%260 = spirv.IAdd %259, %81 : i32
%261 = spirv.IMul %240, %cst-576_i32 : i32
%262 = spirv.IAdd %260, %261 : i32
%263 = spirv.IAdd %262, %84 : i32
%264 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %263] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %264, %258 : vector<4xf32>
// Second chunk of the second operand: source +128, destination +144.
%265 = spirv.IAdd %256, %cst128_i32 : i32
%266 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %265] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%267 = spirv.Load "StorageBuffer" %266 : vector<4xf32>
%268 = spirv.IAdd %263, %cst144_i32 : i32
%269 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %268] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %269, %267 : vector<4xf32>
// Barrier separating this iteration's staging stores (and its cooperative loads of
// the other half) from the next iteration's accesses.
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
// Spill the loop-carried values so they can be read after the loop region.
spirv.Store "Function" %92, %204 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %93, %206 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %94, %208 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %95, %210 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %96, %232 : i32
spirv.Branch ^bb1(%211, %204, %206, %208, %210, %232 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
// ---- Epilogue: final multiply-accumulate step on the last staged data ----
// Reload the selector (%97) and the four accumulators (%101, %100, %99, %98 correspond
// to loop values %204, %206, %208, %210 respectively).
%97 = spirv.Load "Function" %96 : i32
%98 = spirv.Load "Function" %95 : !spirv.coopmatrix<16x16xf16, Subgroup>
%99 = spirv.Load "Function" %94 : !spirv.coopmatrix<16x16xf16, Subgroup>
%100 = spirv.Load "Function" %93 : !spirv.coopmatrix<16x16xf16, Subgroup>
%101 = spirv.Load "Function" %92 : !spirv.coopmatrix<16x16xf16, Subgroup>
// Same @__workgroup_mem__3 addressing as in the loop body, with %97 as the selector:
// %106 = lid.y*160 + %97*320 + lid.z*320; loads at offsets 0, 2, 80, 82, stride 5.
%102 = spirv.IMul %4, %cst160_i32 : i32
%103 = spirv.IMul %97, %cst320_i32 : i32
%104 = spirv.IAdd %102, %103 : i32
%105 = spirv.IMul %6, %cst320_i32 : i32
%106 = spirv.IAdd %104, %105 : i32
%107 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %106] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%108 = spirv.NV.CooperativeMatrixLoad %107, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%109 = spirv.IAdd %106, %cst2_i32 : i32
%110 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %109] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%111 = spirv.NV.CooperativeMatrixLoad %110, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%112 = spirv.IAdd %106, %cst80_i32 : i32
%113 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %112] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%114 = spirv.NV.CooperativeMatrixLoad %113, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%115 = spirv.IAdd %106, %cst82_i32 : i32
%116 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %115] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%117 = spirv.NV.CooperativeMatrixLoad %116, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
// Same @__workgroup_mem__4 addressing as in the loop body:
// %125 = %97*288 + lid.z*288 + floor(lid.x / 32)*4; loads at offsets 0, 2, 144, 146, stride 9.
// %124 = floor(lid.x / 32)*4 is also reused in the output index below.
%118 = spirv.IMul %97, %cst288_i32 : i32
%119 = spirv.IMul %6, %cst288_i32 : i32
%120 = spirv.IAdd %118, %119 : i32
%121 = spirv.SDiv %48, %cst32_i32 : i32
%122 = spirv.ISub %cst-1_i32, %121 : i32
%123 = spirv.Select %46, %122, %121 : i1, i32
%124 = spirv.IMul %123, %cst4_i32 : i32
%125 = spirv.IAdd %120, %124 : i32
%126 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %125] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%127 = spirv.NV.CooperativeMatrixLoad %126, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%128 = spirv.IAdd %125, %cst2_i32 : i32
%129 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %128] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%130 = spirv.NV.CooperativeMatrixLoad %129, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%131 = spirv.IAdd %125, %cst144_i32 : i32
%132 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %131] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%133 = spirv.NV.CooperativeMatrixLoad %132, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%134 = spirv.IAdd %125, %cst146_i32 : i32
%135 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %134] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%136 = spirv.NV.CooperativeMatrixLoad %135, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
// Final results, mirroring the loop-body pattern:
//   %138 = %108*%127 + %111*%133 + %101
//   %140 = %108*%130 + %111*%136 + %100
//   %142 = %114*%127 + %117*%133 + %99
//   %144 = %114*%130 + %117*%136 + %98
%137 = spirv.NV.CooperativeMatrixMulAdd %108, %127, %101 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%138 = spirv.NV.CooperativeMatrixMulAdd %111, %133, %137 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%139 = spirv.NV.CooperativeMatrixMulAdd %108, %130, %100 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%140 = spirv.NV.CooperativeMatrixMulAdd %111, %136, %139 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%141 = spirv.NV.CooperativeMatrixMulAdd %114, %127, %99 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%142 = spirv.NV.CooperativeMatrixMulAdd %117, %133, %141 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%143 = spirv.NV.CooperativeMatrixMulAdd %114, %130, %98 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%144 = spirv.NV.CooperativeMatrixMulAdd %117, %136, %143 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
// ---- Write the four result tiles to binding (0, 1) ----
// Base %153 = wg.z*73728 + lid.z*73728 + wg.y*512 + lid.y*256 + wg.x*8 + %30
//             + floor(lid.x / 32)*4.
%145 = spirv.IMul %6, %cst73728_i32 : i32
%146 = spirv.IAdd %73, %145 : i32
%147 = spirv.IMul %34, %cst512_i32 : i32
%148 = spirv.IAdd %146, %147 : i32
%149 = spirv.IMul %4, %cst256_i32 : i32
%150 = spirv.IAdd %148, %149 : i32
%151 = spirv.IAdd %150, %71 : i32
%152 = spirv.IAdd %151, %30 : i32
%153 = spirv.IAdd %152, %124 : i32
// Stores use stride operand 8 and column-major flag false:
// %144 at base+130, %142 at base+128, %140 at base+2, %138 at base+0.
%154 = spirv.IAdd %153, %cst130_i32 : i32
%155 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %154] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %155, %144, %cst8_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
%156 = spirv.IAdd %153, %cst128_i32 : i32
%157 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %156] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %157, %142, %cst8_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
%158 = spirv.IAdd %153, %cst2_i32 : i32
%159 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %158] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %159, %140, %cst8_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
%160 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %153] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %160, %138, %cst8_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_48_batch_matmul_10x9216x64x9216, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__
spirv.ExecutionMode @forward_dispatch_48_batch_matmul_10x9216x64x9216 "LocalSize", 64, 2, 1
}
}
}
}
// Executable @forward_dispatch_49: a single entry point that copies vector<4xf16>
// elements from storage buffer binding (0, 0) to binding (0, 1), using a
// different linear index for the load than for the store. Each invocation moves
// exactly one vector<4xf16>; there are no loops, barriers or workgroup memory.
// NOTE(review): the two index formulas look like a swap of the "5" and "9216"
// dimensions of a 2x5x9216x64 f16 tensor (as the entry point name suggests) —
// confirm against the originating linalg.generic.
hal.executable private @forward_dispatch_49 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
// Export record for ordinal 0. workgroup_size = [8, 4, 1] matches the
// "LocalSize" execution mode declared at the end of the SPIR-V module.
hal.executable.export public @forward_dispatch_49_generic_2x5x9216x64 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation1, workgroup_size = [8 : index, 4 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index):
// The region ignores its arguments and returns the constants (2, 2304, 10).
// Presumably these are the workgroup counts along x, y, z — consistent with
// the UDiv/UMod by 5 applied to WorkgroupId.z in the kernel; TODO confirm.
%c2 = arith.constant 2 : index
%c2304 = arith.constant 2304 : index
%c10 = arith.constant 10 : index
hal.return %c2, %c2304, %c10 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
// This module only requires Shader, StorageBuffer16BitAccess and Float16; it
// does not use the cooperative-matrix capability listed in the target env.
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
// Built-in inputs read by the kernel.
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
// Two i32 push constants; the kernel floor-divides each by 8 and adds the
// result to one of the buffer indices (element 0 -> load, element 1 -> store).
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
// Binding (0, 0) is only loaded from and binding (0, 1) is only stored to in
// this function. Both are runtime arrays of vector<4xf16> with an 8-byte stride.
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
// Kernel body. With wg = WorkgroupId, l = LocalInvocationId, pc = push constants:
//   load  index = l.y*16 + wg.y*64  + wg.x*8 + l.x + (wg.z/5)*737280 + (wg.z%5)*147456 + floor(pc[0]/8)
//   store index = l.y*80 + wg.y*320 + wg.x*8 + l.x + (wg.z/5)*737280 + (wg.z%5)*16     + floor(pc[1]/8)
// All indices are in units of vector<4xf16> elements.
spirv.func @forward_dispatch_49_generic_2x5x9216x64() "None" {
%cst1_i32 = spirv.Constant 1 : i32
%cst320_i32 = spirv.Constant 320 : i32
%cst80_i32 = spirv.Constant 80 : i32
%cst147456_i32 = spirv.Constant 147456 : i32
%cst737280_i32 = spirv.Constant 737280 : i32
%cst64_i32 = spirv.Constant 64 : i32
%cst16_i32 = spirv.Constant 16 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst0_i32 = spirv.Constant 0 : i32
%cst5_i32 = spirv.Constant 5 : i32
// Read both push constants: %1 = pc[0], %3 = pc[1].
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
%0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%1 = spirv.Load "PushConstant" %0 : i32
%2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%3 = spirv.Load "PushConstant" %2 : i32
// %9 = floor(%1 / 8). SDiv alone would round toward zero, so negative inputs
// take the path -1 - ((-1 - x) / 8), which rounds toward negative infinity.
// NOTE(review): presumably %1 is a byte offset being converted to an index
// of 8-byte vector<4xf16> elements (stride=8) — confirm with the host side.
%4 = spirv.SLessThan %1, %cst0_i32 : i32
%5 = spirv.ISub %cst-1_i32, %1 : i32
%6 = spirv.Select %4, %5, %1 : i1, i32
%7 = spirv.SDiv %6, %cst8_i32 : i32
%8 = spirv.ISub %cst-1_i32, %7 : i32
%9 = spirv.Select %4, %8, %7 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
// %15 = floor(%3 / 8), using the same floor-division expansion as %9.
%10 = spirv.SLessThan %3, %cst0_i32 : i32
%11 = spirv.ISub %cst-1_i32, %3 : i32
%12 = spirv.Select %10, %11, %3 : i1, i32
%13 = spirv.SDiv %12, %cst8_i32 : i32
%14 = spirv.ISub %cst-1_i32, %13 : i32
%15 = spirv.Select %10, %14, %13 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>
// Decompose the workgroup id: %18 = wg.z / 5, %21 = wg.z % 5 (both unsigned),
// %20 = wg.y, %23 = wg.x. The built-in is re-loaded for each component.
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%16 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%17 = spirv.CompositeExtract %16[2 : i32] : vector<3xi32>
%18 = spirv.UDiv %17, %cst5_i32 : i32
%19 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%20 = spirv.CompositeExtract %19[1 : i32] : vector<3xi32>
%21 = spirv.UMod %17, %cst5_i32 : i32
%22 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%23 = spirv.CompositeExtract %22[0 : i32] : vector<3xi32>
// Local invocation id: %25 = l.y, %27 = l.x.
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%24 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%25 = spirv.CompositeExtract %24[1 : i32] : vector<3xi32>
%26 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%27 = spirv.CompositeExtract %26[0 : i32] : vector<3xi32>
// Load index %38 (see the formula above the function). %31 = wg.x*8 and
// %34 = (wg.z/5)*737280 are reused by the store index below.
%28 = spirv.IMul %25, %cst16_i32 : i32
%29 = spirv.IMul %20, %cst64_i32 : i32
%30 = spirv.IAdd %28, %29 : i32
%31 = spirv.IMul %23, %cst8_i32 : i32
%32 = spirv.IAdd %30, %31 : i32
%33 = spirv.IAdd %32, %27 : i32
%34 = spirv.IMul %18, %cst737280_i32 : i32
%35 = spirv.IAdd %33, %34 : i32
%36 = spirv.IMul %21, %cst147456_i32 : i32
%37 = spirv.IAdd %35, %36 : i32
%38 = spirv.IAdd %37, %9 : i32
// Read one vector<4xf16> from binding (0, 0).
%39 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %38] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
%40 = spirv.Load "StorageBuffer" %39 : vector<4xf16>
// Store index %49: compared with the load index, the l.y and wg.y terms are
// scaled by 5 (16 -> 80, 64 -> 320) and the wg.z % 5 term shrinks from
// 147456 to 16; the wg.x, l.x and wg.z / 5 terms are unchanged.
%41 = spirv.IMul %25, %cst80_i32 : i32
%42 = spirv.IMul %20, %cst320_i32 : i32
%43 = spirv.IAdd %41, %42 : i32
%44 = spirv.IAdd %43, %31 : i32
%45 = spirv.IAdd %44, %27 : i32
%46 = spirv.IAdd %45, %34 : i32
%47 = spirv.IMul %21, %cst16_i32 : i32
%48 = spirv.IAdd %46, %47 : i32
%49 = spirv.IAdd %48, %15 : i32
// Write the loaded vector, unmodified, to binding (0, 1).
%50 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %49] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf16>, stride=8> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %50, %40 : vector<4xf16>
spirv.Return
}
// Entry point declaration: GLCompute execution model with a local size of 8x4x1.
spirv.EntryPoint "GLCompute" @forward_dispatch_49_generic_2x5x9216x64, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_49_generic_2x5x9216x64 "LocalSize", 8, 4, 1
}
}
}
}
hal.executable private @forward_dispatch_50 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
hal.executable.export public @forward_dispatch_50_matmul_18432x320x320 ordinal(0) layout(#pipeline_layout8) attributes {subgroup_size = 32 : index, translation_info = #translation4, workgroup_size = [64 : index, 2 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
%c5 = arith.constant 5 : index
%c288 = arith.constant 288 : index
%c1 = arith.constant 1 : index
hal.return %c5, %c288, %c1 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, Float16, CooperativeMatrixNV], [SPV_KHR_storage_buffer_storage_class, SPV_NV_cooperative_matrix]> {
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>
spirv.GlobalVariable @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>
spirv.GlobalVariable @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.func @forward_dispatch_50_matmul_18432x320x320() "None" attributes {spirv.entry_point_abi = #spirv.entry_point_abi<subgroup_size = 32>} {
%cst9_i32 = spirv.Constant 9 : i32
%false = spirv.Constant false
%cst5_i32 = spirv.Constant 5 : i32
%cst3_i32 = spirv.Constant 3 : i32
%cst1_i32 = spirv.Constant 1 : i32
%cst-33_i32 = spirv.Constant -33 : i32
%cst642_i32 = spirv.Constant 642 : i32
%cst-576_i32 = spirv.Constant -576 : i32
%cst-640_i32 = spirv.Constant -640 : i32
%cst146_i32 = spirv.Constant 146 : i32
%cst82_i32 = spirv.Constant 82 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst144_i32 = spirv.Constant 144 : i32
%cst72_i32 = spirv.Constant 72 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst320_i32 = spirv.Constant 320 : i32
%cst160_i32 = spirv.Constant 160 : i32
%cst80_i32 = spirv.Constant 80 : i32
%cst36_i32 = spirv.Constant 36 : i32
%cst4_i32 = spirv.Constant 4 : i32
%cst1280_i32 = spirv.Constant 1280 : i32
%cst640_i32 = spirv.Constant 640 : i32
%cst2560_i32 = spirv.Constant 2560 : i32
%cst40_i32 = spirv.Constant 40 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst16_i32 = spirv.Constant 16 : i32
%cst0_i32 = spirv.Constant 0 : i32
%cst288_i32 = spirv.Constant 288 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst_f16 = spirv.Constant 0.000000e+00 : f16
%0 = spirv.CompositeConstruct %cst_f16 : (f16) -> !spirv.coopmatrix<16x16xf16, Subgroup>
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%1 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%2 = spirv.CompositeExtract %1[0 : i32] : vector<3xi32>
%3 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%4 = spirv.CompositeExtract %3[1 : i32] : vector<3xi32>
%5 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%6 = spirv.CompositeExtract %5[2 : i32] : vector<3xi32>
%__workgroup_mem__4_addr = spirv.mlir.addressof @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>
%__workgroup_mem__5_addr = spirv.mlir.addressof @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>
%7 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32
%8 = spirv.Load "PushConstant" %7 : i32
%9 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32
%10 = spirv.Load "PushConstant" %9 : i32
%11 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32
%12 = spirv.Load "PushConstant" %11 : i32
%13 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst3_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32
%14 = spirv.Load "PushConstant" %13 : i32
%15 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst4_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32
%16 = spirv.Load "PushConstant" %15 : i32
%17 = spirv.SLessThan %8, %cst0_i32 : i32
%18 = spirv.ISub %cst-1_i32, %8 : i32
%19 = spirv.Select %17, %18, %8 : i1, i32
%20 = spirv.SDiv %19, %cst16_i32 : i32
%21 = spirv.ISub %cst-1_i32, %20 : i32
%22 = spirv.Select %17, %21, %20 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%23 = spirv.SLessThan %12, %cst0_i32 : i32
%24 = spirv.ISub %cst-1_i32, %12 : i32
%25 = spirv.Select %23, %24, %12 : i1, i32
%26 = spirv.SDiv %25, %cst16_i32 : i32
%27 = spirv.ISub %cst-1_i32, %26 : i32
%28 = spirv.Select %23, %27, %26 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%29 = spirv.SLessThan %14, %cst0_i32 : i32
%30 = spirv.ISub %cst-1_i32, %14 : i32
%31 = spirv.Select %29, %30, %14 : i1, i32
%32 = spirv.SDiv %31, %cst16_i32 : i32
%33 = spirv.ISub %cst-1_i32, %32 : i32
%34 = spirv.Select %29, %33, %32 : i1, i32
%35 = spirv.SLessThan %10, %cst0_i32 : i32
%36 = spirv.ISub %cst-1_i32, %10 : i32
%37 = spirv.Select %35, %36, %10 : i1, i32
%38 = spirv.SDiv %37, %cst16_i32 : i32
%39 = spirv.ISub %cst-1_i32, %38 : i32
%40 = spirv.Select %35, %39, %38 : i1, i32
%41 = spirv.SLessThan %16, %cst0_i32 : i32
%42 = spirv.ISub %cst-1_i32, %16 : i32
%43 = spirv.Select %41, %42, %16 : i1, i32
%44 = spirv.SDiv %43, %cst16_i32 : i32
%45 = spirv.ISub %cst-1_i32, %44 : i32
%46 = spirv.Select %41, %45, %44 : i1, i32
%__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%47 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%48 = spirv.CompositeExtract %47[1 : i32] : vector<3xi32>
%49 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%50 = spirv.CompositeExtract %49[0 : i32] : vector<3xi32>
%51 = spirv.IMul %48, %cst2560_i32 : i32
%52 = spirv.IAdd %51, %2 : i32
%53 = spirv.IMul %4, %cst640_i32 : i32
%54 = spirv.IAdd %52, %53 : i32
%55 = spirv.IMul %6, %cst1280_i32 : i32
%56 = spirv.IAdd %54, %55 : i32
%57 = spirv.IAdd %56, %22 : i32
%58 = spirv.SLessThan %2, %cst0_i32 : i32
%59 = spirv.ISub %cst-1_i32, %2 : i32
%60 = spirv.Select %58, %59, %2 : i1, i32
%61 = spirv.SDiv %60, %cst4_i32 : i32
%62 = spirv.ISub %cst-1_i32, %61 : i32
%63 = spirv.Select %58, %62, %61 : i1, i32
%64 = spirv.IMul %63, %cst36_i32 : i32
%65 = spirv.IAdd %57, %64 : i32
%66 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %65] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%67 = spirv.Load "StorageBuffer" %66 : vector<4xf32>
%68 = spirv.IMul %4, %cst80_i32 : i32
%69 = spirv.IAdd %2, %68 : i32
%70 = spirv.IMul %6, %cst160_i32 : i32
%71 = spirv.IAdd %69, %70 : i32
%72 = spirv.IAdd %71, %63 : i32
%73 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %72] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %73, %67 : vector<4xf32>
%74 = spirv.IAdd %65, %cst1280_i32 : i32
%75 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %74] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%76 = spirv.Load "StorageBuffer" %75 : vector<4xf32>
%77 = spirv.IAdd %72, %cst160_i32 : i32
%78 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %77] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %78, %76 : vector<4xf32>
%79 = spirv.IMul %4, %cst320_i32 : i32
%80 = spirv.IAdd %2, %79 : i32
%81 = spirv.IMul %6, %cst640_i32 : i32
%82 = spirv.IAdd %80, %81 : i32
%83 = spirv.IMul %50, %cst8_i32 : i32
%84 = spirv.IAdd %82, %83 : i32
%85 = spirv.IAdd %84, %28 : i32
%86 = spirv.SDiv %60, %cst8_i32 : i32
%87 = spirv.ISub %cst-1_i32, %86 : i32
%88 = spirv.Select %58, %87, %86 : i1, i32
%89 = spirv.IMul %88, %cst32_i32 : i32
%90 = spirv.IAdd %85, %89 : i32
%91 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %90] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%92 = spirv.Load "StorageBuffer" %91 : vector<4xf32>
%93 = spirv.IMul %4, %cst72_i32 : i32
%94 = spirv.IAdd %2, %93 : i32
%95 = spirv.IMul %6, %cst144_i32 : i32
%96 = spirv.IAdd %94, %95 : i32
%97 = spirv.IAdd %96, %88 : i32
%98 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %97] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %98, %92 : vector<4xf32>
%99 = spirv.IAdd %90, %cst640_i32 : i32
%100 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %99] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%101 = spirv.Load "StorageBuffer" %100 : vector<4xf32>
%102 = spirv.IAdd %97, %cst144_i32 : i32
%103 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %102] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %103, %101 : vector<4xf32>
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
%104 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%105 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%106 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%107 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%108 = spirv.Variable : !spirv.ptr<i32, Function>
spirv.mlir.loop {
spirv.Branch ^bb1(%cst0_i32, %0, %0, %0, %0, %cst0_i32 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32)
^bb1(%193: i32, %194: !spirv.coopmatrix<16x16xf16, Subgroup>, %195: !spirv.coopmatrix<16x16xf16, Subgroup>, %196: !spirv.coopmatrix<16x16xf16, Subgroup>, %197: !spirv.coopmatrix<16x16xf16, Subgroup>, %198: i32): // 2 preds: ^bb0, ^bb2
%199 = spirv.SLessThan %193, %cst288_i32 : i32
spirv.BranchConditional %199, ^bb2, ^bb3
^bb2: // pred: ^bb1
%200 = spirv.IMul %198, %cst320_i32 : i32
%201 = spirv.IMul %4, %cst160_i32 : i32
%202 = spirv.IAdd %200, %201 : i32
%203 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %202] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%204 = spirv.NV.CooperativeMatrixLoad %203, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%205 = spirv.IAdd %202, %cst2_i32 : i32
%206 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %205] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%207 = spirv.NV.CooperativeMatrixLoad %206, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%208 = spirv.IAdd %202, %cst80_i32 : i32
%209 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %208] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%210 = spirv.NV.CooperativeMatrixLoad %209, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%211 = spirv.IAdd %202, %cst82_i32 : i32
%212 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %211] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%213 = spirv.NV.CooperativeMatrixLoad %212, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%214 = spirv.IMul %198, %cst288_i32 : i32
%215 = spirv.SDiv %60, %cst32_i32 : i32
%216 = spirv.ISub %cst-1_i32, %215 : i32
%217 = spirv.Select %58, %216, %215 : i1, i32
%218 = spirv.IMul %217, %cst4_i32 : i32
%219 = spirv.IAdd %214, %218 : i32
%220 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %219] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%221 = spirv.NV.CooperativeMatrixLoad %220, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%222 = spirv.IAdd %219, %cst2_i32 : i32
%223 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %222] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%224 = spirv.NV.CooperativeMatrixLoad %223, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%225 = spirv.IAdd %219, %cst144_i32 : i32
%226 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %225] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%227 = spirv.NV.CooperativeMatrixLoad %226, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%228 = spirv.IAdd %219, %cst146_i32 : i32
%229 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %228] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%230 = spirv.NV.CooperativeMatrixLoad %229, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%231 = spirv.NV.CooperativeMatrixMulAdd %204, %221, %194 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%232 = spirv.NV.CooperativeMatrixMulAdd %207, %227, %231 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%233 = spirv.NV.CooperativeMatrixMulAdd %204, %224, %195 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%234 = spirv.NV.CooperativeMatrixMulAdd %207, %230, %233 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%235 = spirv.NV.CooperativeMatrixMulAdd %210, %221, %196 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%236 = spirv.NV.CooperativeMatrixMulAdd %213, %227, %235 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%237 = spirv.NV.CooperativeMatrixMulAdd %210, %224, %197 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%238 = spirv.NV.CooperativeMatrixMulAdd %213, %230, %237 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%239 = spirv.IAdd %193, %cst32_i32 : i32
%240 = spirv.SLessThan %239, %cst0_i32 : i32
%241 = spirv.ISub %cst-33_i32, %193 : i32
%242 = spirv.Select %240, %241, %239 : i1, i32
%243 = spirv.SDiv %242, %cst8_i32 : i32
%244 = spirv.ISub %cst-1_i32, %243 : i32
%245 = spirv.Select %240, %244, %243 : i1, i32
%246 = spirv.IAdd %57, %245 : i32
%247 = spirv.IAdd %246, %64 : i32
%248 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %247] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%249 = spirv.Load "StorageBuffer" %248 : vector<4xf32>
%250 = spirv.SDiv %242, %cst32_i32 : i32
%251 = spirv.ISub %cst-1_i32, %250 : i32
%252 = spirv.Select %240, %251, %250 : i1, i32
%253 = spirv.GL.SAbs %252 : i32
%254 = spirv.GL.SAbs %cst2_i32 : i32
%255 = spirv.UMod %253, %254 : i32
%256 = spirv.IEqual %252, %253 : i32
%257 = spirv.SNegate %255 : i32
%258 = spirv.Select %256, %255, %257 : i1, i32
%259 = spirv.SLessThan %258, %cst0_i32 : i32
%260 = spirv.IAdd %258, %cst2_i32 : i32
%261 = spirv.Select %259, %260, %258 : i1, i32
%262 = spirv.IMul %252, %cst320_i32 : i32
%263 = spirv.IAdd %262, %71 : i32
%264 = spirv.SLessThan %252, %cst0_i32 : i32
%265 = spirv.ISub %cst-1_i32, %252 : i32
%266 = spirv.Select %264, %265, %252 : i1, i32
%267 = spirv.SDiv %266, %cst2_i32 : i32
%268 = spirv.ISub %cst-1_i32, %267 : i32
%269 = spirv.Select %264, %268, %267 : i1, i32
%270 = spirv.IMul %269, %cst-640_i32 : i32
%271 = spirv.IAdd %263, %270 : i32
%272 = spirv.IAdd %271, %63 : i32
%273 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %272] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %273, %249 : vector<4xf32>
%274 = spirv.IAdd %247, %cst1280_i32 : i32
%275 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %274] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%276 = spirv.Load "StorageBuffer" %275 : vector<4xf32>
%277 = spirv.IAdd %272, %cst160_i32 : i32
%278 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %277] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %278, %276 : vector<4xf32>
%279 = spirv.IMul %239, %cst40_i32 : i32
%280 = spirv.IAdd %279, %2 : i32
%281 = spirv.IAdd %280, %79 : i32
%282 = spirv.IAdd %281, %81 : i32
%283 = spirv.IAdd %282, %83 : i32
%284 = spirv.IAdd %283, %28 : i32
%285 = spirv.IAdd %284, %89 : i32
%286 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %285] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%287 = spirv.Load "StorageBuffer" %286 : vector<4xf32>
%288 = spirv.IMul %252, %cst288_i32 : i32
%289 = spirv.IAdd %288, %96 : i32
%290 = spirv.IMul %269, %cst-576_i32 : i32
%291 = spirv.IAdd %289, %290 : i32
%292 = spirv.IAdd %291, %88 : i32
%293 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %292] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %293, %287 : vector<4xf32>
%294 = spirv.IAdd %285, %cst640_i32 : i32
%295 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %294] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%296 = spirv.Load "StorageBuffer" %295 : vector<4xf32>
%297 = spirv.IAdd %292, %cst144_i32 : i32
%298 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %297] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %298, %296 : vector<4xf32>
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
spirv.Store "Function" %104, %232 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %105, %234 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %106, %236 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %107, %238 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %108, %261 : i32
spirv.Branch ^bb1(%239, %232, %234, %236, %238, %261 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
%109 = spirv.Load "Function" %108 : i32
%110 = spirv.Load "Function" %107 : !spirv.coopmatrix<16x16xf16, Subgroup>
%111 = spirv.Load "Function" %106 : !spirv.coopmatrix<16x16xf16, Subgroup>
%112 = spirv.Load "Function" %105 : !spirv.coopmatrix<16x16xf16, Subgroup>
%113 = spirv.Load "Function" %104 : !spirv.coopmatrix<16x16xf16, Subgroup>
%114 = spirv.IMul %4, %cst160_i32 : i32
%115 = spirv.IMul %109, %cst320_i32 : i32
%116 = spirv.IAdd %114, %115 : i32
%117 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %116] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%118 = spirv.NV.CooperativeMatrixLoad %117, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%119 = spirv.IAdd %116, %cst2_i32 : i32
%120 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %119] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%121 = spirv.NV.CooperativeMatrixLoad %120, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%122 = spirv.IAdd %116, %cst80_i32 : i32
%123 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %122] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%124 = spirv.NV.CooperativeMatrixLoad %123, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%125 = spirv.IAdd %116, %cst82_i32 : i32
%126 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %125] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%127 = spirv.NV.CooperativeMatrixLoad %126, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%128 = spirv.IMul %109, %cst288_i32 : i32
%129 = spirv.SDiv %60, %cst32_i32 : i32
%130 = spirv.ISub %cst-1_i32, %129 : i32
%131 = spirv.Select %58, %130, %129 : i1, i32
%132 = spirv.IMul %131, %cst4_i32 : i32
%133 = spirv.IAdd %128, %132 : i32
%134 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %133] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%135 = spirv.NV.CooperativeMatrixLoad %134, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%136 = spirv.IAdd %133, %cst2_i32 : i32
%137 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %136] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%138 = spirv.NV.CooperativeMatrixLoad %137, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%139 = spirv.IAdd %133, %cst144_i32 : i32
%140 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %139] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%141 = spirv.NV.CooperativeMatrixLoad %140, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%142 = spirv.IAdd %133, %cst146_i32 : i32
%143 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %142] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%144 = spirv.NV.CooperativeMatrixLoad %143, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%145 = spirv.NV.CooperativeMatrixMulAdd %118, %135, %113 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%146 = spirv.NV.CooperativeMatrixMulAdd %121, %141, %145 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%147 = spirv.NV.CooperativeMatrixMulAdd %118, %138, %112 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%148 = spirv.NV.CooperativeMatrixMulAdd %121, %144, %147 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%149 = spirv.NV.CooperativeMatrixMulAdd %124, %135, %111 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%150 = spirv.NV.CooperativeMatrixMulAdd %127, %141, %149 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%151 = spirv.NV.CooperativeMatrixMulAdd %124, %138, %110 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%152 = spirv.NV.CooperativeMatrixMulAdd %127, %144, %151 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%153 = spirv.IAdd %83, %34 : i32
%154 = spirv.IAdd %153, %132 : i32
%155 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %154] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%156 = spirv.NV.CooperativeMatrixLoad %155, %cst0_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer> as !spirv.coopmatrix<16x16xf16, Subgroup>
%157 = spirv.IAdd %154, %cst2_i32 : i32
%158 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %157] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%159 = spirv.NV.CooperativeMatrixLoad %158, %cst0_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer> as !spirv.coopmatrix<16x16xf16, Subgroup>
%160 = spirv.IMul %4, %cst1280_i32 : i32
%161 = spirv.IAdd %51, %160 : i32
%162 = spirv.IAdd %161, %83 : i32
%163 = spirv.IAdd %162, %40 : i32
%164 = spirv.IAdd %163, %132 : i32
%165 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %164] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%166 = spirv.NV.CooperativeMatrixLoad %165, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer> as !spirv.coopmatrix<16x16xf16, Subgroup>
%167 = spirv.IAdd %164, %cst2_i32 : i32
%168 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %167] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%169 = spirv.NV.CooperativeMatrixLoad %168, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer> as !spirv.coopmatrix<16x16xf16, Subgroup>
%170 = spirv.IAdd %164, %cst640_i32 : i32
%171 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %170] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%172 = spirv.NV.CooperativeMatrixLoad %171, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer> as !spirv.coopmatrix<16x16xf16, Subgroup>
%173 = spirv.IAdd %164, %cst642_i32 : i32
%174 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %173] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%175 = spirv.NV.CooperativeMatrixLoad %174, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer> as !spirv.coopmatrix<16x16xf16, Subgroup>
%176 = spirv.FAdd %156, %146 : !spirv.coopmatrix<16x16xf16, Subgroup>
%177 = spirv.FAdd %159, %148 : !spirv.coopmatrix<16x16xf16, Subgroup>
%178 = spirv.FAdd %156, %150 : !spirv.coopmatrix<16x16xf16, Subgroup>
%179 = spirv.FAdd %159, %152 : !spirv.coopmatrix<16x16xf16, Subgroup>
%180 = spirv.FAdd %176, %166 : !spirv.coopmatrix<16x16xf16, Subgroup>
%181 = spirv.FAdd %177, %169 : !spirv.coopmatrix<16x16xf16, Subgroup>
%182 = spirv.FAdd %178, %172 : !spirv.coopmatrix<16x16xf16, Subgroup>
%183 = spirv.FAdd %179, %175 : !spirv.coopmatrix<16x16xf16, Subgroup>
%184 = spirv.IAdd %162, %46 : i32
%185 = spirv.IAdd %184, %132 : i32
%186 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %185] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %186, %180, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
%187 = spirv.IAdd %185, %cst2_i32 : i32
%188 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %187] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %188, %181, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
%189 = spirv.IAdd %185, %cst640_i32 : i32
%190 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %189] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %190, %182, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
%191 = spirv.IAdd %185, %cst642_i32 : i32
%192 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %191] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %192, %183, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_50_matmul_18432x320x320, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__
spirv.ExecutionMode @forward_dispatch_50_matmul_18432x320x320 "LocalSize", 64, 2, 1
}
}
}
}
// Executable for dispatch 55. It holds one Vulkan/SPIR-V variant whose single
// entry point, @forward_dispatch_55_matmul_160x320x1024, accumulates 16x16 f16
// cooperative-matrix products from two storage buffers (bindings 0 and 1) and
// stores the results to a third (binding 2).
// NOTE(review): the "160x320x1024" in the name presumably encodes the matmul
// M x N x K sizes; the index arithmetic below is consistent with that, but
// confirm against the producing dispatch.
hal.executable private @forward_dispatch_55 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
// Export record: declares 128x1x1 invocations per workgroup and a subgroup
// size of 32. The region below returns the workgroup count (5, 5, 1).
hal.executable.export public @forward_dispatch_55_matmul_160x320x1024 ordinal(0) layout(#pipeline_layout10) attributes {subgroup_size = 32 : index, translation_info = #translation4, workgroup_size = [128 : index, 1 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
%c5 = arith.constant 5 : index
%c1 = arith.constant 1 : index
hal.return %c5, %c5, %c1 : index, index, index
}
// Target environment the module was compiled against (capabilities, extensions,
// resource limits and the supported cooperative-matrix shapes).
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, Float16, CooperativeMatrixNV], [SPV_KHR_storage_buffer_storage_class, SPV_NV_cooperative_matrix]> {
// Module-scope variables: the two built-in ID inputs, a push-constant block of
// two i32 values, two Workgroup-storage staging arrays (576 and 320 elements of
// vector<4xf32>), and three StorageBuffer bindings in descriptor set 0.
// Every buffer is addressed in vector<4xf32> elements (stride=16 bytes); the
// cooperative-matrix loads/stores reinterpret that storage as f16 tiles.
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
spirv.GlobalVariable @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>
spirv.GlobalVariable @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_2_ bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
// Kernel body. Structure, as written below:
//   1. prologue: each invocation copies one vector from binding 0 and two from
//      binding 1 into the Workgroup arrays, then a workgroup barrier;
//   2. loop: counter runs 0, 32, ... while < 992; each trip multiplies tiles
//      read from the half selected by %120 and copies the next vectors into
//      the other half, then a workgroup barrier;
//   3. epilogue: one more round of tile multiplies, then two cooperative-matrix
//      stores to binding 2.
// NOTE(review): this looks like a two-stage software-pipelined (double
// buffered) matmul; that reading is an interpretation, not stated by the IR.
spirv.func @forward_dispatch_55_matmul_160x320x1024() "None" attributes {spirv.entry_point_abi = #spirv.entry_point_abi<subgroup_size = 32>} {
// Integer constants used by the index arithmetic below.
%cst9_i32 = spirv.Constant 9 : i32
%false = spirv.Constant false
%cst5_i32 = spirv.Constant 5 : i32
%cst1_i32 = spirv.Constant 1 : i32
%cst-33_i32 = spirv.Constant -33 : i32
%cst1280_i32 = spirv.Constant 1280 : i32
%cst-576_i32 = spirv.Constant -576 : i32
%cst40_i32 = spirv.Constant 40 : i32
%cst-320_i32 = spirv.Constant -320 : i32
%cst288_i32 = spirv.Constant 288 : i32
%cst82_i32 = spirv.Constant 82 : i32
%cst80_i32 = spirv.Constant 80 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst144_i32 = spirv.Constant 144 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst640_i32 = spirv.Constant 640 : i32
%cst160_i32 = spirv.Constant 160 : i32
%cst16752_i32 = spirv.Constant 16752 : i32
%cst124_i32 = spirv.Constant 124 : i32
%cst4_i32 = spirv.Constant 4 : i32
%cst4096_i32 = spirv.Constant 4096 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst16_i32 = spirv.Constant 16 : i32
%cst0_i32 = spirv.Constant 0 : i32
%cst992_i32 = spirv.Constant 992 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst_f16 = spirv.Constant 0.000000e+00 : f16
// %0: all-zero 16x16 f16 cooperative matrix, the initial accumulator value.
%0 = spirv.CompositeConstruct %cst_f16 : (f16) -> !spirv.coopmatrix<16x16xf16, Subgroup>
// Local invocation id components: %2 = x, %4 = y, %6 = z.
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%1 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%2 = spirv.CompositeExtract %1[0 : i32] : vector<3xi32>
%3 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%4 = spirv.CompositeExtract %3[1 : i32] : vector<3xi32>
%5 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%6 = spirv.CompositeExtract %5[2 : i32] : vector<3xi32>
%__workgroup_mem__4_addr = spirv.mlir.addressof @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>
%__workgroup_mem__5_addr = spirv.mlir.addressof @__workgroup_mem__5 : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>
// Push constants: %8 = element 0, %10 = element 1.
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
%7 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%8 = spirv.Load "PushConstant" %7 : i32
%9 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%10 = spirv.Load "PushConstant" %9 : i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
// %16 = floor(%8 / 16). The select / "-1 - x" sequence is the expansion of a
// floor division built on the truncating SDiv; the same pattern recurs below.
// %16 is later added to binding-1 indices.
// NOTE(review): presumably %8 is a byte offset being converted to 16-byte
// vector<4xf32> elements - confirm against the host-side push constants.
%11 = spirv.SLessThan %8, %cst0_i32 : i32
%12 = spirv.ISub %cst-1_i32, %8 : i32
%13 = spirv.Select %11, %12, %8 : i1, i32
%14 = spirv.SDiv %13, %cst16_i32 : i32
%15 = spirv.ISub %cst-1_i32, %14 : i32
%16 = spirv.Select %11, %15, %14 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
// %22 = floor(%10 / 16); later added to binding-2 (output) indices.
%17 = spirv.SLessThan %10, %cst0_i32 : i32
%18 = spirv.ISub %cst-1_i32, %10 : i32
%19 = spirv.Select %17, %18, %10 : i1, i32
%20 = spirv.SDiv %19, %cst16_i32 : i32
%21 = spirv.ISub %cst-1_i32, %20 : i32
%22 = spirv.Select %17, %21, %20 : i1, i32
%__resource_var_0_2__addr = spirv.mlir.addressof @__resource_var_0_2_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
// Workgroup id components: %24 = y, %26 = x.
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%23 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%24 = spirv.CompositeExtract %23[1 : i32] : vector<3xi32>
%25 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%26 = spirv.CompositeExtract %25[0 : i32] : vector<3xi32>
// ---- Prologue copy 1: binding 0 -> @__workgroup_mem__4 ----
// %32 = wg.y*4096 + lid.x + lid.y*4096 + lid.z*4096
%27 = spirv.IMul %24, %cst4096_i32 : i32
%28 = spirv.IAdd %27, %2 : i32
%29 = spirv.IMul %4, %cst4096_i32 : i32
%30 = spirv.IAdd %28, %29 : i32
%31 = spirv.IMul %6, %cst4096_i32 : i32
%32 = spirv.IAdd %30, %31 : i32
// %38 = floor(lid.x / 4); %39 = %38 * 124.
%33 = spirv.SLessThan %2, %cst0_i32 : i32
%34 = spirv.ISub %cst-1_i32, %2 : i32
%35 = spirv.Select %33, %34, %2 : i1, i32
%36 = spirv.SDiv %35, %cst4_i32 : i32
%37 = spirv.ISub %cst-1_i32, %36 : i32
%38 = spirv.Select %33, %37, %36 : i1, i32
%39 = spirv.IMul %38, %cst124_i32 : i32
// Source index %41 = %32 + %39 + 16752.
// NOTE(review): 16752 looks like a fixed base offset of this operand inside
// binding 0 - confirm.
%40 = spirv.IAdd %32, %39 : i32
%41 = spirv.IAdd %40, %cst16752_i32 : i32
%42 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %41] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%43 = spirv.Load "StorageBuffer" %42 : vector<4xf32>
// Destination index %48 = lid.x + lid.y*160 + lid.z*160 + floor(lid.x / 4).
// %47 (the sum without the last term) is reused inside the loop.
%44 = spirv.IMul %4, %cst160_i32 : i32
%45 = spirv.IAdd %2, %44 : i32
%46 = spirv.IMul %6, %cst160_i32 : i32
%47 = spirv.IAdd %45, %46 : i32
%48 = spirv.IAdd %47, %38 : i32
%49 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %48] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %49, %43 : vector<4xf32>
// ---- Prologue copy 2: binding 1 -> @__workgroup_mem__5 (two vectors) ----
// %56 = lid.x + lid.y*640 + lid.z*640 + wg.x*8 + %16
%50 = spirv.IMul %4, %cst640_i32 : i32
%51 = spirv.IAdd %2, %50 : i32
%52 = spirv.IMul %6, %cst640_i32 : i32
%53 = spirv.IAdd %51, %52 : i32
%54 = spirv.IMul %26, %cst8_i32 : i32
%55 = spirv.IAdd %53, %54 : i32
%56 = spirv.IAdd %55, %16 : i32
// %59 = floor(lid.x / 8); %60 = %59 * 32; source index %61 = %56 + %60.
%57 = spirv.SDiv %35, %cst8_i32 : i32
%58 = spirv.ISub %cst-1_i32, %57 : i32
%59 = spirv.Select %33, %58, %57 : i1, i32
%60 = spirv.IMul %59, %cst32_i32 : i32
%61 = spirv.IAdd %56, %60 : i32
%62 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %61] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%63 = spirv.Load "StorageBuffer" %62 : vector<4xf32>
// Destination index %68 = lid.x + lid.y*144 + lid.z*144 + floor(lid.x / 8).
// %67 (the sum without the last term) is reused inside the loop.
%64 = spirv.IMul %4, %cst144_i32 : i32
%65 = spirv.IAdd %2, %64 : i32
%66 = spirv.IMul %6, %cst144_i32 : i32
%67 = spirv.IAdd %65, %66 : i32
%68 = spirv.IAdd %67, %59 : i32
%69 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %68] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %69, %63 : vector<4xf32>
// Second vector: source index + 640, destination index + 144.
%70 = spirv.IAdd %61, %cst640_i32 : i32
%71 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %70] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%72 = spirv.Load "StorageBuffer" %71 : vector<4xf32>
%73 = spirv.IAdd %68, %cst144_i32 : i32
%74 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %73] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %74, %72 : vector<4xf32>
// Workgroup barrier: the prologue stores complete before any tile is read.
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
// Function-storage variables that carry the loop's values out of the loop
// region: %75 / %76 hold the two accumulators, %77 the buffer selector.
%75 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%76 = spirv.Variable : !spirv.ptr<!spirv.coopmatrix<16x16xf16, Subgroup>, Function>
%77 = spirv.Variable : !spirv.ptr<i32, Function>
spirv.mlir.loop {
// Entry: counter = 0, both accumulators = zero matrix, selector = 0.
spirv.Branch ^bb1(%cst0_i32, %0, %0, %cst0_i32 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32)
// Loop header. %117 = counter, %118 / %119 = accumulators, %120 = selector of
// the Workgroup-array half to read this trip. Continues while %117 < 992.
^bb1(%117: i32, %118: !spirv.coopmatrix<16x16xf16, Subgroup>, %119: !spirv.coopmatrix<16x16xf16, Subgroup>, %120: i32): // 2 preds: ^bb0, ^bb2
%121 = spirv.SLessThan %117, %cst992_i32 : i32
spirv.BranchConditional %121, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Load four tiles from @__workgroup_mem__4 at base %122 = %120 * 160 with
// element offsets 0, 2, 80 and 82; the stride operand is 5.
// NOTE(review): stride 5 (x 8 f16 per vector = 40 f16) suggests rows of 32 f16
// padded by 8 - confirm against the tiling/padding configuration.
%122 = spirv.IMul %120, %cst160_i32 : i32
%123 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %122] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
%124 = spirv.NV.CooperativeMatrixLoad %123, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%125 = spirv.IAdd %122, %cst2_i32 : i32
%126 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %125] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
%127 = spirv.NV.CooperativeMatrixLoad %126, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%128 = spirv.IAdd %122, %cst80_i32 : i32
%129 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %128] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
%130 = spirv.NV.CooperativeMatrixLoad %129, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%131 = spirv.IAdd %122, %cst82_i32 : i32
%132 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %131] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
%133 = spirv.NV.CooperativeMatrixLoad %132, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
// Load two tiles from @__workgroup_mem__5 at base
// %139 = %120 * 288 + floor(lid.x / 32) * 2, offsets 0 and 144, stride 9.
// NOTE(review): with subgroup_size = 32, floor(lid.x / 32) is presumably the
// subgroup index within the workgroup - confirm.
%134 = spirv.IMul %120, %cst288_i32 : i32
%135 = spirv.SDiv %35, %cst32_i32 : i32
%136 = spirv.ISub %cst-1_i32, %135 : i32
%137 = spirv.Select %33, %136, %135 : i1, i32
%138 = spirv.IMul %137, %cst2_i32 : i32
%139 = spirv.IAdd %134, %138 : i32
%140 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %139] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%141 = spirv.NV.CooperativeMatrixLoad %140, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%142 = spirv.IAdd %139, %cst144_i32 : i32
%143 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %142] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%144 = spirv.NV.CooperativeMatrixLoad %143, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
// Accumulate: %146 = %127*%144 + (%124*%141 + %118)
//             %148 = %133*%144 + (%130*%141 + %119)
%145 = spirv.NV.CooperativeMatrixMulAdd %124, %141, %118 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%146 = spirv.NV.CooperativeMatrixMulAdd %127, %144, %145 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%147 = spirv.NV.CooperativeMatrixMulAdd %130, %141, %119 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%148 = spirv.NV.CooperativeMatrixMulAdd %133, %144, %147 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
// Next counter value %149 = %117 + 32; %155 = floor(%149 / 8).
// (%cst-33_i32 - %117 equals -1 - %149, matching the floor-division pattern.)
%149 = spirv.IAdd %117, %cst32_i32 : i32
%150 = spirv.SLessThan %149, %cst0_i32 : i32
%151 = spirv.ISub %cst-33_i32, %117 : i32
%152 = spirv.Select %150, %151, %149 : i1, i32
%153 = spirv.SDiv %152, %cst8_i32 : i32
%154 = spirv.ISub %cst-1_i32, %153 : i32
%155 = spirv.Select %150, %154, %153 : i1, i32
// Copy the next binding-0 vector: same index as the prologue plus %155.
%156 = spirv.IAdd %32, %155 : i32
%157 = spirv.IAdd %156, %39 : i32
%158 = spirv.IAdd %157, %cst16752_i32 : i32
%159 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %158] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%160 = spirv.Load "StorageBuffer" %159 : vector<4xf32>
// %163 = floor(%149 / 32).
%161 = spirv.SDiv %152, %cst32_i32 : i32
%162 = spirv.ISub %cst-1_i32, %161 : i32
%163 = spirv.Select %150, %162, %161 : i1, i32
// %172 = %163 mod 2, normalised into [0, 2): sign-corrected UMod of absolute
// values, then +2 if the result is negative. This is the next trip's selector.
%164 = spirv.GL.SAbs %163 : i32
%165 = spirv.GL.SAbs %cst2_i32 : i32
%166 = spirv.UMod %164, %165 : i32
%167 = spirv.IEqual %163, %164 : i32
%168 = spirv.SNegate %166 : i32
%169 = spirv.Select %167, %166, %168 : i1, i32
%170 = spirv.SLessThan %169, %cst0_i32 : i32
%171 = spirv.IAdd %169, %cst2_i32 : i32
%172 = spirv.Select %170, %171, %169 : i1, i32
// Destination %183 = %163*160 - floor(%163 / 2)*320 + %47 + %38, which equals
// (%163 mod 2)*160 plus the prologue's destination index.
%173 = spirv.IMul %163, %cst160_i32 : i32
%174 = spirv.IAdd %173, %47 : i32
%175 = spirv.SLessThan %163, %cst0_i32 : i32
%176 = spirv.ISub %cst-1_i32, %163 : i32
%177 = spirv.Select %175, %176, %163 : i1, i32
%178 = spirv.SDiv %177, %cst2_i32 : i32
%179 = spirv.ISub %cst-1_i32, %178 : i32
%180 = spirv.Select %175, %179, %178 : i1, i32
%181 = spirv.IMul %180, %cst-320_i32 : i32
%182 = spirv.IAdd %174, %181 : i32
%183 = spirv.IAdd %182, %38 : i32
%184 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %183] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %184, %160 : vector<4xf32>
// Copy the next two binding-1 vectors: prologue source index plus %149 * 40.
%185 = spirv.IMul %149, %cst40_i32 : i32
%186 = spirv.IAdd %185, %2 : i32
%187 = spirv.IAdd %186, %50 : i32
%188 = spirv.IAdd %187, %52 : i32
%189 = spirv.IAdd %188, %54 : i32
%190 = spirv.IAdd %189, %16 : i32
%191 = spirv.IAdd %190, %60 : i32
%192 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %191] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%193 = spirv.Load "StorageBuffer" %192 : vector<4xf32>
// Destination %198 = %163*288 - floor(%163 / 2)*576 + %67 + %59, which equals
// (%163 mod 2)*288 plus the prologue's destination index.
%194 = spirv.IMul %163, %cst288_i32 : i32
%195 = spirv.IAdd %194, %67 : i32
%196 = spirv.IMul %180, %cst-576_i32 : i32
%197 = spirv.IAdd %195, %196 : i32
%198 = spirv.IAdd %197, %59 : i32
%199 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %198] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %199, %193 : vector<4xf32>
%200 = spirv.IAdd %191, %cst640_i32 : i32
%201 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %200] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%202 = spirv.Load "StorageBuffer" %201 : vector<4xf32>
%203 = spirv.IAdd %198, %cst144_i32 : i32
%204 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %203] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %204, %202 : vector<4xf32>
// Workgroup barrier: this trip's tile reads and staging stores complete
// before the next trip (or the epilogue) touches the Workgroup arrays.
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
// Publish this trip's accumulators and next selector for use after the loop,
// then branch back with the same values as the header's block arguments.
spirv.Store "Function" %75, %146 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %76, %148 : !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Store "Function" %77, %172 : i32
spirv.Branch ^bb1(%149, %146, %148, %172 : i32, !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup>, i32)
^bb3: // pred: ^bb1
spirv.mlir.merge
}
// ---- Epilogue ----
// Reload the values written on the last loop trip: %78 = selector,
// %79 = second accumulator (%76), %80 = first accumulator (%75).
%78 = spirv.Load "Function" %77 : i32
%79 = spirv.Load "Function" %76 : !spirv.coopmatrix<16x16xf16, Subgroup>
%80 = spirv.Load "Function" %75 : !spirv.coopmatrix<16x16xf16, Subgroup>
// Same tile loads as in the loop body, from the half selected by %78.
%81 = spirv.IMul %78, %cst160_i32 : i32
%82 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %81] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
%83 = spirv.NV.CooperativeMatrixLoad %82, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%84 = spirv.IAdd %81, %cst2_i32 : i32
%85 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %84] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
%86 = spirv.NV.CooperativeMatrixLoad %85, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%87 = spirv.IAdd %81, %cst80_i32 : i32
%88 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %87] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
%89 = spirv.NV.CooperativeMatrixLoad %88, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%90 = spirv.IAdd %81, %cst82_i32 : i32
%91 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %90] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
%92 = spirv.NV.CooperativeMatrixLoad %91, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
// %97 = floor(lid.x / 32) * 2; also reused below in the output index.
%93 = spirv.IMul %78, %cst288_i32 : i32
%94 = spirv.SDiv %35, %cst32_i32 : i32
%95 = spirv.ISub %cst-1_i32, %94 : i32
%96 = spirv.Select %33, %95, %94 : i1, i32
%97 = spirv.IMul %96, %cst2_i32 : i32
%98 = spirv.IAdd %93, %97 : i32
%99 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %98] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%100 = spirv.NV.CooperativeMatrixLoad %99, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%101 = spirv.IAdd %98, %cst144_i32 : i32
%102 = spirv.AccessChain %__workgroup_mem__5_addr[%cst0_i32, %101] : !spirv.ptr<!spirv.struct<(!spirv.array<576 x vector<4xf32>>)>, Workgroup>, i32, i32
%103 = spirv.NV.CooperativeMatrixLoad %102, %cst9_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
// Final accumulation: %105 = %86*%103 + (%83*%100 + %80)
//                     %107 = %92*%103 + (%89*%100 + %79)
%104 = spirv.NV.CooperativeMatrixMulAdd %83, %100, %80 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%105 = spirv.NV.CooperativeMatrixMulAdd %86, %103, %104 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%106 = spirv.NV.CooperativeMatrixMulAdd %89, %100, %79 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%107 = spirv.NV.CooperativeMatrixMulAdd %92, %103, %106 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
// Output index %113 = wg.y*1280 + lid.y*1280 + wg.x*8 + %22 + %97.
// %105 is stored at %113 and %107 at %113 + 640, both with stride operand 40.
%108 = spirv.IMul %24, %cst1280_i32 : i32
%109 = spirv.IMul %4, %cst1280_i32 : i32
%110 = spirv.IAdd %108, %109 : i32
%111 = spirv.IAdd %110, %54 : i32
%112 = spirv.IAdd %111, %22 : i32
%113 = spirv.IAdd %112, %97 : i32
%114 = spirv.IAdd %113, %cst640_i32 : i32
%115 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %114] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %115, %107, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
%116 = spirv.AccessChain %__resource_var_0_2__addr[%cst0_i32, %113] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %116, %105, %cst40_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Return
}
// Entry-point declaration (GLCompute) listing the two built-in inputs as its
// interface, and the 128x1x1 local size matching the export's workgroup_size.
spirv.EntryPoint "GLCompute" @forward_dispatch_55_matmul_160x320x1024, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__
spirv.ExecutionMode @forward_dispatch_55_matmul_160x320x1024 "LocalSize", 128, 1, 1
}
}
}
}
hal.executable private @forward_dispatch_57 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
hal.executable.export public @forward_dispatch_57_generic_2x77x5x64 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation, workgroup_size = [32 : index, 1 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index):
%c2 = arith.constant 2 : index
%c5 = arith.constant 5 : index
%c154 = arith.constant 154 : index
hal.return %c2, %c5, %c154 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// Each invocation copies exactly one f16 element from binding (0, 0) to
// binding (0, 1). The source and destination element indices are built from
// the same workgroup/local ids but with different strides.
// NOTE(review): judging by the name and the strides (320/64 vs 4928/64) this
// looks like a 2x77x5x64 -> 2x5x77x64 transpose — confirm against the
// originating linalg.generic.
// The ExecutionMode for this entry point declares LocalSize 32, 1, 1.
spirv.func @forward_dispatch_57_generic_2x77x5x64() "None" {
%cst1_i32 = spirv.Constant 1 : i32
%cst4928_i32 = spirv.Constant 4928 : i32
%cst24640_i32 = spirv.Constant 24640 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst64_i32 = spirv.Constant 64 : i32
%cst320_i32 = spirv.Constant 320 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst0_i32 = spirv.Constant 0 : i32
%cst77_i32 = spirv.Constant 77 : i32
// Load the two i32 push constants: %1 = pc[0], %3 = pc[1].
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
%0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%1 = spirv.Load "PushConstant" %0 : i32
%2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%3 = spirv.Load "PushConstant" %2 : i32
// %9 = floor(pc[0] / 2). For negative inputs the value is mapped to
// -1 - x (non-negative), divided, then mapped back, which yields floor
// division rather than truncation.
// NOTE(review): presumably a byte offset converted to an f16 element
// offset — confirm.
%4 = spirv.SLessThan %1, %cst0_i32 : i32
%5 = spirv.ISub %cst-1_i32, %1 : i32
%6 = spirv.Select %4, %5, %1 : i1, i32
%7 = spirv.SDiv %6, %cst2_i32 : i32
%8 = spirv.ISub %cst-1_i32, %7 : i32
%9 = spirv.Select %4, %8, %7 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// %15 = floor(pc[1] / 2), same pattern as above.
%10 = spirv.SLessThan %3, %cst0_i32 : i32
%11 = spirv.ISub %cst-1_i32, %3 : i32
%12 = spirv.Select %10, %11, %3 : i1, i32
%13 = spirv.SDiv %12, %cst2_i32 : i32
%14 = spirv.ISub %cst-1_i32, %13 : i32
%15 = spirv.Select %10, %14, %13 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// Workgroup id: %17 = wg.z, %20 = wg.y, %23 = wg.x.
// wg.z is split (unsigned) into %18 = wg.z / 77 and %21 = wg.z % 77.
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%16 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%17 = spirv.CompositeExtract %16[2 : i32] : vector<3xi32>
%18 = spirv.UDiv %17, %cst77_i32 : i32
%19 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%20 = spirv.CompositeExtract %19[1 : i32] : vector<3xi32>
%21 = spirv.UMod %17, %cst77_i32 : i32
%22 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%23 = spirv.CompositeExtract %22[0 : i32] : vector<3xi32>
// Local invocation id: %25 = lid.x, %27 = lid.y, %29 = lid.z.
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%24 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%25 = spirv.CompositeExtract %24[0 : i32] : vector<3xi32>
%26 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%27 = spirv.CompositeExtract %26[1 : i32] : vector<3xi32>
%28 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%29 = spirv.CompositeExtract %28[2 : i32] : vector<3xi32>
// Source element index (%42):
//   (wg.z % 77) * 320 + lid.z * 320 + wg.y * 64 + lid.y * 64
//   + lid.x + wg.x * 32 + (wg.z / 77) * 24640 + floor(pc[0] / 2)
%30 = spirv.IMul %21, %cst320_i32 : i32
%31 = spirv.IMul %29, %cst320_i32 : i32
%32 = spirv.IAdd %30, %31 : i32
%33 = spirv.IMul %20, %cst64_i32 : i32
%34 = spirv.IAdd %32, %33 : i32
%35 = spirv.IMul %27, %cst64_i32 : i32
%36 = spirv.IAdd %34, %35 : i32
%37 = spirv.IAdd %36, %25 : i32
%38 = spirv.IMul %23, %cst32_i32 : i32
%39 = spirv.IAdd %37, %38 : i32
%40 = spirv.IMul %18, %cst24640_i32 : i32
%41 = spirv.IAdd %39, %40 : i32
%42 = spirv.IAdd %41, %9 : i32
%43 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %42] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%44 = spirv.Load "StorageBuffer" %43 : f16
// Destination element index (%55):
//   wg.y * 4928 + lid.y * 4928 + (wg.z % 77) * 64 + lid.z * 64
//   + lid.x + wg.x * 32 + (wg.z / 77) * 24640 + floor(pc[1] / 2)
// The wg.x * 32 (%38) and (wg.z / 77) * 24640 (%40) terms are reused from
// the source index computation.
%45 = spirv.IMul %20, %cst4928_i32 : i32
%46 = spirv.IMul %27, %cst4928_i32 : i32
%47 = spirv.IAdd %45, %46 : i32
%48 = spirv.IMul %21, %cst64_i32 : i32
%49 = spirv.IAdd %47, %48 : i32
%50 = spirv.IMul %29, %cst64_i32 : i32
%51 = spirv.IAdd %49, %50 : i32
%52 = spirv.IAdd %51, %25 : i32
%53 = spirv.IAdd %52, %38 : i32
%54 = spirv.IAdd %53, %40 : i32
%55 = spirv.IAdd %54, %15 : i32
%56 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %55] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
// Write the loaded value unchanged.
spirv.Store "StorageBuffer" %56, %44 : f16
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_57_generic_2x77x5x64, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_57_generic_2x77x5x64 "LocalSize", 32, 1, 1
}
}
}
}
hal.executable private @forward_dispatch_59 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
// Export record for @forward_dispatch_59_generic_2x320x77 with a declared
// workgroup_size of [1, 32, 1]. The attached region ignores its arguments
// and returns the constants (77, 10, 2).
// NOTE(review): presumably these are the workgroup counts along x, y, z —
// confirm against the HAL dialect semantics for this region.
hal.executable.export public @forward_dispatch_59_generic_2x320x77 ordinal(0) layout(#pipeline_layout11) attributes {translation_info = #translation, workgroup_size = [1 : index, 32 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
%c77 = arith.constant 77 : index
%c10 = arith.constant 10 : index
%c2 = arith.constant 2 : index
hal.return %c77, %c10, %c2 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<1 x i32, stride=4> [0])>, PushConstant>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// Each invocation loads one f16 element from binding (0, 0), multiplies it by
// the constant 0.353553385 (converted to f16), and stores it to binding
// (0, 1) at an index built with different strides.
// NOTE(review): the strides (320 on the source, 77 on the destination)
// suggest a scaled transpose of the last two dims of a 2x77x320 tensor into
// 2x320x77 — confirm against the originating linalg.generic.
spirv.func @forward_dispatch_59_generic_2x320x77() "None" {
%cst2464_i32 = spirv.Constant 2464 : i32
%cst77_i32 = spirv.Constant 77 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst320_i32 = spirv.Constant 320 : i32
%cst24640_i32 = spirv.Constant 24640 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst0_i32 = spirv.Constant 0 : i32
// Scale factor applied to every element (numerically close to 1/sqrt(8)).
%cst_f32 = spirv.Constant 0.353553385 : f32
// Load the single i32 push constant: %1 = pc[0].
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<1 x i32, stride=4> [0])>, PushConstant>
%0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<1 x i32, stride=4> [0])>, PushConstant>, i32, i32
%1 = spirv.Load "PushConstant" %0 : i32
// %7 = floor(pc[0] / 2): negative inputs are mapped to -1 - x before the
// division and mapped back afterwards, giving floor rather than truncation.
// NOTE(review): presumably a byte offset converted to an f16 element
// offset — confirm.
%2 = spirv.SLessThan %1, %cst0_i32 : i32
%3 = spirv.ISub %cst-1_i32, %1 : i32
%4 = spirv.Select %2, %3, %1 : i1, i32
%5 = spirv.SDiv %4, %cst2_i32 : i32
%6 = spirv.ISub %cst-1_i32, %5 : i32
%7 = spirv.Select %2, %6, %5 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// Workgroup id: %9 = wg.z, %11 = wg.y, %13 = wg.x.
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%8 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%9 = spirv.CompositeExtract %8[2 : i32] : vector<3xi32>
%10 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%11 = spirv.CompositeExtract %10[1 : i32] : vector<3xi32>
%12 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%13 = spirv.CompositeExtract %12[0 : i32] : vector<3xi32>
// Local invocation id: %15 = lid.x, %17 = lid.y, %19 = lid.z.
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%14 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%15 = spirv.CompositeExtract %14[0 : i32] : vector<3xi32>
%16 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%17 = spirv.CompositeExtract %16[1 : i32] : vector<3xi32>
%18 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%19 = spirv.CompositeExtract %18[2 : i32] : vector<3xi32>
// Source element index (%30):
//   wg.z * 24640 + lid.z * 24640 + wg.x * 320 + lid.x * 320
//   + lid.y + wg.y * 32 + floor(pc[0] / 2)
// %22 (the two 24640-stride terms) is reused for the destination index.
%20 = spirv.IMul %9, %cst24640_i32 : i32
%21 = spirv.IMul %19, %cst24640_i32 : i32
%22 = spirv.IAdd %20, %21 : i32
%23 = spirv.IMul %13, %cst320_i32 : i32
%24 = spirv.IAdd %22, %23 : i32
%25 = spirv.IMul %15, %cst320_i32 : i32
%26 = spirv.IAdd %24, %25 : i32
%27 = spirv.IAdd %26, %17 : i32
%28 = spirv.IMul %11, %cst32_i32 : i32
%29 = spirv.IAdd %27, %28 : i32
%30 = spirv.IAdd %29, %7 : i32
%31 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %30] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%32 = spirv.Load "StorageBuffer" %31 : f16
// Narrow the f32 scale constant to f16 and apply it.
%33 = spirv.FConvert %cst_f32 : f32 to f16
%34 = spirv.FMul %32, %33 : f16
// Destination element index (%40):
//   wg.z * 24640 + lid.z * 24640 + lid.y * 77 + wg.y * 2464
//   + wg.x + lid.x
// No push-constant offset is added on the destination side.
%35 = spirv.IMul %17, %cst77_i32 : i32
%36 = spirv.IAdd %22, %35 : i32
%37 = spirv.IMul %11, %cst2464_i32 : i32
%38 = spirv.IAdd %36, %37 : i32
%39 = spirv.IAdd %38, %13 : i32
%40 = spirv.IAdd %39, %15 : i32
%41 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %40] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %41, %34 : f16
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_59_generic_2x320x77, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_59_generic_2x320x77 "LocalSize", 1, 32, 1
}
}
}
}
hal.executable private @forward_dispatch_60 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
// Export record for @forward_dispatch_60 with a declared workgroup_size of
// [1, 32, 1]. The attached region ignores its arguments and returns the
// constants (77, 2, 10).
// NOTE(review): presumably these are the workgroup counts along x, y, z —
// confirm against the HAL dialect semantics for this region.
hal.executable.export public @forward_dispatch_60 ordinal(0) layout(#pipeline_layout11) attributes {translation_info = #translation, workgroup_size = [1 : index, 32 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
%c77 = arith.constant 77 : index
%c2 = arith.constant 2 : index
%c10 = arith.constant 10 : index
hal.return %c77, %c2, %c10 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<1 x i32, stride=4> [0])>, PushConstant>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// Each invocation copies one f16 element from binding (0, 0) to binding
// (0, 1). The source uses strides 4928 / 77 / 1 and the destination uses
// strides 6144 / 96 / 1, plus a push-constant-derived offset.
// NOTE(review): the stride change from 77 to 96 suggests the data is being
// written into a wider (padded) row layout, e.g. 10x64x77 into 10x64x96 —
// confirm against the originating op.
spirv.func @forward_dispatch_60() "None" {
%cst3072_i32 = spirv.Constant 3072 : i32
%cst96_i32 = spirv.Constant 96 : i32
%cst6144_i32 = spirv.Constant 6144 : i32
%cst2464_i32 = spirv.Constant 2464 : i32
%cst77_i32 = spirv.Constant 77 : i32
%cst4928_i32 = spirv.Constant 4928 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst0_i32 = spirv.Constant 0 : i32
// Load the single i32 push constant: %1 = pc[0].
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<1 x i32, stride=4> [0])>, PushConstant>
%0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<1 x i32, stride=4> [0])>, PushConstant>, i32, i32
%1 = spirv.Load "PushConstant" %0 : i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// %7 = floor(pc[0] / 2): negative inputs are mapped to -1 - x before the
// division and mapped back afterwards, giving floor rather than truncation.
// NOTE(review): presumably a byte offset converted to an f16 element
// offset — confirm.
%2 = spirv.SLessThan %1, %cst0_i32 : i32
%3 = spirv.ISub %cst-1_i32, %1 : i32
%4 = spirv.Select %2, %3, %1 : i1, i32
%5 = spirv.SDiv %4, %cst2_i32 : i32
%6 = spirv.ISub %cst-1_i32, %5 : i32
%7 = spirv.Select %2, %6, %5 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// Workgroup id: %9 = wg.x, %11 = wg.y, %13 = wg.z.
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%8 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%9 = spirv.CompositeExtract %8[0 : i32] : vector<3xi32>
%10 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%11 = spirv.CompositeExtract %10[1 : i32] : vector<3xi32>
%12 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%13 = spirv.CompositeExtract %12[2 : i32] : vector<3xi32>
// Local invocation id: %15 = lid.x, %17 = lid.y, %19 = lid.z.
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%14 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%15 = spirv.CompositeExtract %14[0 : i32] : vector<3xi32>
%16 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%17 = spirv.CompositeExtract %16[1 : i32] : vector<3xi32>
%18 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%19 = spirv.CompositeExtract %18[2 : i32] : vector<3xi32>
// Source element index (%28):
//   wg.z * 4928 + lid.z * 4928 + lid.y * 77 + wg.y * 2464
//   + wg.x + lid.x
// No push-constant offset is added on the source side.
%20 = spirv.IMul %13, %cst4928_i32 : i32
%21 = spirv.IMul %19, %cst4928_i32 : i32
%22 = spirv.IAdd %20, %21 : i32
%23 = spirv.IMul %17, %cst77_i32 : i32
%24 = spirv.IAdd %22, %23 : i32
%25 = spirv.IMul %11, %cst2464_i32 : i32
%26 = spirv.IAdd %24, %25 : i32
%27 = spirv.IAdd %26, %9 : i32
%28 = spirv.IAdd %27, %15 : i32
%29 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %28] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%30 = spirv.Load "StorageBuffer" %29 : f16
// Destination element index (%40):
//   wg.z * 6144 + lid.z * 6144 + lid.y * 96 + wg.y * 3072
//   + wg.x + lid.x + floor(pc[0] / 2)
%31 = spirv.IMul %13, %cst6144_i32 : i32
%32 = spirv.IMul %19, %cst6144_i32 : i32
%33 = spirv.IAdd %31, %32 : i32
%34 = spirv.IMul %17, %cst96_i32 : i32
%35 = spirv.IAdd %33, %34 : i32
%36 = spirv.IMul %11, %cst3072_i32 : i32
%37 = spirv.IAdd %35, %36 : i32
%38 = spirv.IAdd %37, %9 : i32
%39 = spirv.IAdd %38, %15 : i32
%40 = spirv.IAdd %39, %7 : i32
%41 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %40] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
// Write the loaded value unchanged.
spirv.Store "StorageBuffer" %41, %30 : f16
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_60, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_60 "LocalSize", 1, 32, 1
}
}
}
}
hal.executable private @forward_dispatch_61 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
// Export record for @forward_dispatch_61_batch_matmul_10x9216x96x64 with a
// declared subgroup_size of 32 and workgroup_size of [64, 2, 1]. The attached
// region takes a device plus four index arguments, ignores all of them, and
// returns the constants (3, 144, 10).
// NOTE(review): presumably these are the workgroup counts along x, y, z —
// confirm against the HAL dialect semantics for this region.
hal.executable.export public @forward_dispatch_61_batch_matmul_10x9216x96x64 ordinal(0) layout(#pipeline_layout4) attributes {subgroup_size = 32 : index, translation_info = #translation4, workgroup_size = [64 : index, 2 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index):
%c3 = arith.constant 3 : index
%c144 = arith.constant 144 : index
%c10 = arith.constant 10 : index
hal.return %c3, %c144, %c10 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, Float16, CooperativeMatrixNV], [SPV_KHR_storage_buffer_storage_class, SPV_NV_cooperative_matrix]> {
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>
spirv.GlobalVariable @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>
spirv.GlobalVariable @__workgroup_mem__3 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
spirv.func @forward_dispatch_61_batch_matmul_10x9216x96x64() "None" attributes {spirv.entry_point_abi = #spirv.entry_point_abi<subgroup_size = 32>} {
%cst12_i32 = spirv.Constant 12 : i32
%false = spirv.Constant false
%cst5_i32 = spirv.Constant 5 : i32
%cst1_i32 = spirv.Constant 1 : i32
%cst110592_i32 = spirv.Constant 110592 : i32
%cst240_i32 = spirv.Constant 240 : i32
%cst402_i32 = spirv.Constant 402 : i32
%cst400_i32 = spirv.Constant 400 : i32
%cst322_i32 = spirv.Constant 322 : i32
%cst480_i32 = spirv.Constant 480 : i32
%cst260_i32 = spirv.Constant 260 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst82_i32 = spirv.Constant 82 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst320_i32 = spirv.Constant 320 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst768_i32 = spirv.Constant 768 : i32
%cst384_i32 = spirv.Constant 384 : i32
%cst192_i32 = spirv.Constant 192 : i32
%cst160_i32 = spirv.Constant 160 : i32
%cst80_i32 = spirv.Constant 80 : i32
%cst4_i32 = spirv.Constant 4 : i32
%cst73728_i32 = spirv.Constant 73728 : i32
%cst256_i32 = spirv.Constant 256 : i32
%cst128_i32 = spirv.Constant 128 : i32
%cst512_i32 = spirv.Constant 512 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst16_i32 = spirv.Constant 16 : i32
%cst0_i32 = spirv.Constant 0 : i32
%cst_f16 = spirv.Constant 0.000000e+00 : f16
%0 = spirv.CompositeConstruct %cst_f16 : (f16) -> !spirv.coopmatrix<16x16xf16, Subgroup>
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%1 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%2 = spirv.CompositeExtract %1[0 : i32] : vector<3xi32>
%3 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%4 = spirv.CompositeExtract %3[1 : i32] : vector<3xi32>
%5 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%6 = spirv.CompositeExtract %5[2 : i32] : vector<3xi32>
%__workgroup_mem__3_addr = spirv.mlir.addressof @__workgroup_mem__3 : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>
%__workgroup_mem__4_addr = spirv.mlir.addressof @__workgroup_mem__4 : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>
%7 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32
%8 = spirv.Load "PushConstant" %7 : i32
%9 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32
%10 = spirv.Load "PushConstant" %9 : i32
%11 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst2_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<3 x i32, stride=4> [0])>, PushConstant>, i32, i32
%12 = spirv.Load "PushConstant" %11 : i32
%13 = spirv.SLessThan %8, %cst0_i32 : i32
%14 = spirv.ISub %cst-1_i32, %8 : i32
%15 = spirv.Select %13, %14, %8 : i1, i32
%16 = spirv.SDiv %15, %cst16_i32 : i32
%17 = spirv.ISub %cst-1_i32, %16 : i32
%18 = spirv.Select %13, %17, %16 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%19 = spirv.SLessThan %10, %cst0_i32 : i32
%20 = spirv.ISub %cst-1_i32, %10 : i32
%21 = spirv.Select %19, %20, %10 : i1, i32
%22 = spirv.SDiv %21, %cst16_i32 : i32
%23 = spirv.ISub %cst-1_i32, %22 : i32
%24 = spirv.Select %19, %23, %22 : i1, i32
%25 = spirv.SLessThan %12, %cst0_i32 : i32
%26 = spirv.ISub %cst-1_i32, %12 : i32
%27 = spirv.Select %25, %26, %12 : i1, i32
%28 = spirv.SDiv %27, %cst16_i32 : i32
%29 = spirv.ISub %cst-1_i32, %28 : i32
%30 = spirv.Select %25, %29, %28 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%31 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%32 = spirv.CompositeExtract %31[2 : i32] : vector<3xi32>
%33 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%34 = spirv.CompositeExtract %33[1 : i32] : vector<3xi32>
%35 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%36 = spirv.CompositeExtract %35[0 : i32] : vector<3xi32>
%37 = spirv.IMul %34, %cst512_i32 : i32
%38 = spirv.IAdd %37, %2 : i32
%39 = spirv.IMul %4, %cst128_i32 : i32
%40 = spirv.IAdd %38, %39 : i32
%41 = spirv.IMul %6, %cst256_i32 : i32
%42 = spirv.IAdd %40, %41 : i32
%43 = spirv.IMul %32, %cst73728_i32 : i32
%44 = spirv.IAdd %42, %43 : i32
%45 = spirv.IAdd %44, %18 : i32
%46 = spirv.SLessThan %2, %cst0_i32 : i32
%47 = spirv.ISub %cst-1_i32, %2 : i32
%48 = spirv.Select %46, %47, %2 : i1, i32
%49 = spirv.SDiv %48, %cst4_i32 : i32
%50 = spirv.ISub %cst-1_i32, %49 : i32
%51 = spirv.Select %46, %50, %49 : i1, i32
%52 = spirv.IMul %51, %cst4_i32 : i32
%53 = spirv.IAdd %45, %52 : i32
%54 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %53] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%55 = spirv.Load "StorageBuffer" %54 : vector<4xf32>
%56 = spirv.IMul %4, %cst80_i32 : i32
%57 = spirv.IAdd %2, %56 : i32
%58 = spirv.IMul %6, %cst160_i32 : i32
%59 = spirv.IAdd %57, %58 : i32
%60 = spirv.IAdd %59, %51 : i32
%61 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %60] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %61, %55 : vector<4xf32>
%62 = spirv.IAdd %53, %cst256_i32 : i32
%63 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %62] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%64 = spirv.Load "StorageBuffer" %63 : vector<4xf32>
%65 = spirv.IAdd %60, %cst160_i32 : i32
%66 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %65] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %66, %64 : vector<4xf32>
%67 = spirv.IMul %4, %cst192_i32 : i32
%68 = spirv.IAdd %2, %67 : i32
%69 = spirv.IMul %6, %cst384_i32 : i32
%70 = spirv.IAdd %68, %69 : i32
%71 = spirv.IMul %36, %cst4_i32 : i32
%72 = spirv.IAdd %70, %71 : i32
%73 = spirv.IMul %32, %cst768_i32 : i32
%74 = spirv.IAdd %72, %73 : i32
%75 = spirv.IAdd %74, %24 : i32
%76 = spirv.IMul %51, %cst8_i32 : i32
%77 = spirv.IAdd %75, %76 : i32
%78 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %77] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%79 = spirv.Load "StorageBuffer" %78 : vector<4xf32>
%80 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %60] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %80, %79 : vector<4xf32>
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
%81 = spirv.IMul %4, %cst160_i32 : i32
%82 = spirv.IMul %6, %cst320_i32 : i32
%83 = spirv.IAdd %81, %82 : i32
%84 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %83] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%85 = spirv.NV.CooperativeMatrixLoad %84, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%86 = spirv.IAdd %83, %cst2_i32 : i32
%87 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %86] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%88 = spirv.NV.CooperativeMatrixLoad %87, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%89 = spirv.IAdd %83, %cst80_i32 : i32
%90 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %89] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%91 = spirv.NV.CooperativeMatrixLoad %90, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%92 = spirv.IAdd %83, %cst82_i32 : i32
%93 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %92] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%94 = spirv.NV.CooperativeMatrixLoad %93, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%95 = spirv.SDiv %48, %cst32_i32 : i32
%96 = spirv.ISub %cst-1_i32, %95 : i32
%97 = spirv.Select %46, %96, %95 : i1, i32
%98 = spirv.IMul %97, %cst2_i32 : i32
%99 = spirv.IAdd %58, %98 : i32
%100 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %99] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
%101 = spirv.NV.CooperativeMatrixLoad %100, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%102 = spirv.IAdd %99, %cst80_i32 : i32
%103 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %102] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
%104 = spirv.NV.CooperativeMatrixLoad %103, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%105 = spirv.NV.CooperativeMatrixMulAdd %85, %101, %0 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%106 = spirv.NV.CooperativeMatrixMulAdd %88, %104, %105 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%107 = spirv.NV.CooperativeMatrixMulAdd %91, %101, %0 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%108 = spirv.NV.CooperativeMatrixMulAdd %94, %104, %107 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%109 = spirv.IAdd %53, %cst4_i32 : i32
%110 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %109] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%111 = spirv.Load "StorageBuffer" %110 : vector<4xf32>
%112 = spirv.IAdd %60, %cst320_i32 : i32
%113 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %112] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %113, %111 : vector<4xf32>
%114 = spirv.IAdd %53, %cst260_i32 : i32
%115 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %114] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%116 = spirv.Load "StorageBuffer" %115 : vector<4xf32>
%117 = spirv.IAdd %60, %cst480_i32 : i32
%118 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %117] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %118, %116 : vector<4xf32>
%119 = spirv.IAdd %77, %cst384_i32 : i32
%120 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %119] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
%121 = spirv.Load "StorageBuffer" %120 : vector<4xf32>
%122 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %65] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
spirv.Store "Workgroup" %122, %121 : vector<4xf32>
spirv.ControlBarrier <Workgroup>, <Workgroup>, <AcquireRelease|WorkgroupMemory>
%123 = spirv.IAdd %83, %cst320_i32 : i32
%124 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %123] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%125 = spirv.NV.CooperativeMatrixLoad %124, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%126 = spirv.IAdd %83, %cst322_i32 : i32
%127 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %126] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%128 = spirv.NV.CooperativeMatrixLoad %127, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%129 = spirv.IAdd %83, %cst400_i32 : i32
%130 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %129] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%131 = spirv.NV.CooperativeMatrixLoad %130, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%132 = spirv.IAdd %83, %cst402_i32 : i32
%133 = spirv.AccessChain %__workgroup_mem__3_addr[%cst0_i32, %132] : !spirv.ptr<!spirv.struct<(!spirv.array<640 x vector<4xf32>>)>, Workgroup>, i32, i32
%134 = spirv.NV.CooperativeMatrixLoad %133, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%135 = spirv.IAdd %99, %cst160_i32 : i32
%136 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %135] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
%137 = spirv.NV.CooperativeMatrixLoad %136, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%138 = spirv.IAdd %99, %cst240_i32 : i32
%139 = spirv.AccessChain %__workgroup_mem__4_addr[%cst0_i32, %138] : !spirv.ptr<!spirv.struct<(!spirv.array<320 x vector<4xf32>>)>, Workgroup>, i32, i32
%140 = spirv.NV.CooperativeMatrixLoad %139, %cst5_i32, %false : !spirv.ptr<vector<4xf32>, Workgroup> as !spirv.coopmatrix<16x16xf16, Subgroup>
%141 = spirv.NV.CooperativeMatrixMulAdd %125, %137, %106 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%142 = spirv.NV.CooperativeMatrixMulAdd %128, %140, %141 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%143 = spirv.NV.CooperativeMatrixMulAdd %131, %137, %108 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%144 = spirv.NV.CooperativeMatrixMulAdd %134, %140, %143 : !spirv.coopmatrix<16x16xf16, Subgroup>, !spirv.coopmatrix<16x16xf16, Subgroup> -> !spirv.coopmatrix<16x16xf16, Subgroup>
%145 = spirv.IMul %32, %cst110592_i32 : i32
%146 = spirv.IMul %6, %cst110592_i32 : i32
%147 = spirv.IAdd %145, %146 : i32
%148 = spirv.IMul %34, %cst768_i32 : i32
%149 = spirv.IAdd %147, %148 : i32
%150 = spirv.IMul %4, %cst384_i32 : i32
%151 = spirv.IAdd %149, %150 : i32
%152 = spirv.IAdd %151, %71 : i32
%153 = spirv.IAdd %152, %30 : i32
%154 = spirv.IAdd %153, %98 : i32
%155 = spirv.IAdd %154, %cst192_i32 : i32
%156 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %155] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %156, %144, %cst12_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
%157 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %154] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<vector<4xf32>, stride=16> [0])>, StorageBuffer>, i32, i32
spirv.NV.CooperativeMatrixStore %157, %142, %cst12_i32, %false : !spirv.ptr<vector<4xf32>, StorageBuffer>, !spirv.coopmatrix<16x16xf16, Subgroup>
spirv.Return
}
spirv.EntryPoint "GLCompute" @forward_dispatch_61_batch_matmul_10x9216x96x64, @__builtin_var_LocalInvocationId__, @__builtin_var_WorkgroupId__
spirv.ExecutionMode @forward_dispatch_61_batch_matmul_10x9216x96x64 "LocalSize", 64, 2, 1
}
}
}
}
// forward_dispatch_62: a HAL executable with a single Vulkan/SPIR-V variant
// exporting one compute entry point. Each invocation loads one f16 element
// from binding (0, 0) and stores it to binding (0, 1) at a separately
// computed index; no arithmetic is performed on the value itself.
// NOTE(review): from the strides used below (source row stride 96, destination
// row stride 77, 77 workgroups along the first grid dimension) this looks like
// a slice/copy of the first 77 columns of a 10x9216x96 f16 buffer into a dense
// 10x9216x77 buffer -- confirm against the originating high-level op.
hal.executable private @forward_dispatch_62 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
// Export record: workgroup_size here (1, 32, 1) matches the "LocalSize"
// execution mode declared on the SPIR-V entry point further down.
hal.executable.export public @forward_dispatch_62 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation, workgroup_size = [1 : index, 32 : index, 1 : index]} {
// Workgroup-count region: ignores its arguments and returns the constant
// triple (77, 288, 10) -- presumably the x, y, z workgroup counts.
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
%c77 = arith.constant 77 : index
%c288 = arith.constant 288 : index
%c10 = arith.constant 10 : index
hal.return %c77, %c288, %c10 : index, index, index
}
// Outer module carries the full target environment (capabilities, extensions
// and resource limits); the nested spirv.module below only requires the
// subset it declares in its own `requires` clause.
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
// Interface variables: two built-in inputs, a two-word i32 push-constant
// block, and two storage buffers viewed as runtime arrays of f16.
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.func @forward_dispatch_62() "None" {
// Stride constants used below. Note 3072 = 32 * 96, 884736 = 9216 * 96,
// 2464 = 32 * 77 and 709632 = 9216 * 77.
%cst1_i32 = spirv.Constant 1 : i32
%cst2464_i32 = spirv.Constant 2464 : i32
%cst77_i32 = spirv.Constant 77 : i32
%cst709632_i32 = spirv.Constant 709632 : i32
%cst3072_i32 = spirv.Constant 3072 : i32
%cst96_i32 = spirv.Constant 96 : i32
%cst884736_i32 = spirv.Constant 884736 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst0_i32 = spirv.Constant 0 : i32
// Read the two push-constant words: %1 is word 0, %3 is word 1.
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
%0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%1 = spirv.Load "PushConstant" %0 : i32
%2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%3 = spirv.Load "PushConstant" %2 : i32
// %9 = floor(%1 / 2). SDiv is only applied to a non-negative operand:
// for negative %1 the value is mapped to (-1 - %1), divided, and mapped
// back with (-1 - q), which yields rounding toward negative infinity.
// NOTE(review): presumably this converts a byte offset into an f16
// element offset (element stride is 2 bytes) -- confirm with the host side.
%4 = spirv.SLessThan %1, %cst0_i32 : i32
%5 = spirv.ISub %cst-1_i32, %1 : i32
%6 = spirv.Select %4, %5, %1 : i1, i32
%7 = spirv.SDiv %6, %cst2_i32 : i32
%8 = spirv.ISub %cst-1_i32, %7 : i32
%9 = spirv.Select %4, %8, %7 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// %15 = floor(%3 / 2), same sequence as above applied to word 1.
%10 = spirv.SLessThan %3, %cst0_i32 : i32
%11 = spirv.ISub %cst-1_i32, %3 : i32
%12 = spirv.Select %10, %11, %3 : i1, i32
%13 = spirv.SDiv %12, %cst2_i32 : i32
%14 = spirv.ISub %cst-1_i32, %13 : i32
%15 = spirv.Select %10, %14, %13 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
// WorkgroupId components: %17 = x, %19 = y, %21 = z. The built-in is
// re-loaded once per extracted component.
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%16 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%17 = spirv.CompositeExtract %16[0 : i32] : vector<3xi32>
%18 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%19 = spirv.CompositeExtract %18[1 : i32] : vector<3xi32>
%20 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%21 = spirv.CompositeExtract %20[2 : i32] : vector<3xi32>
// LocalInvocationId components: %23 = x, %25 = y, %27 = z. With the
// declared LocalSize of (1, 32, 1) the x and z components are always 0.
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%22 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%23 = spirv.CompositeExtract %22[0 : i32] : vector<3xi32>
%24 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%25 = spirv.CompositeExtract %24[1 : i32] : vector<3xi32>
%26 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%27 = spirv.CompositeExtract %26[2 : i32] : vector<3xi32>
// Source element index:
//   %37 = (wg.z + local.z) * 884736 + local.y * 96 + wg.y * 3072
//         + wg.x + local.x + %9
%28 = spirv.IMul %21, %cst884736_i32 : i32
%29 = spirv.IMul %27, %cst884736_i32 : i32
%30 = spirv.IAdd %28, %29 : i32
%31 = spirv.IMul %25, %cst96_i32 : i32
%32 = spirv.IAdd %30, %31 : i32
%33 = spirv.IMul %19, %cst3072_i32 : i32
%34 = spirv.IAdd %32, %33 : i32
%35 = spirv.IAdd %34, %17 : i32
%36 = spirv.IAdd %35, %23 : i32
%37 = spirv.IAdd %36, %9 : i32
// Load one f16 from binding (0, 0) at the source index.
%38 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %37] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%39 = spirv.Load "StorageBuffer" %38 : f16
// Destination element index, same shape of computation with the
// destination strides:
//   %49 = (wg.z + local.z) * 709632 + local.y * 77 + wg.y * 2464
//         + wg.x + local.x + %15
%40 = spirv.IMul %21, %cst709632_i32 : i32
%41 = spirv.IMul %27, %cst709632_i32 : i32
%42 = spirv.IAdd %40, %41 : i32
%43 = spirv.IMul %25, %cst77_i32 : i32
%44 = spirv.IAdd %42, %43 : i32
%45 = spirv.IMul %19, %cst2464_i32 : i32
%46 = spirv.IAdd %44, %45 : i32
%47 = spirv.IAdd %46, %17 : i32
%48 = spirv.IAdd %47, %23 : i32
%49 = spirv.IAdd %48, %15 : i32
// Store the loaded f16 unchanged to binding (0, 1) at the destination index.
%50 = spirv.AccessChain %__resource_var_0_1__addr[%cst0_i32, %49] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
spirv.Store "StorageBuffer" %50, %39 : f16
spirv.Return
}
// Entry point declaration: GLCompute shader with a 1 x 32 x 1 local size,
// listing the two built-in input variables in its interface.
spirv.EntryPoint "GLCompute" @forward_dispatch_62, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spirv.ExecutionMode @forward_dispatch_62 "LocalSize", 1, 32, 1
}
}
}
}
hal.executable private @forward_dispatch_63 {
hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
hal.executable.export public @forward_dispatch_63 ordinal(0) layout(#pipeline_layout1) attributes {translation_info = #translation1, workgroup_size = [32 : index, 1 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index):
%c72 = arith.constant 72 : index
%c5 = arith.constant 5 : index
%c2 = arith.constant 2 : index
hal.return %c72, %c5, %c2 : index, index, index
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, StorageBuffer16BitAccess, Float16], [SPV_KHR_16bit_storage, SPV_KHR_storage_buffer_storage_class]> {
spirv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi32>, Input>
spirv.GlobalVariable @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
spirv.GlobalVariable @__resource_var_0_0_ bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.GlobalVariable @__resource_var_0_1_ bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
spirv.func @forward_dispatch_63() "None" {
%cst307_i32 = spirv.Constant 307 : i32
%cst306_i32 = spirv.Constant 306 : i32
%cst305_i32 = spirv.Constant 305 : i32
%cst304_i32 = spirv.Constant 304 : i32
%cst303_i32 = spirv.Constant 303 : i32
%cst302_i32 = spirv.Constant 302 : i32
%cst301_i32 = spirv.Constant 301 : i32
%cst300_i32 = spirv.Constant 300 : i32
%cst299_i32 = spirv.Constant 299 : i32
%cst298_i32 = spirv.Constant 298 : i32
%cst297_i32 = spirv.Constant 297 : i32
%cst296_i32 = spirv.Constant 296 : i32
%cst295_i32 = spirv.Constant 295 : i32
%cst294_i32 = spirv.Constant 294 : i32
%cst293_i32 = spirv.Constant 293 : i32
%cst292_i32 = spirv.Constant 292 : i32
%cst291_i32 = spirv.Constant 291 : i32
%cst290_i32 = spirv.Constant 290 : i32
%cst289_i32 = spirv.Constant 289 : i32
%cst288_i32 = spirv.Constant 288 : i32
%cst287_i32 = spirv.Constant 287 : i32
%cst286_i32 = spirv.Constant 286 : i32
%cst285_i32 = spirv.Constant 285 : i32
%cst284_i32 = spirv.Constant 284 : i32
%cst283_i32 = spirv.Constant 283 : i32
%cst282_i32 = spirv.Constant 282 : i32
%cst281_i32 = spirv.Constant 281 : i32
%cst280_i32 = spirv.Constant 280 : i32
%cst279_i32 = spirv.Constant 279 : i32
%cst278_i32 = spirv.Constant 278 : i32
%cst277_i32 = spirv.Constant 277 : i32
%cst276_i32 = spirv.Constant 276 : i32
%cst275_i32 = spirv.Constant 275 : i32
%cst274_i32 = spirv.Constant 274 : i32
%cst273_i32 = spirv.Constant 273 : i32
%cst272_i32 = spirv.Constant 272 : i32
%cst271_i32 = spirv.Constant 271 : i32
%cst270_i32 = spirv.Constant 270 : i32
%cst269_i32 = spirv.Constant 269 : i32
%cst268_i32 = spirv.Constant 268 : i32
%cst267_i32 = spirv.Constant 267 : i32
%cst266_i32 = spirv.Constant 266 : i32
%cst265_i32 = spirv.Constant 265 : i32
%cst264_i32 = spirv.Constant 264 : i32
%cst263_i32 = spirv.Constant 263 : i32
%cst262_i32 = spirv.Constant 262 : i32
%cst261_i32 = spirv.Constant 261 : i32
%cst260_i32 = spirv.Constant 260 : i32
%cst259_i32 = spirv.Constant 259 : i32
%cst258_i32 = spirv.Constant 258 : i32
%cst257_i32 = spirv.Constant 257 : i32
%cst256_i32 = spirv.Constant 256 : i32
%cst255_i32 = spirv.Constant 255 : i32
%cst254_i32 = spirv.Constant 254 : i32
%cst253_i32 = spirv.Constant 253 : i32
%cst252_i32 = spirv.Constant 252 : i32
%cst251_i32 = spirv.Constant 251 : i32
%cst250_i32 = spirv.Constant 250 : i32
%cst249_i32 = spirv.Constant 249 : i32
%cst248_i32 = spirv.Constant 248 : i32
%cst247_i32 = spirv.Constant 247 : i32
%cst246_i32 = spirv.Constant 246 : i32
%cst245_i32 = spirv.Constant 245 : i32
%cst244_i32 = spirv.Constant 244 : i32
%cst243_i32 = spirv.Constant 243 : i32
%cst242_i32 = spirv.Constant 242 : i32
%cst241_i32 = spirv.Constant 241 : i32
%cst240_i32 = spirv.Constant 240 : i32
%cst239_i32 = spirv.Constant 239 : i32
%cst238_i32 = spirv.Constant 238 : i32
%cst237_i32 = spirv.Constant 237 : i32
%cst236_i32 = spirv.Constant 236 : i32
%cst235_i32 = spirv.Constant 235 : i32
%cst234_i32 = spirv.Constant 234 : i32
%cst233_i32 = spirv.Constant 233 : i32
%cst232_i32 = spirv.Constant 232 : i32
%cst231_i32 = spirv.Constant 231 : i32
%cst230_i32 = spirv.Constant 230 : i32
%cst229_i32 = spirv.Constant 229 : i32
%cst228_i32 = spirv.Constant 228 : i32
%cst227_i32 = spirv.Constant 227 : i32
%cst226_i32 = spirv.Constant 226 : i32
%cst225_i32 = spirv.Constant 225 : i32
%cst224_i32 = spirv.Constant 224 : i32
%cst223_i32 = spirv.Constant 223 : i32
%cst222_i32 = spirv.Constant 222 : i32
%cst221_i32 = spirv.Constant 221 : i32
%cst220_i32 = spirv.Constant 220 : i32
%cst219_i32 = spirv.Constant 219 : i32
%cst218_i32 = spirv.Constant 218 : i32
%cst217_i32 = spirv.Constant 217 : i32
%cst216_i32 = spirv.Constant 216 : i32
%cst215_i32 = spirv.Constant 215 : i32
%cst214_i32 = spirv.Constant 214 : i32
%cst213_i32 = spirv.Constant 213 : i32
%cst212_i32 = spirv.Constant 212 : i32
%cst211_i32 = spirv.Constant 211 : i32
%cst210_i32 = spirv.Constant 210 : i32
%cst209_i32 = spirv.Constant 209 : i32
%cst208_i32 = spirv.Constant 208 : i32
%cst207_i32 = spirv.Constant 207 : i32
%cst206_i32 = spirv.Constant 206 : i32
%cst205_i32 = spirv.Constant 205 : i32
%cst204_i32 = spirv.Constant 204 : i32
%cst203_i32 = spirv.Constant 203 : i32
%cst202_i32 = spirv.Constant 202 : i32
%cst201_i32 = spirv.Constant 201 : i32
%cst200_i32 = spirv.Constant 200 : i32
%cst199_i32 = spirv.Constant 199 : i32
%cst198_i32 = spirv.Constant 198 : i32
%cst197_i32 = spirv.Constant 197 : i32
%cst196_i32 = spirv.Constant 196 : i32
%cst195_i32 = spirv.Constant 195 : i32
%cst194_i32 = spirv.Constant 194 : i32
%cst193_i32 = spirv.Constant 193 : i32
%cst192_i32 = spirv.Constant 192 : i32
%cst191_i32 = spirv.Constant 191 : i32
%cst190_i32 = spirv.Constant 190 : i32
%cst189_i32 = spirv.Constant 189 : i32
%cst188_i32 = spirv.Constant 188 : i32
%cst187_i32 = spirv.Constant 187 : i32
%cst186_i32 = spirv.Constant 186 : i32
%cst185_i32 = spirv.Constant 185 : i32
%cst184_i32 = spirv.Constant 184 : i32
%cst183_i32 = spirv.Constant 183 : i32
%cst182_i32 = spirv.Constant 182 : i32
%cst181_i32 = spirv.Constant 181 : i32
%cst180_i32 = spirv.Constant 180 : i32
%cst179_i32 = spirv.Constant 179 : i32
%cst178_i32 = spirv.Constant 178 : i32
%cst177_i32 = spirv.Constant 177 : i32
%cst176_i32 = spirv.Constant 176 : i32
%cst175_i32 = spirv.Constant 175 : i32
%cst174_i32 = spirv.Constant 174 : i32
%cst173_i32 = spirv.Constant 173 : i32
%cst172_i32 = spirv.Constant 172 : i32
%cst171_i32 = spirv.Constant 171 : i32
%cst170_i32 = spirv.Constant 170 : i32
%cst169_i32 = spirv.Constant 169 : i32
%cst168_i32 = spirv.Constant 168 : i32
%cst167_i32 = spirv.Constant 167 : i32
%cst166_i32 = spirv.Constant 166 : i32
%cst165_i32 = spirv.Constant 165 : i32
%cst164_i32 = spirv.Constant 164 : i32
%cst163_i32 = spirv.Constant 163 : i32
%cst162_i32 = spirv.Constant 162 : i32
%cst161_i32 = spirv.Constant 161 : i32
%cst160_i32 = spirv.Constant 160 : i32
%cst159_i32 = spirv.Constant 159 : i32
%cst158_i32 = spirv.Constant 158 : i32
%cst157_i32 = spirv.Constant 157 : i32
%cst156_i32 = spirv.Constant 156 : i32
%cst155_i32 = spirv.Constant 155 : i32
%cst154_i32 = spirv.Constant 154 : i32
%cst153_i32 = spirv.Constant 153 : i32
%cst152_i32 = spirv.Constant 152 : i32
%cst151_i32 = spirv.Constant 151 : i32
%cst150_i32 = spirv.Constant 150 : i32
%cst149_i32 = spirv.Constant 149 : i32
%cst148_i32 = spirv.Constant 148 : i32
%cst147_i32 = spirv.Constant 147 : i32
%cst146_i32 = spirv.Constant 146 : i32
%cst145_i32 = spirv.Constant 145 : i32
%cst144_i32 = spirv.Constant 144 : i32
%cst143_i32 = spirv.Constant 143 : i32
%cst142_i32 = spirv.Constant 142 : i32
%cst141_i32 = spirv.Constant 141 : i32
%cst140_i32 = spirv.Constant 140 : i32
%cst139_i32 = spirv.Constant 139 : i32
%cst138_i32 = spirv.Constant 138 : i32
%cst137_i32 = spirv.Constant 137 : i32
%cst136_i32 = spirv.Constant 136 : i32
%cst135_i32 = spirv.Constant 135 : i32
%cst134_i32 = spirv.Constant 134 : i32
%cst133_i32 = spirv.Constant 133 : i32
%cst132_i32 = spirv.Constant 132 : i32
%cst131_i32 = spirv.Constant 131 : i32
%cst130_i32 = spirv.Constant 130 : i32
%cst129_i32 = spirv.Constant 129 : i32
%cst128_i32 = spirv.Constant 128 : i32
%cst127_i32 = spirv.Constant 127 : i32
%cst126_i32 = spirv.Constant 126 : i32
%cst125_i32 = spirv.Constant 125 : i32
%cst124_i32 = spirv.Constant 124 : i32
%cst123_i32 = spirv.Constant 123 : i32
%cst122_i32 = spirv.Constant 122 : i32
%cst121_i32 = spirv.Constant 121 : i32
%cst120_i32 = spirv.Constant 120 : i32
%cst119_i32 = spirv.Constant 119 : i32
%cst118_i32 = spirv.Constant 118 : i32
%cst117_i32 = spirv.Constant 117 : i32
%cst116_i32 = spirv.Constant 116 : i32
%cst115_i32 = spirv.Constant 115 : i32
%cst114_i32 = spirv.Constant 114 : i32
%cst113_i32 = spirv.Constant 113 : i32
%cst112_i32 = spirv.Constant 112 : i32
%cst111_i32 = spirv.Constant 111 : i32
%cst110_i32 = spirv.Constant 110 : i32
%cst109_i32 = spirv.Constant 109 : i32
%cst108_i32 = spirv.Constant 108 : i32
%cst107_i32 = spirv.Constant 107 : i32
%cst106_i32 = spirv.Constant 106 : i32
%cst105_i32 = spirv.Constant 105 : i32
%cst104_i32 = spirv.Constant 104 : i32
%cst103_i32 = spirv.Constant 103 : i32
%cst102_i32 = spirv.Constant 102 : i32
%cst101_i32 = spirv.Constant 101 : i32
%cst100_i32 = spirv.Constant 100 : i32
%cst99_i32 = spirv.Constant 99 : i32
%cst98_i32 = spirv.Constant 98 : i32
%cst97_i32 = spirv.Constant 97 : i32
%cst96_i32 = spirv.Constant 96 : i32
%cst95_i32 = spirv.Constant 95 : i32
%cst94_i32 = spirv.Constant 94 : i32
%cst93_i32 = spirv.Constant 93 : i32
%cst92_i32 = spirv.Constant 92 : i32
%cst91_i32 = spirv.Constant 91 : i32
%cst90_i32 = spirv.Constant 90 : i32
%cst89_i32 = spirv.Constant 89 : i32
%cst88_i32 = spirv.Constant 88 : i32
%cst87_i32 = spirv.Constant 87 : i32
%cst86_i32 = spirv.Constant 86 : i32
%cst85_i32 = spirv.Constant 85 : i32
%cst84_i32 = spirv.Constant 84 : i32
%cst83_i32 = spirv.Constant 83 : i32
%cst82_i32 = spirv.Constant 82 : i32
%cst81_i32 = spirv.Constant 81 : i32
%cst80_i32 = spirv.Constant 80 : i32
%cst79_i32 = spirv.Constant 79 : i32
%cst78_i32 = spirv.Constant 78 : i32
%cst77_i32 = spirv.Constant 77 : i32
%cst76_i32 = spirv.Constant 76 : i32
%cst75_i32 = spirv.Constant 75 : i32
%cst74_i32 = spirv.Constant 74 : i32
%cst73_i32 = spirv.Constant 73 : i32
%cst72_i32 = spirv.Constant 72 : i32
%cst71_i32 = spirv.Constant 71 : i32
%cst70_i32 = spirv.Constant 70 : i32
%cst69_i32 = spirv.Constant 69 : i32
%cst68_i32 = spirv.Constant 68 : i32
%cst67_i32 = spirv.Constant 67 : i32
%cst66_i32 = spirv.Constant 66 : i32
%cst65_i32 = spirv.Constant 65 : i32
%cst64_i32 = spirv.Constant 64 : i32
%cst63_i32 = spirv.Constant 63 : i32
%cst62_i32 = spirv.Constant 62 : i32
%cst61_i32 = spirv.Constant 61 : i32
%cst60_i32 = spirv.Constant 60 : i32
%cst59_i32 = spirv.Constant 59 : i32
%cst58_i32 = spirv.Constant 58 : i32
%cst57_i32 = spirv.Constant 57 : i32
%cst56_i32 = spirv.Constant 56 : i32
%cst55_i32 = spirv.Constant 55 : i32
%cst54_i32 = spirv.Constant 54 : i32
%cst53_i32 = spirv.Constant 53 : i32
%cst52_i32 = spirv.Constant 52 : i32
%cst51_i32 = spirv.Constant 51 : i32
%cst50_i32 = spirv.Constant 50 : i32
%cst49_i32 = spirv.Constant 49 : i32
%cst48_i32 = spirv.Constant 48 : i32
%cst47_i32 = spirv.Constant 47 : i32
%cst46_i32 = spirv.Constant 46 : i32
%cst45_i32 = spirv.Constant 45 : i32
%cst44_i32 = spirv.Constant 44 : i32
%cst43_i32 = spirv.Constant 43 : i32
%cst42_i32 = spirv.Constant 42 : i32
%cst41_i32 = spirv.Constant 41 : i32
%cst40_i32 = spirv.Constant 40 : i32
%cst39_i32 = spirv.Constant 39 : i32
%cst38_i32 = spirv.Constant 38 : i32
%cst37_i32 = spirv.Constant 37 : i32
%cst36_i32 = spirv.Constant 36 : i32
%cst35_i32 = spirv.Constant 35 : i32
%cst34_i32 = spirv.Constant 34 : i32
%cst33_i32 = spirv.Constant 33 : i32
%cst32_i32 = spirv.Constant 32 : i32
%cst31_i32 = spirv.Constant 31 : i32
%cst30_i32 = spirv.Constant 30 : i32
%cst29_i32 = spirv.Constant 29 : i32
%cst28_i32 = spirv.Constant 28 : i32
%cst27_i32 = spirv.Constant 27 : i32
%cst26_i32 = spirv.Constant 26 : i32
%cst25_i32 = spirv.Constant 25 : i32
%cst24_i32 = spirv.Constant 24 : i32
%cst23_i32 = spirv.Constant 23 : i32
%cst22_i32 = spirv.Constant 22 : i32
%cst21_i32 = spirv.Constant 21 : i32
%cst20_i32 = spirv.Constant 20 : i32
%cst19_i32 = spirv.Constant 19 : i32
%cst18_i32 = spirv.Constant 18 : i32
%cst17_i32 = spirv.Constant 17 : i32
%cst16_i32 = spirv.Constant 16 : i32
%cst15_i32 = spirv.Constant 15 : i32
%cst14_i32 = spirv.Constant 14 : i32
%cst13_i32 = spirv.Constant 13 : i32
%cst12_i32 = spirv.Constant 12 : i32
%cst11_i32 = spirv.Constant 11 : i32
%cst10_i32 = spirv.Constant 10 : i32
%cst9_i32 = spirv.Constant 9 : i32
%cst8_i32 = spirv.Constant 8 : i32
%cst7_i32 = spirv.Constant 7 : i32
%cst6_i32 = spirv.Constant 6 : i32
%cst5_i32 = spirv.Constant 5 : i32
%cst4_i32 = spirv.Constant 4 : i32
%cst3_i32 = spirv.Constant 3 : i32
%cst1_i32 = spirv.Constant 1 : i32
%cst709632_i32 = spirv.Constant 709632 : i32
%cst3548160_i32 = spirv.Constant 3548160 : i32
%cst308_i32 = spirv.Constant 308 : i32
%cst9856_i32 = spirv.Constant 9856 : i32
%cst-1_i32 = spirv.Constant -1 : i32
%cst2_i32 = spirv.Constant 2 : i32
%cst0_i32 = spirv.Constant 0 : i32
%cst_f32 = spirv.Constant 0.693147182 : f32
%cst_f32_0 = spirv.Constant 1.44269502 : f32
%cst_f32_1 = spirv.Constant 1.000000e+00 : f32
%cst_f32_2 = spirv.Constant 0.499705136 : f32
%cst_f32_3 = spirv.Constant 0.168738902 : f32
%cst_f32_4 = spirv.Constant 0.0366896503 : f32
%cst_f32_5 = spirv.Constant 1.314350e-02 : f32
%cst_f32_6 = spirv.Constant 0.000000e+00 : f32
%cst_f32_7 = spirv.Constant 0x7F800000 : f32
%cst_f32_8 = spirv.Constant 0xFF800000 : f32
%cst_f32_9 = spirv.Constant 1.17549435E-38 : f32
%cst-127_i32 = spirv.Constant -127 : i32
%cst_vec_4xf16 = spirv.Constant dense<0.000000e+00> : vector<4xf16>
%__push_constant_var___addr = spirv.mlir.addressof @__push_constant_var__ : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>
%0 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst0_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%1 = spirv.Load "PushConstant" %0 : i32
%2 = spirv.AccessChain %__push_constant_var___addr[%cst0_i32, %cst1_i32] : !spirv.ptr<!spirv.struct<(!spirv.array<2 x i32, stride=4> [0])>, PushConstant>, i32, i32
%3 = spirv.Load "PushConstant" %2 : i32
%4 = spirv.SLessThan %1, %cst0_i32 : i32
%5 = spirv.ISub %cst-1_i32, %1 : i32
%6 = spirv.Select %4, %5, %1 : i1, i32
%7 = spirv.SDiv %6, %cst2_i32 : i32
%8 = spirv.ISub %cst-1_i32, %7 : i32
%9 = spirv.Select %4, %8, %7 : i1, i32
%__resource_var_0_0__addr = spirv.mlir.addressof @__resource_var_0_0_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
%10 = spirv.SLessThan %3, %cst0_i32 : i32
%11 = spirv.ISub %cst-1_i32, %3 : i32
%12 = spirv.Select %10, %11, %3 : i1, i32
%13 = spirv.SDiv %12, %cst2_i32 : i32
%14 = spirv.ISub %cst-1_i32, %13 : i32
%15 = spirv.Select %10, %14, %13 : i1, i32
%__resource_var_0_1__addr = spirv.mlir.addressof @__resource_var_0_1_ : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>
%__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi32>, Input>
%16 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%17 = spirv.CompositeExtract %16[2 : i32] : vector<3xi32>
%18 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%19 = spirv.CompositeExtract %18[1 : i32] : vector<3xi32>
%20 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi32>
%21 = spirv.CompositeExtract %20[0 : i32] : vector<3xi32>
%__builtin_var_LocalInvocationId___addr = spirv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
%22 = spirv.Load "Input" %__builtin_var_LocalInvocationId___addr : vector<3xi32>
%23 = spirv.CompositeExtract %22[0 : i32] : vector<3xi32>
%24 = spirv.IMul %21, %cst9856_i32 : i32
%25 = spirv.IMul %23, %cst308_i32 : i32
%26 = spirv.IAdd %24, %25 : i32
%27 = spirv.IMul %17, %cst3548160_i32 : i32
%28 = spirv.IAdd %26, %27 : i32
%29 = spirv.IMul %19, %cst709632_i32 : i32
%30 = spirv.IAdd %28, %29 : i32
%31 = spirv.IAdd %30, %9 : i32
%32 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %31] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%33 = spirv.Load "StorageBuffer" %32 : f16
%34 = spirv.IAdd %31, %cst1_i32 : i32
%35 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %34] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%36 = spirv.Load "StorageBuffer" %35 : f16
%37 = spirv.IAdd %31, %cst2_i32 : i32
%38 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %37] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%39 = spirv.Load "StorageBuffer" %38 : f16
%40 = spirv.IAdd %31, %cst3_i32 : i32
%41 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %40] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%42 = spirv.Load "StorageBuffer" %41 : f16
%43 = spirv.IAdd %31, %cst4_i32 : i32
%44 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %43] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%45 = spirv.Load "StorageBuffer" %44 : f16
%46 = spirv.IAdd %31, %cst5_i32 : i32
%47 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %46] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%48 = spirv.Load "StorageBuffer" %47 : f16
%49 = spirv.IAdd %31, %cst6_i32 : i32
%50 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %49] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%51 = spirv.Load "StorageBuffer" %50 : f16
%52 = spirv.IAdd %31, %cst7_i32 : i32
%53 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %52] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%54 = spirv.Load "StorageBuffer" %53 : f16
%55 = spirv.IAdd %31, %cst8_i32 : i32
%56 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %55] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%57 = spirv.Load "StorageBuffer" %56 : f16
%58 = spirv.IAdd %31, %cst9_i32 : i32
%59 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %58] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%60 = spirv.Load "StorageBuffer" %59 : f16
%61 = spirv.IAdd %31, %cst10_i32 : i32
%62 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %61] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%63 = spirv.Load "StorageBuffer" %62 : f16
%64 = spirv.IAdd %31, %cst11_i32 : i32
%65 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %64] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%66 = spirv.Load "StorageBuffer" %65 : f16
%67 = spirv.IAdd %31, %cst12_i32 : i32
%68 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %67] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%69 = spirv.Load "StorageBuffer" %68 : f16
%70 = spirv.IAdd %31, %cst13_i32 : i32
%71 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %70] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%72 = spirv.Load "StorageBuffer" %71 : f16
%73 = spirv.IAdd %31, %cst14_i32 : i32
%74 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %73] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%75 = spirv.Load "StorageBuffer" %74 : f16
%76 = spirv.IAdd %31, %cst15_i32 : i32
%77 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %76] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%78 = spirv.Load "StorageBuffer" %77 : f16
%79 = spirv.IAdd %31, %cst16_i32 : i32
%80 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %79] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%81 = spirv.Load "StorageBuffer" %80 : f16
%82 = spirv.IAdd %31, %cst17_i32 : i32
%83 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %82] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%84 = spirv.Load "StorageBuffer" %83 : f16
%85 = spirv.IAdd %31, %cst18_i32 : i32
%86 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %85] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%87 = spirv.Load "StorageBuffer" %86 : f16
%88 = spirv.IAdd %31, %cst19_i32 : i32
%89 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %88] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%90 = spirv.Load "StorageBuffer" %89 : f16
%91 = spirv.IAdd %31, %cst20_i32 : i32
%92 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %91] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%93 = spirv.Load "StorageBuffer" %92 : f16
%94 = spirv.IAdd %31, %cst21_i32 : i32
%95 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %94] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%96 = spirv.Load "StorageBuffer" %95 : f16
%97 = spirv.IAdd %31, %cst22_i32 : i32
%98 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %97] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%99 = spirv.Load "StorageBuffer" %98 : f16
%100 = spirv.IAdd %31, %cst23_i32 : i32
%101 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %100] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%102 = spirv.Load "StorageBuffer" %101 : f16
%103 = spirv.IAdd %31, %cst24_i32 : i32
%104 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %103] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%105 = spirv.Load "StorageBuffer" %104 : f16
%106 = spirv.IAdd %31, %cst25_i32 : i32
%107 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %106] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%108 = spirv.Load "StorageBuffer" %107 : f16
%109 = spirv.IAdd %31, %cst26_i32 : i32
%110 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %109] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%111 = spirv.Load "StorageBuffer" %110 : f16
%112 = spirv.IAdd %31, %cst27_i32 : i32
%113 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %112] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%114 = spirv.Load "StorageBuffer" %113 : f16
%115 = spirv.IAdd %31, %cst28_i32 : i32
%116 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %115] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%117 = spirv.Load "StorageBuffer" %116 : f16
%118 = spirv.IAdd %31, %cst29_i32 : i32
%119 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %118] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%120 = spirv.Load "StorageBuffer" %119 : f16
%121 = spirv.IAdd %31, %cst30_i32 : i32
%122 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %121] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%123 = spirv.Load "StorageBuffer" %122 : f16
%124 = spirv.IAdd %31, %cst31_i32 : i32
%125 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %124] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%126 = spirv.Load "StorageBuffer" %125 : f16
%127 = spirv.IAdd %31, %cst32_i32 : i32
%128 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %127] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%129 = spirv.Load "StorageBuffer" %128 : f16
%130 = spirv.IAdd %31, %cst33_i32 : i32
%131 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %130] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%132 = spirv.Load "StorageBuffer" %131 : f16
%133 = spirv.IAdd %31, %cst34_i32 : i32
%134 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %133] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%135 = spirv.Load "StorageBuffer" %134 : f16
%136 = spirv.IAdd %31, %cst35_i32 : i32
%137 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %136] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%138 = spirv.Load "StorageBuffer" %137 : f16
%139 = spirv.IAdd %31, %cst36_i32 : i32
%140 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %139] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%141 = spirv.Load "StorageBuffer" %140 : f16
%142 = spirv.IAdd %31, %cst37_i32 : i32
%143 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %142] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%144 = spirv.Load "StorageBuffer" %143 : f16
%145 = spirv.IAdd %31, %cst38_i32 : i32
%146 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %145] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%147 = spirv.Load "StorageBuffer" %146 : f16
%148 = spirv.IAdd %31, %cst39_i32 : i32
%149 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %148] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%150 = spirv.Load "StorageBuffer" %149 : f16
%151 = spirv.IAdd %31, %cst40_i32 : i32
%152 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %151] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%153 = spirv.Load "StorageBuffer" %152 : f16
%154 = spirv.IAdd %31, %cst41_i32 : i32
%155 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %154] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%156 = spirv.Load "StorageBuffer" %155 : f16
%157 = spirv.IAdd %31, %cst42_i32 : i32
%158 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %157] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%159 = spirv.Load "StorageBuffer" %158 : f16
%160 = spirv.IAdd %31, %cst43_i32 : i32
%161 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %160] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%162 = spirv.Load "StorageBuffer" %161 : f16
%163 = spirv.IAdd %31, %cst44_i32 : i32
%164 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %163] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%165 = spirv.Load "StorageBuffer" %164 : f16
%166 = spirv.IAdd %31, %cst45_i32 : i32
%167 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %166] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%168 = spirv.Load "StorageBuffer" %167 : f16
%169 = spirv.IAdd %31, %cst46_i32 : i32
%170 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %169] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%171 = spirv.Load "StorageBuffer" %170 : f16
%172 = spirv.IAdd %31, %cst47_i32 : i32
%173 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %172] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%174 = spirv.Load "StorageBuffer" %173 : f16
%175 = spirv.IAdd %31, %cst48_i32 : i32
%176 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %175] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%177 = spirv.Load "StorageBuffer" %176 : f16
%178 = spirv.IAdd %31, %cst49_i32 : i32
%179 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %178] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%180 = spirv.Load "StorageBuffer" %179 : f16
%181 = spirv.IAdd %31, %cst50_i32 : i32
%182 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %181] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%183 = spirv.Load "StorageBuffer" %182 : f16
%184 = spirv.IAdd %31, %cst51_i32 : i32
%185 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %184] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%186 = spirv.Load "StorageBuffer" %185 : f16
%187 = spirv.IAdd %31, %cst52_i32 : i32
%188 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %187] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%189 = spirv.Load "StorageBuffer" %188 : f16
%190 = spirv.IAdd %31, %cst53_i32 : i32
%191 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %190] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%192 = spirv.Load "StorageBuffer" %191 : f16
%193 = spirv.IAdd %31, %cst54_i32 : i32
%194 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %193] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%195 = spirv.Load "StorageBuffer" %194 : f16
%196 = spirv.IAdd %31, %cst55_i32 : i32
%197 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %196] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%198 = spirv.Load "StorageBuffer" %197 : f16
%199 = spirv.IAdd %31, %cst56_i32 : i32
%200 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %199] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%201 = spirv.Load "StorageBuffer" %200 : f16
%202 = spirv.IAdd %31, %cst57_i32 : i32
%203 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %202] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%204 = spirv.Load "StorageBuffer" %203 : f16
%205 = spirv.IAdd %31, %cst58_i32 : i32
%206 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %205] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%207 = spirv.Load "StorageBuffer" %206 : f16
%208 = spirv.IAdd %31, %cst59_i32 : i32
%209 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %208] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%210 = spirv.Load "StorageBuffer" %209 : f16
%211 = spirv.IAdd %31, %cst60_i32 : i32
%212 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %211] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%213 = spirv.Load "StorageBuffer" %212 : f16
%214 = spirv.IAdd %31, %cst61_i32 : i32
%215 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %214] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%216 = spirv.Load "StorageBuffer" %215 : f16
%217 = spirv.IAdd %31, %cst62_i32 : i32
%218 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %217] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%219 = spirv.Load "StorageBuffer" %218 : f16
%220 = spirv.IAdd %31, %cst63_i32 : i32
%221 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %220] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%222 = spirv.Load "StorageBuffer" %221 : f16
%223 = spirv.IAdd %31, %cst64_i32 : i32
%224 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %223] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%225 = spirv.Load "StorageBuffer" %224 : f16
%226 = spirv.IAdd %31, %cst65_i32 : i32
%227 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %226] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%228 = spirv.Load "StorageBuffer" %227 : f16
%229 = spirv.IAdd %31, %cst66_i32 : i32
%230 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %229] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%231 = spirv.Load "StorageBuffer" %230 : f16
%232 = spirv.IAdd %31, %cst67_i32 : i32
%233 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %232] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%234 = spirv.Load "StorageBuffer" %233 : f16
%235 = spirv.IAdd %31, %cst68_i32 : i32
%236 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %235] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%237 = spirv.Load "StorageBuffer" %236 : f16
%238 = spirv.IAdd %31, %cst69_i32 : i32
%239 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %238] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%240 = spirv.Load "StorageBuffer" %239 : f16
%241 = spirv.IAdd %31, %cst70_i32 : i32
%242 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %241] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%243 = spirv.Load "StorageBuffer" %242 : f16
%244 = spirv.IAdd %31, %cst71_i32 : i32
%245 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %244] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%246 = spirv.Load "StorageBuffer" %245 : f16
%247 = spirv.IAdd %31, %cst72_i32 : i32
%248 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %247] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%249 = spirv.Load "StorageBuffer" %248 : f16
%250 = spirv.IAdd %31, %cst73_i32 : i32
%251 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %250] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%252 = spirv.Load "StorageBuffer" %251 : f16
%253 = spirv.IAdd %31, %cst74_i32 : i32
%254 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %253] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%255 = spirv.Load "StorageBuffer" %254 : f16
%256 = spirv.IAdd %31, %cst75_i32 : i32
%257 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %256] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%258 = spirv.Load "StorageBuffer" %257 : f16
%259 = spirv.IAdd %31, %cst76_i32 : i32
%260 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %259] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%261 = spirv.Load "StorageBuffer" %260 : f16
%262 = spirv.IAdd %31, %cst77_i32 : i32
%263 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %262] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%264 = spirv.Load "StorageBuffer" %263 : f16
%265 = spirv.IAdd %31, %cst78_i32 : i32
%266 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %265] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%267 = spirv.Load "StorageBuffer" %266 : f16
%268 = spirv.IAdd %31, %cst79_i32 : i32
%269 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %268] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%270 = spirv.Load "StorageBuffer" %269 : f16
%271 = spirv.IAdd %31, %cst80_i32 : i32
%272 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %271] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%273 = spirv.Load "StorageBuffer" %272 : f16
%274 = spirv.IAdd %31, %cst81_i32 : i32
%275 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %274] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%276 = spirv.Load "StorageBuffer" %275 : f16
%277 = spirv.IAdd %31, %cst82_i32 : i32
%278 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %277] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%279 = spirv.Load "StorageBuffer" %278 : f16
%280 = spirv.IAdd %31, %cst83_i32 : i32
%281 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %280] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%282 = spirv.Load "StorageBuffer" %281 : f16
%283 = spirv.IAdd %31, %cst84_i32 : i32
%284 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %283] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%285 = spirv.Load "StorageBuffer" %284 : f16
%286 = spirv.IAdd %31, %cst85_i32 : i32
%287 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %286] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%288 = spirv.Load "StorageBuffer" %287 : f16
%289 = spirv.IAdd %31, %cst86_i32 : i32
%290 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %289] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%291 = spirv.Load "StorageBuffer" %290 : f16
%292 = spirv.IAdd %31, %cst87_i32 : i32
%293 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %292] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%294 = spirv.Load "StorageBuffer" %293 : f16
%295 = spirv.IAdd %31, %cst88_i32 : i32
%296 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %295] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%297 = spirv.Load "StorageBuffer" %296 : f16
%298 = spirv.IAdd %31, %cst89_i32 : i32
%299 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %298] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%300 = spirv.Load "StorageBuffer" %299 : f16
%301 = spirv.IAdd %31, %cst90_i32 : i32
%302 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %301] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%303 = spirv.Load "StorageBuffer" %302 : f16
%304 = spirv.IAdd %31, %cst91_i32 : i32
%305 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %304] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%306 = spirv.Load "StorageBuffer" %305 : f16
%307 = spirv.IAdd %31, %cst92_i32 : i32
%308 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %307] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%309 = spirv.Load "StorageBuffer" %308 : f16
%310 = spirv.IAdd %31, %cst93_i32 : i32
%311 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %310] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%312 = spirv.Load "StorageBuffer" %311 : f16
%313 = spirv.IAdd %31, %cst94_i32 : i32
%314 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %313] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%315 = spirv.Load "StorageBuffer" %314 : f16
%316 = spirv.IAdd %31, %cst95_i32 : i32
%317 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %316] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%318 = spirv.Load "StorageBuffer" %317 : f16
%319 = spirv.IAdd %31, %cst96_i32 : i32
%320 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %319] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%321 = spirv.Load "StorageBuffer" %320 : f16
%322 = spirv.IAdd %31, %cst97_i32 : i32
%323 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %322] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%324 = spirv.Load "StorageBuffer" %323 : f16
%325 = spirv.IAdd %31, %cst98_i32 : i32
%326 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %325] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%327 = spirv.Load "StorageBuffer" %326 : f16
%328 = spirv.IAdd %31, %cst99_i32 : i32
%329 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %328] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%330 = spirv.Load "StorageBuffer" %329 : f16
%331 = spirv.IAdd %31, %cst100_i32 : i32
%332 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %331] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%333 = spirv.Load "StorageBuffer" %332 : f16
%334 = spirv.IAdd %31, %cst101_i32 : i32
%335 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %334] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%336 = spirv.Load "StorageBuffer" %335 : f16
%337 = spirv.IAdd %31, %cst102_i32 : i32
%338 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %337] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%339 = spirv.Load "StorageBuffer" %338 : f16
%340 = spirv.IAdd %31, %cst103_i32 : i32
%341 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %340] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%342 = spirv.Load "StorageBuffer" %341 : f16
%343 = spirv.IAdd %31, %cst104_i32 : i32
%344 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %343] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%345 = spirv.Load "StorageBuffer" %344 : f16
%346 = spirv.IAdd %31, %cst105_i32 : i32
%347 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %346] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%348 = spirv.Load "StorageBuffer" %347 : f16
%349 = spirv.IAdd %31, %cst106_i32 : i32
%350 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %349] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%351 = spirv.Load "StorageBuffer" %350 : f16
%352 = spirv.IAdd %31, %cst107_i32 : i32
%353 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %352] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%354 = spirv.Load "StorageBuffer" %353 : f16
%355 = spirv.IAdd %31, %cst108_i32 : i32
%356 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %355] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%357 = spirv.Load "StorageBuffer" %356 : f16
%358 = spirv.IAdd %31, %cst109_i32 : i32
%359 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %358] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%360 = spirv.Load "StorageBuffer" %359 : f16
%361 = spirv.IAdd %31, %cst110_i32 : i32
%362 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %361] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%363 = spirv.Load "StorageBuffer" %362 : f16
%364 = spirv.IAdd %31, %cst111_i32 : i32
%365 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %364] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%366 = spirv.Load "StorageBuffer" %365 : f16
%367 = spirv.IAdd %31, %cst112_i32 : i32
%368 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %367] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%369 = spirv.Load "StorageBuffer" %368 : f16
%370 = spirv.IAdd %31, %cst113_i32 : i32
%371 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %370] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%372 = spirv.Load "StorageBuffer" %371 : f16
%373 = spirv.IAdd %31, %cst114_i32 : i32
%374 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %373] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%375 = spirv.Load "StorageBuffer" %374 : f16
%376 = spirv.IAdd %31, %cst115_i32 : i32
%377 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %376] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%378 = spirv.Load "StorageBuffer" %377 : f16
%379 = spirv.IAdd %31, %cst116_i32 : i32
%380 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %379] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%381 = spirv.Load "StorageBuffer" %380 : f16
%382 = spirv.IAdd %31, %cst117_i32 : i32
%383 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %382] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%384 = spirv.Load "StorageBuffer" %383 : f16
%385 = spirv.IAdd %31, %cst118_i32 : i32
%386 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %385] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%387 = spirv.Load "StorageBuffer" %386 : f16
%388 = spirv.IAdd %31, %cst119_i32 : i32
%389 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %388] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%390 = spirv.Load "StorageBuffer" %389 : f16
%391 = spirv.IAdd %31, %cst120_i32 : i32
%392 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %391] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%393 = spirv.Load "StorageBuffer" %392 : f16
%394 = spirv.IAdd %31, %cst121_i32 : i32
%395 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %394] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%396 = spirv.Load "StorageBuffer" %395 : f16
%397 = spirv.IAdd %31, %cst122_i32 : i32
%398 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %397] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%399 = spirv.Load "StorageBuffer" %398 : f16
%400 = spirv.IAdd %31, %cst123_i32 : i32
%401 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %400] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%402 = spirv.Load "StorageBuffer" %401 : f16
%403 = spirv.IAdd %31, %cst124_i32 : i32
%404 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %403] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%405 = spirv.Load "StorageBuffer" %404 : f16
%406 = spirv.IAdd %31, %cst125_i32 : i32
%407 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %406] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%408 = spirv.Load "StorageBuffer" %407 : f16
%409 = spirv.IAdd %31, %cst126_i32 : i32
%410 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %409] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%411 = spirv.Load "StorageBuffer" %410 : f16
%412 = spirv.IAdd %31, %cst127_i32 : i32
%413 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %412] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%414 = spirv.Load "StorageBuffer" %413 : f16
%415 = spirv.IAdd %31, %cst128_i32 : i32
%416 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %415] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%417 = spirv.Load "StorageBuffer" %416 : f16
%418 = spirv.IAdd %31, %cst129_i32 : i32
%419 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %418] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%420 = spirv.Load "StorageBuffer" %419 : f16
%421 = spirv.IAdd %31, %cst130_i32 : i32
%422 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %421] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%423 = spirv.Load "StorageBuffer" %422 : f16
%424 = spirv.IAdd %31, %cst131_i32 : i32
%425 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %424] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%426 = spirv.Load "StorageBuffer" %425 : f16
%427 = spirv.IAdd %31, %cst132_i32 : i32
%428 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %427] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%429 = spirv.Load "StorageBuffer" %428 : f16
%430 = spirv.IAdd %31, %cst133_i32 : i32
%431 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %430] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%432 = spirv.Load "StorageBuffer" %431 : f16
%433 = spirv.IAdd %31, %cst134_i32 : i32
%434 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %433] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%435 = spirv.Load "StorageBuffer" %434 : f16
%436 = spirv.IAdd %31, %cst135_i32 : i32
%437 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %436] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%438 = spirv.Load "StorageBuffer" %437 : f16
%439 = spirv.IAdd %31, %cst136_i32 : i32
%440 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %439] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%441 = spirv.Load "StorageBuffer" %440 : f16
%442 = spirv.IAdd %31, %cst137_i32 : i32
%443 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %442] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%444 = spirv.Load "StorageBuffer" %443 : f16
%445 = spirv.IAdd %31, %cst138_i32 : i32
%446 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %445] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%447 = spirv.Load "StorageBuffer" %446 : f16
%448 = spirv.IAdd %31, %cst139_i32 : i32
%449 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %448] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%450 = spirv.Load "StorageBuffer" %449 : f16
%451 = spirv.IAdd %31, %cst140_i32 : i32
%452 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %451] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%453 = spirv.Load "StorageBuffer" %452 : f16
%454 = spirv.IAdd %31, %cst141_i32 : i32
%455 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %454] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%456 = spirv.Load "StorageBuffer" %455 : f16
%457 = spirv.IAdd %31, %cst142_i32 : i32
%458 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %457] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%459 = spirv.Load "StorageBuffer" %458 : f16
%460 = spirv.IAdd %31, %cst143_i32 : i32
%461 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %460] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%462 = spirv.Load "StorageBuffer" %461 : f16
%463 = spirv.IAdd %31, %cst144_i32 : i32
%464 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %463] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%465 = spirv.Load "StorageBuffer" %464 : f16
%466 = spirv.IAdd %31, %cst145_i32 : i32
%467 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %466] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%468 = spirv.Load "StorageBuffer" %467 : f16
%469 = spirv.IAdd %31, %cst146_i32 : i32
%470 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %469] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%471 = spirv.Load "StorageBuffer" %470 : f16
%472 = spirv.IAdd %31, %cst147_i32 : i32
%473 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %472] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%474 = spirv.Load "StorageBuffer" %473 : f16
%475 = spirv.IAdd %31, %cst148_i32 : i32
%476 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %475] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%477 = spirv.Load "StorageBuffer" %476 : f16
%478 = spirv.IAdd %31, %cst149_i32 : i32
%479 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %478] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%480 = spirv.Load "StorageBuffer" %479 : f16
%481 = spirv.IAdd %31, %cst150_i32 : i32
%482 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %481] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%483 = spirv.Load "StorageBuffer" %482 : f16
%484 = spirv.IAdd %31, %cst151_i32 : i32
%485 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %484] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%486 = spirv.Load "StorageBuffer" %485 : f16
%487 = spirv.IAdd %31, %cst152_i32 : i32
%488 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %487] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%489 = spirv.Load "StorageBuffer" %488 : f16
%490 = spirv.IAdd %31, %cst153_i32 : i32
%491 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %490] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%492 = spirv.Load "StorageBuffer" %491 : f16
%493 = spirv.IAdd %31, %cst154_i32 : i32
%494 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %493] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%495 = spirv.Load "StorageBuffer" %494 : f16
%496 = spirv.IAdd %31, %cst155_i32 : i32
%497 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %496] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%498 = spirv.Load "StorageBuffer" %497 : f16
%499 = spirv.IAdd %31, %cst156_i32 : i32
%500 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %499] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%501 = spirv.Load "StorageBuffer" %500 : f16
%502 = spirv.IAdd %31, %cst157_i32 : i32
%503 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %502] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%504 = spirv.Load "StorageBuffer" %503 : f16
%505 = spirv.IAdd %31, %cst158_i32 : i32
%506 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %505] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%507 = spirv.Load "StorageBuffer" %506 : f16
%508 = spirv.IAdd %31, %cst159_i32 : i32
%509 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %508] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%510 = spirv.Load "StorageBuffer" %509 : f16
%511 = spirv.IAdd %31, %cst160_i32 : i32
%512 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %511] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%513 = spirv.Load "StorageBuffer" %512 : f16
%514 = spirv.IAdd %31, %cst161_i32 : i32
%515 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %514] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%516 = spirv.Load "StorageBuffer" %515 : f16
%517 = spirv.IAdd %31, %cst162_i32 : i32
%518 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %517] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%519 = spirv.Load "StorageBuffer" %518 : f16
%520 = spirv.IAdd %31, %cst163_i32 : i32
%521 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %520] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%522 = spirv.Load "StorageBuffer" %521 : f16
%523 = spirv.IAdd %31, %cst164_i32 : i32
%524 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %523] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%525 = spirv.Load "StorageBuffer" %524 : f16
%526 = spirv.IAdd %31, %cst165_i32 : i32
%527 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %526] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%528 = spirv.Load "StorageBuffer" %527 : f16
%529 = spirv.IAdd %31, %cst166_i32 : i32
%530 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %529] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%531 = spirv.Load "StorageBuffer" %530 : f16
%532 = spirv.IAdd %31, %cst167_i32 : i32
%533 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %532] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%534 = spirv.Load "StorageBuffer" %533 : f16
%535 = spirv.IAdd %31, %cst168_i32 : i32
%536 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %535] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%537 = spirv.Load "StorageBuffer" %536 : f16
%538 = spirv.IAdd %31, %cst169_i32 : i32
%539 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %538] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%540 = spirv.Load "StorageBuffer" %539 : f16
%541 = spirv.IAdd %31, %cst170_i32 : i32
%542 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %541] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%543 = spirv.Load "StorageBuffer" %542 : f16
%544 = spirv.IAdd %31, %cst171_i32 : i32
%545 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %544] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%546 = spirv.Load "StorageBuffer" %545 : f16
%547 = spirv.IAdd %31, %cst172_i32 : i32
%548 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %547] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%549 = spirv.Load "StorageBuffer" %548 : f16
%550 = spirv.IAdd %31, %cst173_i32 : i32
%551 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %550] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%552 = spirv.Load "StorageBuffer" %551 : f16
%553 = spirv.IAdd %31, %cst174_i32 : i32
%554 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %553] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%555 = spirv.Load "StorageBuffer" %554 : f16
%556 = spirv.IAdd %31, %cst175_i32 : i32
%557 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %556] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%558 = spirv.Load "StorageBuffer" %557 : f16
%559 = spirv.IAdd %31, %cst176_i32 : i32
%560 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %559] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%561 = spirv.Load "StorageBuffer" %560 : f16
%562 = spirv.IAdd %31, %cst177_i32 : i32
%563 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %562] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%564 = spirv.Load "StorageBuffer" %563 : f16
%565 = spirv.IAdd %31, %cst178_i32 : i32
%566 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %565] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%567 = spirv.Load "StorageBuffer" %566 : f16
%568 = spirv.IAdd %31, %cst179_i32 : i32
%569 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %568] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%570 = spirv.Load "StorageBuffer" %569 : f16
%571 = spirv.IAdd %31, %cst180_i32 : i32
%572 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %571] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%573 = spirv.Load "StorageBuffer" %572 : f16
%574 = spirv.IAdd %31, %cst181_i32 : i32
%575 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %574] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%576 = spirv.Load "StorageBuffer" %575 : f16
%577 = spirv.IAdd %31, %cst182_i32 : i32
%578 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %577] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%579 = spirv.Load "StorageBuffer" %578 : f16
%580 = spirv.IAdd %31, %cst183_i32 : i32
%581 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %580] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%582 = spirv.Load "StorageBuffer" %581 : f16
%583 = spirv.IAdd %31, %cst184_i32 : i32
%584 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %583] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%585 = spirv.Load "StorageBuffer" %584 : f16
%586 = spirv.IAdd %31, %cst185_i32 : i32
%587 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %586] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%588 = spirv.Load "StorageBuffer" %587 : f16
%589 = spirv.IAdd %31, %cst186_i32 : i32
%590 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %589] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%591 = spirv.Load "StorageBuffer" %590 : f16
%592 = spirv.IAdd %31, %cst187_i32 : i32
%593 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %592] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%594 = spirv.Load "StorageBuffer" %593 : f16
%595 = spirv.IAdd %31, %cst188_i32 : i32
%596 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %595] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%597 = spirv.Load "StorageBuffer" %596 : f16
%598 = spirv.IAdd %31, %cst189_i32 : i32
%599 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %598] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%600 = spirv.Load "StorageBuffer" %599 : f16
%601 = spirv.IAdd %31, %cst190_i32 : i32
%602 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %601] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%603 = spirv.Load "StorageBuffer" %602 : f16
%604 = spirv.IAdd %31, %cst191_i32 : i32
%605 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %604] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%606 = spirv.Load "StorageBuffer" %605 : f16
%607 = spirv.IAdd %31, %cst192_i32 : i32
%608 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %607] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%609 = spirv.Load "StorageBuffer" %608 : f16
%610 = spirv.IAdd %31, %cst193_i32 : i32
%611 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %610] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%612 = spirv.Load "StorageBuffer" %611 : f16
%613 = spirv.IAdd %31, %cst194_i32 : i32
%614 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %613] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%615 = spirv.Load "StorageBuffer" %614 : f16
%616 = spirv.IAdd %31, %cst195_i32 : i32
%617 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %616] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%618 = spirv.Load "StorageBuffer" %617 : f16
%619 = spirv.IAdd %31, %cst196_i32 : i32
%620 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %619] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%621 = spirv.Load "StorageBuffer" %620 : f16
%622 = spirv.IAdd %31, %cst197_i32 : i32
%623 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %622] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%624 = spirv.Load "StorageBuffer" %623 : f16
%625 = spirv.IAdd %31, %cst198_i32 : i32
%626 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %625] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%627 = spirv.Load "StorageBuffer" %626 : f16
%628 = spirv.IAdd %31, %cst199_i32 : i32
%629 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %628] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%630 = spirv.Load "StorageBuffer" %629 : f16
%631 = spirv.IAdd %31, %cst200_i32 : i32
%632 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %631] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%633 = spirv.Load "StorageBuffer" %632 : f16
%634 = spirv.IAdd %31, %cst201_i32 : i32
%635 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %634] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%636 = spirv.Load "StorageBuffer" %635 : f16
%637 = spirv.IAdd %31, %cst202_i32 : i32
%638 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %637] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%639 = spirv.Load "StorageBuffer" %638 : f16
%640 = spirv.IAdd %31, %cst203_i32 : i32
%641 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %640] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%642 = spirv.Load "StorageBuffer" %641 : f16
%643 = spirv.IAdd %31, %cst204_i32 : i32
%644 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %643] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%645 = spirv.Load "StorageBuffer" %644 : f16
%646 = spirv.IAdd %31, %cst205_i32 : i32
%647 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %646] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%648 = spirv.Load "StorageBuffer" %647 : f16
%649 = spirv.IAdd %31, %cst206_i32 : i32
%650 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %649] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%651 = spirv.Load "StorageBuffer" %650 : f16
%652 = spirv.IAdd %31, %cst207_i32 : i32
%653 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %652] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%654 = spirv.Load "StorageBuffer" %653 : f16
%655 = spirv.IAdd %31, %cst208_i32 : i32
%656 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %655] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%657 = spirv.Load "StorageBuffer" %656 : f16
%658 = spirv.IAdd %31, %cst209_i32 : i32
%659 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %658] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%660 = spirv.Load "StorageBuffer" %659 : f16
%661 = spirv.IAdd %31, %cst210_i32 : i32
%662 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %661] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%663 = spirv.Load "StorageBuffer" %662 : f16
%664 = spirv.IAdd %31, %cst211_i32 : i32
%665 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %664] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%666 = spirv.Load "StorageBuffer" %665 : f16
%667 = spirv.IAdd %31, %cst212_i32 : i32
%668 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %667] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%669 = spirv.Load "StorageBuffer" %668 : f16
%670 = spirv.IAdd %31, %cst213_i32 : i32
%671 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %670] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%672 = spirv.Load "StorageBuffer" %671 : f16
%673 = spirv.IAdd %31, %cst214_i32 : i32
%674 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %673] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%675 = spirv.Load "StorageBuffer" %674 : f16
%676 = spirv.IAdd %31, %cst215_i32 : i32
%677 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %676] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%678 = spirv.Load "StorageBuffer" %677 : f16
%679 = spirv.IAdd %31, %cst216_i32 : i32
%680 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %679] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%681 = spirv.Load "StorageBuffer" %680 : f16
%682 = spirv.IAdd %31, %cst217_i32 : i32
%683 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %682] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%684 = spirv.Load "StorageBuffer" %683 : f16
%685 = spirv.IAdd %31, %cst218_i32 : i32
%686 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %685] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%687 = spirv.Load "StorageBuffer" %686 : f16
%688 = spirv.IAdd %31, %cst219_i32 : i32
%689 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %688] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%690 = spirv.Load "StorageBuffer" %689 : f16
%691 = spirv.IAdd %31, %cst220_i32 : i32
%692 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %691] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%693 = spirv.Load "StorageBuffer" %692 : f16
%694 = spirv.IAdd %31, %cst221_i32 : i32
%695 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %694] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%696 = spirv.Load "StorageBuffer" %695 : f16
%697 = spirv.IAdd %31, %cst222_i32 : i32
%698 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %697] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%699 = spirv.Load "StorageBuffer" %698 : f16
%700 = spirv.IAdd %31, %cst223_i32 : i32
%701 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %700] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%702 = spirv.Load "StorageBuffer" %701 : f16
%703 = spirv.IAdd %31, %cst224_i32 : i32
%704 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %703] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%705 = spirv.Load "StorageBuffer" %704 : f16
%706 = spirv.IAdd %31, %cst225_i32 : i32
%707 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %706] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%708 = spirv.Load "StorageBuffer" %707 : f16
%709 = spirv.IAdd %31, %cst226_i32 : i32
%710 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %709] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%711 = spirv.Load "StorageBuffer" %710 : f16
%712 = spirv.IAdd %31, %cst227_i32 : i32
%713 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %712] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%714 = spirv.Load "StorageBuffer" %713 : f16
%715 = spirv.IAdd %31, %cst228_i32 : i32
%716 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %715] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%717 = spirv.Load "StorageBuffer" %716 : f16
%718 = spirv.IAdd %31, %cst229_i32 : i32
%719 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %718] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%720 = spirv.Load "StorageBuffer" %719 : f16
%721 = spirv.IAdd %31, %cst230_i32 : i32
%722 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %721] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%723 = spirv.Load "StorageBuffer" %722 : f16
%724 = spirv.IAdd %31, %cst231_i32 : i32
%725 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %724] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%726 = spirv.Load "StorageBuffer" %725 : f16
%727 = spirv.IAdd %31, %cst232_i32 : i32
%728 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %727] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%729 = spirv.Load "StorageBuffer" %728 : f16
%730 = spirv.IAdd %31, %cst233_i32 : i32
%731 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %730] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%732 = spirv.Load "StorageBuffer" %731 : f16
%733 = spirv.IAdd %31, %cst234_i32 : i32
%734 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %733] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%735 = spirv.Load "StorageBuffer" %734 : f16
%736 = spirv.IAdd %31, %cst235_i32 : i32
%737 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %736] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%738 = spirv.Load "StorageBuffer" %737 : f16
%739 = spirv.IAdd %31, %cst236_i32 : i32
%740 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %739] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%741 = spirv.Load "StorageBuffer" %740 : f16
%742 = spirv.IAdd %31, %cst237_i32 : i32
%743 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %742] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%744 = spirv.Load "StorageBuffer" %743 : f16
%745 = spirv.IAdd %31, %cst238_i32 : i32
%746 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %745] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%747 = spirv.Load "StorageBuffer" %746 : f16
%748 = spirv.IAdd %31, %cst239_i32 : i32
%749 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %748] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%750 = spirv.Load "StorageBuffer" %749 : f16
%751 = spirv.IAdd %31, %cst240_i32 : i32
%752 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %751] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%753 = spirv.Load "StorageBuffer" %752 : f16
%754 = spirv.IAdd %31, %cst241_i32 : i32
%755 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %754] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%756 = spirv.Load "StorageBuffer" %755 : f16
%757 = spirv.IAdd %31, %cst242_i32 : i32
%758 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %757] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%759 = spirv.Load "StorageBuffer" %758 : f16
%760 = spirv.IAdd %31, %cst243_i32 : i32
%761 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %760] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%762 = spirv.Load "StorageBuffer" %761 : f16
%763 = spirv.IAdd %31, %cst244_i32 : i32
%764 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %763] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%765 = spirv.Load "StorageBuffer" %764 : f16
%766 = spirv.IAdd %31, %cst245_i32 : i32
%767 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %766] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%768 = spirv.Load "StorageBuffer" %767 : f16
%769 = spirv.IAdd %31, %cst246_i32 : i32
%770 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %769] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%771 = spirv.Load "StorageBuffer" %770 : f16
%772 = spirv.IAdd %31, %cst247_i32 : i32
%773 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %772] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%774 = spirv.Load "StorageBuffer" %773 : f16
%775 = spirv.IAdd %31, %cst248_i32 : i32
%776 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %775] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%777 = spirv.Load "StorageBuffer" %776 : f16
%778 = spirv.IAdd %31, %cst249_i32 : i32
%779 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %778] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%780 = spirv.Load "StorageBuffer" %779 : f16
%781 = spirv.IAdd %31, %cst250_i32 : i32
%782 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %781] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%783 = spirv.Load "StorageBuffer" %782 : f16
%784 = spirv.IAdd %31, %cst251_i32 : i32
%785 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %784] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%786 = spirv.Load "StorageBuffer" %785 : f16
%787 = spirv.IAdd %31, %cst252_i32 : i32
%788 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %787] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%789 = spirv.Load "StorageBuffer" %788 : f16
%790 = spirv.IAdd %31, %cst253_i32 : i32
%791 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %790] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%792 = spirv.Load "StorageBuffer" %791 : f16
%793 = spirv.IAdd %31, %cst254_i32 : i32
%794 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %793] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%795 = spirv.Load "StorageBuffer" %794 : f16
%796 = spirv.IAdd %31, %cst255_i32 : i32
%797 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %796] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%798 = spirv.Load "StorageBuffer" %797 : f16
%799 = spirv.IAdd %31, %cst256_i32 : i32
%800 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %799] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%801 = spirv.Load "StorageBuffer" %800 : f16
%802 = spirv.IAdd %31, %cst257_i32 : i32
%803 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %802] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%804 = spirv.Load "StorageBuffer" %803 : f16
%805 = spirv.IAdd %31, %cst258_i32 : i32
%806 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %805] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%807 = spirv.Load "StorageBuffer" %806 : f16
%808 = spirv.IAdd %31, %cst259_i32 : i32
%809 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %808] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%810 = spirv.Load "StorageBuffer" %809 : f16
%811 = spirv.IAdd %31, %cst260_i32 : i32
%812 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %811] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%813 = spirv.Load "StorageBuffer" %812 : f16
%814 = spirv.IAdd %31, %cst261_i32 : i32
%815 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %814] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%816 = spirv.Load "StorageBuffer" %815 : f16
%817 = spirv.IAdd %31, %cst262_i32 : i32
%818 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %817] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%819 = spirv.Load "StorageBuffer" %818 : f16
%820 = spirv.IAdd %31, %cst263_i32 : i32
%821 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %820] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%822 = spirv.Load "StorageBuffer" %821 : f16
%823 = spirv.IAdd %31, %cst264_i32 : i32
%824 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %823] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%825 = spirv.Load "StorageBuffer" %824 : f16
%826 = spirv.IAdd %31, %cst265_i32 : i32
%827 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %826] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%828 = spirv.Load "StorageBuffer" %827 : f16
%829 = spirv.IAdd %31, %cst266_i32 : i32
%830 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %829] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%831 = spirv.Load "StorageBuffer" %830 : f16
%832 = spirv.IAdd %31, %cst267_i32 : i32
%833 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %832] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%834 = spirv.Load "StorageBuffer" %833 : f16
%835 = spirv.IAdd %31, %cst268_i32 : i32
%836 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %835] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%837 = spirv.Load "StorageBuffer" %836 : f16
%838 = spirv.IAdd %31, %cst269_i32 : i32
%839 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %838] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%840 = spirv.Load "StorageBuffer" %839 : f16
%841 = spirv.IAdd %31, %cst270_i32 : i32
%842 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %841] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%843 = spirv.Load "StorageBuffer" %842 : f16
%844 = spirv.IAdd %31, %cst271_i32 : i32
%845 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %844] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%846 = spirv.Load "StorageBuffer" %845 : f16
%847 = spirv.IAdd %31, %cst272_i32 : i32
%848 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %847] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%849 = spirv.Load "StorageBuffer" %848 : f16
%850 = spirv.IAdd %31, %cst273_i32 : i32
%851 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %850] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%852 = spirv.Load "StorageBuffer" %851 : f16
%853 = spirv.IAdd %31, %cst274_i32 : i32
%854 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %853] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%855 = spirv.Load "StorageBuffer" %854 : f16
%856 = spirv.IAdd %31, %cst275_i32 : i32
%857 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %856] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%858 = spirv.Load "StorageBuffer" %857 : f16
%859 = spirv.IAdd %31, %cst276_i32 : i32
%860 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %859] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%861 = spirv.Load "StorageBuffer" %860 : f16
%862 = spirv.IAdd %31, %cst277_i32 : i32
%863 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %862] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%864 = spirv.Load "StorageBuffer" %863 : f16
%865 = spirv.IAdd %31, %cst278_i32 : i32
%866 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %865] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%867 = spirv.Load "StorageBuffer" %866 : f16
%868 = spirv.IAdd %31, %cst279_i32 : i32
%869 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %868] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%870 = spirv.Load "StorageBuffer" %869 : f16
%871 = spirv.IAdd %31, %cst280_i32 : i32
%872 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %871] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%873 = spirv.Load "StorageBuffer" %872 : f16
%874 = spirv.IAdd %31, %cst281_i32 : i32
%875 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %874] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%876 = spirv.Load "StorageBuffer" %875 : f16
%877 = spirv.IAdd %31, %cst282_i32 : i32
%878 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %877] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%879 = spirv.Load "StorageBuffer" %878 : f16
%880 = spirv.IAdd %31, %cst283_i32 : i32
%881 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %880] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%882 = spirv.Load "StorageBuffer" %881 : f16
%883 = spirv.IAdd %31, %cst284_i32 : i32
%884 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %883] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%885 = spirv.Load "StorageBuffer" %884 : f16
%886 = spirv.IAdd %31, %cst285_i32 : i32
%887 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %886] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%888 = spirv.Load "StorageBuffer" %887 : f16
%889 = spirv.IAdd %31, %cst286_i32 : i32
%890 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %889] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%891 = spirv.Load "StorageBuffer" %890 : f16
%892 = spirv.IAdd %31, %cst287_i32 : i32
%893 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %892] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%894 = spirv.Load "StorageBuffer" %893 : f16
%895 = spirv.IAdd %31, %cst288_i32 : i32
%896 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %895] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%897 = spirv.Load "StorageBuffer" %896 : f16
%898 = spirv.IAdd %31, %cst289_i32 : i32
%899 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %898] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%900 = spirv.Load "StorageBuffer" %899 : f16
%901 = spirv.IAdd %31, %cst290_i32 : i32
%902 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %901] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%903 = spirv.Load "StorageBuffer" %902 : f16
%904 = spirv.IAdd %31, %cst291_i32 : i32
%905 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %904] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%906 = spirv.Load "StorageBuffer" %905 : f16
%907 = spirv.IAdd %31, %cst292_i32 : i32
%908 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %907] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%909 = spirv.Load "StorageBuffer" %908 : f16
%910 = spirv.IAdd %31, %cst293_i32 : i32
%911 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %910] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%912 = spirv.Load "StorageBuffer" %911 : f16
%913 = spirv.IAdd %31, %cst294_i32 : i32
%914 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %913] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%915 = spirv.Load "StorageBuffer" %914 : f16
%916 = spirv.IAdd %31, %cst295_i32 : i32
%917 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %916] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%918 = spirv.Load "StorageBuffer" %917 : f16
%919 = spirv.IAdd %31, %cst296_i32 : i32
%920 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %919] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%921 = spirv.Load "StorageBuffer" %920 : f16
%922 = spirv.IAdd %31, %cst297_i32 : i32
%923 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %922] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%924 = spirv.Load "StorageBuffer" %923 : f16
%925 = spirv.IAdd %31, %cst298_i32 : i32
%926 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %925] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%927 = spirv.Load "StorageBuffer" %926 : f16
%928 = spirv.IAdd %31, %cst299_i32 : i32
%929 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %928] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%930 = spirv.Load "StorageBuffer" %929 : f16
%931 = spirv.IAdd %31, %cst300_i32 : i32
%932 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %931] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%933 = spirv.Load "StorageBuffer" %932 : f16
%934 = spirv.IAdd %31, %cst301_i32 : i32
%935 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %934] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%936 = spirv.Load "StorageBuffer" %935 : f16
%937 = spirv.IAdd %31, %cst302_i32 : i32
%938 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %937] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%939 = spirv.Load "StorageBuffer" %938 : f16
%940 = spirv.IAdd %31, %cst303_i32 : i32
%941 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %940] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%942 = spirv.Load "StorageBuffer" %941 : f16
%943 = spirv.IAdd %31, %cst304_i32 : i32
%944 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %943] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%945 = spirv.Load "StorageBuffer" %944 : f16
%946 = spirv.IAdd %31, %cst305_i32 : i32
%947 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %946] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%948 = spirv.Load "StorageBuffer" %947 : f16
%949 = spirv.IAdd %31, %cst306_i32 : i32
%950 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %949] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%951 = spirv.Load "StorageBuffer" %950 : f16
%952 = spirv.IAdd %31, %cst307_i32 : i32
%953 = spirv.AccessChain %__resource_var_0_0__addr[%cst0_i32, %952] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<f16, stride=2> [0])>, StorageBuffer>, i32, i32
%954 = spirv.Load "StorageBuffer" %953 : f16
%955 = spirv.CompositeConstruct %33, %264, %495, %726 : (f16, f16, f16, f16) -> vector<4xf16>
%956 = spirv.CompositeConstruct %36, %267, %498, %729 : (f16, f16, f16, f16) -> vector<4xf16>
%957 = spirv.GL.FMax %956, %955 : vector<4xf16>
%958 = spirv.IsNan %956 : vector<4xf16>
%959 = spirv.IsNan %955 : vector<4xf16>
%960 = spirv.Select %958, %956, %957 : vector<4xi1>, vector<4xf16>
%961 = spirv.Select %959, %955, %960 : vector<4xi1>, vector<4xf16>
%962 = spirv.CompositeConstruct %39, %270, %501, %732 : (f16, f16, f16, f16) -> vector<4xf16>
%963 = spirv.GL.FMax %962, %961 : vector<4xf16>
%964 = spirv.IsNan %962 : vector<4xf16>
%965 = spirv.IsNan %961 : vector<4xf16>
%966 = spirv.Select %964, %962, %963 : vector<4xi1>, vector<4xf16>
%967 = spirv.Select %965, %961, %966 : vector<4xi1>, vector<4xf16>
%968 = spirv.CompositeConstruct %42, %273, %504, %735 : (f16, f16, f16, f16) -> vector<4xf16>
%969 = spirv.GL.FMax %968, %967 : vector<4xf16>
%970 = spirv.IsNan %968 : vector<4xf16>
%971 = spirv.IsNan %967 : vector<4xf16>
%972 = spirv.Select %970, %968, %969 : vector<4xi1>, vector<4xf16>
%973 = spirv.Select %971, %967, %972 : vector<4xi1>, vector<4xf16>
%974 = spirv.CompositeConstruct %45, %276, %507, %738 : (f16, f16, f16, f16) -> vector<4xf16>
%975 = spirv.GL.FMax %974, %973 : vector<4xf16>
%976 = spirv.IsNan %974 : vector<4xf16>
%977 = spirv.IsNan %973 : vector<4xf16>
%978 = spirv.Select %976, %974, %975 : vector<4xi1>, vector<4xf16>
%979 = spirv.Select %977, %973, %978 : vector<4xi1>, vector<4xf16>
%980 = spirv.CompositeConstruct %48, %279, %510, %741 : (f16, f16, f16, f16) -> vector<4xf16>
%981 = spirv.GL.FMax %980, %979 : vector<4xf16>
%982 = spirv.IsNan %980 : vector<4xf16>
%983 = spirv.IsNan %979 : vector<4xf16>
%984 = spirv.Select %982, %980, %981 : vector<4xi1>, vector<4xf16>
%985 = spirv.Select %983, %979, %984 : vector<4xi1>, vector<4xf16>
%986 = spirv.CompositeConstruct %51, %282, %513, %744 : (f16, f16, f16, f16) -> vector<4xf16>
%987 = spirv.GL.FMax %986, %985 : vector<4xf16>
%988 = spirv.IsNan %986 : vector<4xf16>
%989 = spirv.IsNan %985 : vector<4xf16>
%990 = spirv.Select %988, %986, %987 : vector<4xi1>, vector<4xf16>
%991 = spirv.Select %989, %985, %990 : vector<4xi1>, vector<4xf16>
%992 = spirv.CompositeConstruct %54, %285, %516, %747 : (f16, f16, f16, f16) -> vector<4xf16>
%993 = spirv.GL.FMax %992, %991 : vector<4xf16>
%994 = spirv.IsNan %992 : vector<4xf16>
%995 = spirv.IsNan %991 : vector<4xf16>
%996 = spirv.Select %994, %992, %993 : vector<4xi1>, vector<4xf16>
%997 = spirv.Select %995, %991, %996 : vector<4xi1>, vector<4xf16>
%998 = spirv.CompositeConstruct %57, %288, %519, %750 : (f16, f16, f16, f16) -> vector<4xf16>
%999 = spirv.GL.FMax %998, %997 : vector<4xf16>
%1000 = spirv.IsNan %998 : vector<4xf16>
%1001 = spirv.IsNan %997 : vector<4xf16>
%1002 = spirv.Select %1000, %998, %999 : vector<4xi1>, vector<4xf16>
%1003 = spirv.Select %1001, %997, %1002 : vector<4xi1>, vector<4xf16>
%1004 = spirv.CompositeConstruct %60, %291, %522, %753 : (f16, f16, f16, f16) -> vector<4xf16>
%1005 = spirv.GL.FMax %1004, %1003 : vector<4xf16>
%1006 = spirv.IsNan %1004 : vector<4xf16>
%1007 = spirv.IsNan %1003 : vector<4xf16>
%1008 = spirv.Select %1006, %1004, %1005 : vector<4xi1>, vector<4xf16>
%1009 = spirv.Select %1007, %1003, %1008 : vector<4xi1>, vector<4xf16>
%1010 = spirv.CompositeConstruct %63, %294, %525, %756 : (f16, f16, f16, f16) -> vector<4xf16>
%1011 = spirv.GL.FMax %1010, %1009 : vector<4xf16>
%1012 = spirv.IsNan %1010 : vector<4xf16>
%1013 = spirv.IsNan %1009 : vector<4xf16>
%1014 = spirv.Select %1012, %1010, %1011 : vector<4xi1>, vector<4xf16>
%1015 = spirv.Select %1013, %1009, %1014 : vector<4xi1>, vector<4xf16>
%1016 = spirv.CompositeConstruct %66, %297, %528, %759 : (f16, f16, f16, f16) -> vector<4xf16>
%1017 = spirv.GL.FMax %1016, %1015 : vector<4xf16>
%1018 = spirv.IsNan %1016 : vector<4xf16>
%1019 = spirv.IsNan %1015 : vector<4xf16>
%1020 = spirv.Select %1018, %1016, %1017 : vector<4xi1>, vector<4xf16>
%1021 = spirv.Select %1019, %1015, %1020 : vector<4xi1>, vector<4xf16>
%1022 = spirv.CompositeConstruct %69, %300, %531, %762 : (f16, f16, f16, f16) -> vector<4xf16>
%1023 = spirv.GL.FMax %1022, %1021 : vector<4xf16>
%1024 = spirv.IsNan %1022 : vector<4xf16>
%1025 = spirv.IsNan %1021 : vector<4xf16>
%1026 = spirv.Select %1024, %1022, %1023 : vector<4xi1>, vector<4xf16>
%1027 = spirv.Select %1025, %1021, %1026 : vector<4xi1>, vector<4xf16>
%1028 = spirv.CompositeConstruct %72, %303, %534, %765 : (f16, f16, f16, f16) -> vector<4xf16>
%1029 = spirv.GL.FMax %1028, %1027 : vector<4xf16>
%1030 = spirv.IsNan %1028 : vector<4xf16>
%1031 = spirv.IsNan %1027 : vector<4xf16>
%1032 = spirv.Select %1030, %1028, %1029 : vector<4xi1>, vector<4xf16>
%1033 = spirv.Select %1031, %1027, %1032 : vector<4xi1>, vector<4xf16>
%1034 = spirv.CompositeConstruct %75, %306, %537, %768 : (f16, f16, f16, f16) -> vector<4xf16>
%1035 = spirv.GL.FMax %1034, %1033 : vector<4xf16>
%1036 = spirv.IsNan %1034 : vector<4xf16>
%1037 = spirv.IsNan %1033 : vector<4xf16>
%1038 = spirv.Select %1036, %1034, %1035 : vector<4xi1>, vector<4xf16>
%1039 = spirv.Select %1037, %1033, %1038 : vector<4xi1>, vector<4xf16>
%1040 = spirv.CompositeConstruct %78, %309, %540, %771 : (f16, f16, f16, f16) -> vector<4xf16>
%1041 = spirv.GL.FMax %1040, %1039 : vector<4xf16>
%1042 = spirv.IsNan %1040 : vector<4xf16>
%1043 = spirv.IsNan %1039 : vector<4xf16>
%1044 = spirv.Select %1042, %1040, %1041 : vector<4xi1>, vector<4xf16>
%1045 = spirv.Select %1043, %1039, %1044 : vector<4xi1>, vector<4xf16>
%1046 = spirv.CompositeConstruct %81, %312, %543, %774 : (f16, f16, f16, f16) -> vector<4xf16>
%1047 = spirv.GL.FMax %1046, %1045 : vector<4xf16>
%1048 = spirv.IsNan %1046 : vector<4xf16>
%1049 = spirv.IsNan %1045 : vector<4xf16>
%1050 = spirv.Select %1048, %1046, %1047 : vector<4xi1>, vector<4xf16>
%1051 = spirv.Select %1049, %1045, %1050 : vector<4xi1>, vector<4xf16>
%1052 = spirv.CompositeConstruct %84, %315, %546, %777 : (f16, f16, f16, f16) -> vector<4xf16>
%1053 = spirv.GL.FMax %1052, %1051 : vector<4xf16>
%1054 = spirv.IsNan %1052 : vector<4xf16>
%1055 = spirv.IsNan %1051 : vector<4xf16>
%1056 = spirv.Select %1054, %1052, %1053 : vector<4xi1>, vector<4xf16>
%1057 = spirv.Select %1055, %1051, %1056 : vector<4xi1>, vector<4xf16>
%1058 = spirv.CompositeConstruct %87, %318, %549, %780 : (f16, f16, f16, f16) -> vector<4xf16>
%1059 = spirv.GL.FMax %1058, %1057 : vector<4xf16>
%1060 = spirv.IsNan %1058 : vector<4xf16>
%1061 = spirv.IsNan %1057 : vector<4xf16>
%1062 = spirv.Select %1060, %1058, %1059 : vector<4xi1>, vector<4xf16>
%1063 = spirv.Select %1061, %1057, %1062 : vector<4xi1>, vector<4xf16>
%1064 = spirv.CompositeConstruct %90, %321, %552, %783 : (f16, f16, f16, f16) -> vector<4xf16>
%1065 = spirv.GL.FMax %1064, %1063 : vector<4xf16>
%1066 = spirv.IsNan %1064 : vector<4xf16>
%1067 = spirv.IsNan %1063 : vector<4xf16>
%1068 = spirv.Select %1066, %1064, %1065 : vector<4xi1>, vector<4xf16>
%1069 = spirv.Select %1067, %1063, %1068 : vector<4xi1>, vector<4xf16>
%1070 = spirv.CompositeConstruct %93, %324, %555, %786 : (f16, f16, f16, f16) -> vector<4xf16>
%1071 = spirv.GL.FMax %1070, %1069 : vector<4xf16>
%1072 = spirv.IsNan %1070 : vector<4xf16>
%1073 = spirv.IsNan %1069 : vector<4xf16>
%1074 = spirv.Select %1072, %1070, %1071 : vector<4xi1>, vector<4xf16>
%1075 = spirv.Select %1073, %1069, %1074 : vector<4xi1>, vector<4xf16>
%1076 = spirv.CompositeConstruct %96, %327, %558, %789 : (f16, f16, f16, f16) -> vector<4xf16>
%1077 = spirv.GL.FMax %1076, %1075 : vector<4xf16>
%1078 = spirv.IsNan %1076 : vector<4xf16>
%1079 = spirv.IsNan %1075 : vector<4xf16>
%1080 = spirv.Select %1078, %1076, %1077 : vector<4xi1>, vector<4xf16>
%1081 = spirv.Select %1079, %1075, %1080 : vector<4xi1>, vector<4xf16>
%1082 = spirv.CompositeConstruct %99, %330, %561, %792 : (f16, f16, f16, f16) -> vector<4xf16>
%1083 = spirv.GL.FMax %1082, %1081 : vector<4xf16>
%1084 = spirv.IsNan %1082 : vector<4xf16>
%1085 = spirv.IsNan %1081 : vector<4xf16>
%1086 = spirv.Select %1084, %1082, %1083 : vector<4xi1>, vector<4xf16>
%1087 = spirv.Select %1085, %1081, %1086 : vector<4xi1>, vector<4xf16>
%1088 = spirv.CompositeConstruct %102, %333, %564, %795 : (f16, f16, f16, f16) -> vector<4xf16>
%1089 = spirv.GL.FMax %1088, %1087 : vector<4xf16>
%1090 = spirv.IsNan %1088 : vector<4xf16>
%1091 = spirv.IsNan %1087 : vector<4xf16>
%1092 = spirv.Select %1090, %1088, %1089 : vector<4xi1>, vector<4xf16>
%1093 = spirv.Select %1091, %1087, %1092 : vector<4xi1>, vector<4xf16>
%1094 = spirv.CompositeConstruct %105, %336, %567, %798 : (f16, f16, f16, f16) -> vector<4xf16>
%1095 = spirv.GL.FMax %1094, %1093 : vector<4xf16>
%1096 = spirv.IsNan %1094 : vector<4xf16>
%1097 = spirv.IsNan %1093 : vector<4xf16>
%1098 = spirv.Select %1096, %1094, %1095 : vector<4xi1>, vector<4xf16>
%1099 = spirv.Select %1097, %1093, %1098 : vector<4xi1>, vector<4xf16>
%1100 = spirv.CompositeConstruct %108, %339, %570, %801 : (f16, f16, f16, f16) -> vector<4xf16>
%1101 = spirv.GL.FMax %1100, %1099 : vector<4xf16>
%1102 = spirv.IsNan %1100 : vector<4xf16>
%1103 = spirv.IsNan %1099 : vector<4xf16>
%1104 = spirv.Select %1102, %1100, %1101 : vector<4xi1>, vector<4xf16>
%1105 = spirv.Select %1103, %1099, %1104 : vector<4xi1>, vector<4xf16>
%1106 = spirv.CompositeConstruct %111, %342, %573, %804 : (f16, f16, f16, f16) -> vector<4xf16>
%1107 = spirv.GL.FMax %1106, %1105 : vector<4xf16>
%1108 = spirv.IsNan %1106 : vector<4xf16>
%1109 = spirv.IsNan %1105 : vector<4xf16>
%1110 = spirv.Select %1108, %1106, %1107 : vector<4xi1>, vector<4xf16>
%1111 = spirv.Select %1109, %1105, %1110 : vector<4xi1>, vector<4xf16>
%1112 = spirv.CompositeConstruct %114, %345, %576, %807 : (f16, f16, f16, f16) -> vector<4xf16>
%1113 = spirv.GL.FMax %1112, %1111 : vector<4xf16>
%1114 = spirv.IsNan %1112 : vector<4xf16>
%1115 = spirv.IsNan %1111 : vector<4xf16>
%1116 = spirv.Select %1114, %1112, %1113 : vector<4xi1>, vector<4xf16>
%1117 = spirv.Select %1115, %1111, %1116 : vector<4xi1>, vector<4xf16>
%1118 = spirv.CompositeConstruct %117, %348, %579, %810 : (f16, f16, f16, f16) -> vector<4xf16>
%1119 = spirv.GL.FMax %1118, %1117 : vector<4xf16>
%1120 = spirv.IsNan %1118 : vector<4xf16>
%1121 = spirv.IsNan %1117 : vector<4xf16>
%1122 = spirv.Select %1120, %1118, %1119 : vector<4xi1>, vector<4xf16>
%1123 = spirv.Select %1121, %1117, %1122 : vector<4xi1>, vector<4xf16>
%1124 = spirv.CompositeConstruct %120, %351, %582, %813 : (f16, f16, f16, f16) -> vector<4xf16>
%1125 = spirv.GL.FMax %1124, %1123 : vector<4xf16>
%1126 = spirv.IsNan %1124 : vector<4xf16>
%1127 = spirv.IsNan %1123 : vector<4xf16>
%1128 = spirv.Select %1126, %1124, %1125 : vector<4xi1>, vector<4xf16>
%1129 = spirv.Select %1127, %1123, %1128 : vector<4xi1>, vector<4xf16>
%1130 = spirv.CompositeConstruct %123, %354, %585, %816 : (f16, f16, f16, f16) -> vector<4xf16>
%1131 = spirv.GL.FMax %1130, %1129 : vector<4xf16>
%1132 = spirv.IsNan %1130 : vector<4xf16>
%1133 = spirv.IsNan %1129 : vector<4xf16>
%1134 = spirv.Select %1132, %1130, %1131 : vector<4xi1>, vector<4xf16>
%1135 = spirv.Select %1133, %1129, %1134 : vector<4xi1>, vector<4xf16>
%1136 = spirv.CompositeConstruct %126, %357, %588, %819 : (f16, f16, f16, f16) -> vector<4xf16>
%1137 = spirv.GL.FMax %1136, %1135 : vector<4xf16>
%1138 = spirv.IsNan %1136 : vector<4xf16>
%1139 = spirv.IsNan %1135 : vector<4xf16>
%1140 = spirv.Select %1138, %1136, %1137 : vector<4xi1>, vector<4xf16>
%1141 = spirv.Select %1139, %1135, %1140 : vector<4xi1>, vector<4xf16>
%1142 = spirv.CompositeConstruct %129, %360, %591, %822 : (f16, f16, f16, f16) -> vector<4xf16>
%1143 = spirv.GL.FMax %1142, %1141 : vector<4xf16>
%1144 = spirv.IsNan %1142 : vector<4xf16>
%1145 = spirv.IsNan %1141 : vector<4xf16>
%1146 = spirv.Select %1144, %1142, %1143 : vector<4xi1>, vector<4xf16>
%1147 = spirv.Select %1145, %1141, %1146 : vector<4xi1>, vector<4xf16>
%1148 = spirv.CompositeConstruct %132, %363, %594, %825 : (f16, f16, f16, f16) -> vector<4xf16>
%1149 = spirv.GL.FMax %1148, %1147 : vector<4xf16>
%1150 = spirv.IsNan %1148 : vector<4xf16>
%1151 = spirv.IsNan %1147 : vector<4xf16>
%1152 = spirv.Select %1150, %1148, %1149 : vector<4xi1>, vector<4xf16>
%1153 = spirv.Select %1151, %1147, %1152 : vector<4xi1>, vector<4xf16>
%1154 = spirv.CompositeConstruct %135, %366, %597, %828 : (f16, f16, f16, f16) -> vector<4xf16>
%1155 = spirv.GL.FMax %1154, %1153 : vector<4xf16>
%1156 = spirv.IsNan %1154 : vector<4xf16>
%1157 = spirv.IsNan %1153 : vector<4xf16>
%1158 = spirv.Select %1156, %1154, %1155 : vector<4xi1>, vector<4xf16>
%1159 = spirv.Select %1157, %1153, %1158 : vector<4xi1>, vector<4xf16>
%1160 = spirv.CompositeConstruct %138, %369, %600, %831 : (f16, f16, f16, f16) -> vector<4xf16>
%1161 = spirv.GL.FMax %1160, %1159 : vector<4xf16>
%1162 = spirv.IsNan %1160 : vector<4xf16>
%1163 = spirv.IsNan %1159 : vector<4xf16>
%1164 = spirv.Select %1162, %1160, %1161 : vector<4xi1>, vector<4xf16>
%1165 = spirv.Select %1163, %1159, %1164 : vector<4xi1>, vector<4xf16>
%1166 = spirv.CompositeConstruct %141, %372, %603, %834 : (f16, f16, f16, f16) -> vector<4xf16>
%1167 = spirv.GL.FMax %1166, %1165 : vector<4xf16>
%1168 = spirv.IsNan %1166 : vector<4xf16>
%1169 = spirv.IsNan %1165 : vector<4xf16>
%1170 = spirv.Select %1168, %1166, %1167 : vector<4xi1>, vector<4xf16>
%1171 = spirv.Select %1169, %1165, %1170 : vector<4xi1>, vector<4xf16>
%1172 = spirv.CompositeConstruct %144, %375, %606, %837 : (f16, f16, f16, f16) -> vector<4xf16>
%1173 = spirv.GL.FMax %1172, %1171 : vector<4xf16>
%1174 = spirv.IsNan %1172 : vector<4xf16>
%1175 = spirv.IsNan %1171 : vector<4xf16>
%1176 = spirv.Select %1174, %1172, %1173 : vector<4xi1>, vector<4xf16>
%1177 = spirv.Select %1175, %1171, %1176 : vector<4xi1>, vector<4xf16>
%1178 = spirv.CompositeConstruct %147, %378, %609, %840 : (f16, f16, f16, f16) -> vector<4xf16>
%1179 = spirv.GL.FMax %1178, %1177 : vector<4xf16>
%1180 = spirv.IsNan %1178 : vector<4xf16>
%1181 = spirv.IsNan %1177 : vector<4xf16>
%1182 = spirv.Select %1180, %1178, %1179 : vector<4xi1>, vector<4xf16>
%1183 = spirv.Select %1181, %1177, %1182 : vector<4xi1>, vector<4xf16>
%1184 = spirv.CompositeConstruct %150, %381, %612, %843 : (f16, f16, f16, f16) -> vector<4xf16>
%1185 = spirv.GL.FMax %1184, %1183 : vector<4xf16>
%1186 = spirv.IsNan %1184 : vector<4xf16>
%1187 = spirv.IsNan %1183 : vector<4xf16>
%1188 = spirv.Select %1186, %1184, %1185 : vector<4xi1>, vector<4xf16>
%1189 = spirv.Select %1187, %1183, %1188 : vector<4xi1>, vector<4xf16>
%1190 = spirv.CompositeConstruct %153, %384, %615, %846 : (f16, f16, f16, f16) -> vector<4xf16>
%1191 = spirv.GL.FMax %1190, %1189 : vector<4xf16>
%1192 = spirv.IsNan %1190 : vector<4xf16>
%1193 = spirv.IsNan %1189 : vector<4xf16>
%1194 = spirv.Select %1192, %1190, %1191 : vector<4xi1>, vector<4xf16>
%1195 = spirv.Select %1193, %1189, %1194 : vector<4xi1>, vector<4xf16>
%1196 = spirv.CompositeConstruct %156, %387, %618, %849 : (f16, f16, f16, f16) -> vector<4xf16>
%1197 = spirv.GL.FMax %1196, %1195 : vector<4xf16>
%1198 = spirv.IsNan %1196 : vector<4xf16>
%1199 = spirv.IsNan %1195 : vector<4xf16>
%1200 = spirv.Select %1198, %1196, %1197 : vector<4xi1>, vector<4xf16>
%1201 = spirv.Select %1199, %1195, %1200 : vector<4xi1>, vector<4xf16>
%1202 = spirv.CompositeConstruct %159, %390, %621, %852 : (f16, f16, f16, f16) -> vector<4xf16>
%1203 = spirv.GL.FMax %1202, %1201 : vector<4xf16>
%1204 = spirv.IsNan %1202 : vector<4xf16>
%1205 = spirv.IsNan %1201 : vector<4xf16>
%1206 = spirv.Select %1204, %1202, %1203 : vector<4xi1>, vector<4xf16>
%1207 = spirv.Select %1205, %1201, %1206 : vector<4xi1>, vector<4xf16>
%1208 = spirv.CompositeConstruct %162, %393, %624, %855 : (f16, f16, f16, f16) -> vector<4xf16>
%1209 = spirv.GL.FMax %1208, %1207 : vector<4xf16>
%1210 = spirv.IsNan %1208 : vector<4xf16>
%1211 = spirv.IsNan %1207 : vector<4xf16>
%1212 = spirv.Select %1210, %1208, %1209 : vector<4xi1>, vector<4xf16>
%1213 = spirv.Select %1211, %1207, %1212 : vector<4xi1>, vector<4xf16>
%1214 = spirv.CompositeConstruct %165, %396, %627, %858 : (f16, f16, f16, f16) -> vector<4xf16>
%1215 = spirv.GL.FMax %1214, %1213 : vector<4xf16>
%1216 = spirv.IsNan %1214 : vector<4xf16>
%1217 = spirv.IsNan %1213 : vector<4xf16>
%1218 = spirv.Select %1216, %1214, %1215 : vector<4xi1>, vector<4xf16>
%1219 = spirv.Select %1217, %1213, %1218 : vector<4xi1>, vector<4xf16>
%1220 = spirv.CompositeConstruct %168, %399, %630, %861 : (f16, f16, f16, f16) -> vector<4xf16>
%1221 = spirv.GL.FMax %1220, %1219 : vector<4xf16>
%1222 = spirv.IsNan %1220 : vector<4xf16>
%1223 = spirv.IsNan %1219 : vector<4xf16>
%1224 = spirv.Select %1222, %1220, %1221 : vector<4xi1>, vector<4xf16>
%1225 = spirv.Select %1223, %1219, %1224 : vector<4xi1>, vector<4xf16>
%1226 = spirv.CompositeConstruct %171, %402, %633, %864 : (f16, f16, f16, f16) -> vector<4xf16>
%1227 = spirv.GL.FMax %1226, %1225 : vector<4xf16>
%1228 = spirv.IsNan %1226 : vector<4xf16>
%1229 = spirv.IsNan %1225 : vector<4xf16>
%1230 = spirv.Select %1228, %1226, %1227 : vector<4xi1>, vector<4xf16>
%1231 = spirv.Select %1229, %1225, %1230 : vector<4xi1>, vector<4xf16>
%1232 = spirv.CompositeConstruct %174, %405, %636, %867 : (f16, f16, f16, f16) -> vector<4xf16>
%1233 = spirv.GL.FMax %1232, %1231 : vector<4xf16>
%1234 = spirv.IsNan %1232 : vector<4xf16>
%1235 = spirv.IsNan %1231 : vector<4xf16>
%1236 = spirv.Select %1234, %1232, %1233 : vector<4xi1>, vector<4xf16>
%1237 = spirv.Select %1235, %1231, %1236 : vector<4xi1>, vector<4xf16>
%1238 = spirv.CompositeConstruct %177, %408, %639, %870 : (f16, f16, f16, f16) -> vector<4xf16>
%1239 = spirv.GL.FMax %1238, %1237 : vector<4xf16>
%1240 = spirv.IsNan %1238 : vector<4xf16>
%1241 = spirv.IsNan %1237 : vector<4xf16>
%1242 = spirv.Select %1240, %1238, %1239 : vector<4xi1>, vector<4xf16>
%1243 = spirv.Select %1241, %1237, %1242 : vector<4xi1>, vector<4xf16>
%1244 = spirv.CompositeConstruct %180, %411, %642, %873 : (f16, f16, f16, f16) -> vector<4xf16>
%1245 = spirv.GL.FMax %1244, %1243 : vector<4xf16>
%1246 = spirv.IsNan %1244 : vector<4xf16>
%1247 = spirv.IsNan %1243 : vector<4xf16>
%1248 = spirv.Select %1246, %1244, %1245 : vector<4xi1>, vector<4xf16>
%1249 = spirv.Select %1247, %1243, %1248 : vector<4xi1>, vector<4xf16>
%1250 = spirv.CompositeConstruct %183, %414, %645, %876 : (f16, f16, f16, f16) -> vector<4xf16>
%1251 = spirv.GL.FMax %1250, %1249 : vector<4xf16>
%1252 = spirv.IsNan %1250 : vector<4xf16>
%1253 = spirv.IsNan %1249 : vector<4xf16>
%1254 = spirv.Select %1252, %1250, %1251 : vector<4xi1>, vector<4xf16>
%1255 = spirv.Select %1253, %1249, %1254 : vector<4xi1>, vector<4xf16>
%1256 = spirv.CompositeConstruct %186, %417, %648, %879 : (f16, f16, f16, f16) -> vector<4xf16>
%1257 = spirv.GL.FMax %1256, %1255 : vector<4xf16>
%1258 = spirv.IsNan %1256 : vector<4xf16>
%1259 = spirv.IsNan %1255 : vector<4xf16>
%1260 = spirv.Select %1258, %1256, %1257 : vector<4xi1>, vector<4xf16>
%1261 = spirv.Select %1259, %1255, %1260 : vector<4xi1>, vector<4xf16>
%1262 = spirv.CompositeConstruct %189, %420, %651, %882 : (f16, f16, f16, f16) -> vector<4xf16>
%1263 = spirv.GL.FMax %1262, %1261 : vector<4xf16>
%1264 = spirv.IsNan %1262 : vector<4xf16>
%1265 = spirv.IsNan %1261 : vector<4xf16>
%1266 = spirv.Select %1264, %1262, %1263 : vector<4xi1>, vector<4xf16>
%1267 = spirv.Select %1265, %1261, %1266 : vector<4xi1>, vector<4xf16>
%1268 = spirv.CompositeConstruct %192, %423, %654, %885 : (f16, f16, f16, f16) -> vector<4xf16>
%1269 = spirv.GL.FMax %1268, %1267 : vector<4xf16>
%1270 = spirv.IsNan %1268 : vector<4xf16>
%1271 = spirv.IsNan %1267 : vector<4xf16>
%1272 = spirv.Select %1270, %1268, %1269 : vector<4xi1>, vector<4xf16>
%1273 = spirv.Select %1271, %1267, %1272 : vector<4xi1>, vector<4xf16>
%1274 = spirv.CompositeConstruct %195, %426, %657, %888 : (f16, f16, f16, f16) -> vector<4xf16>
%1275 = spirv.GL.FMax %1274, %1273 : vector<4xf16>
%1276 = spirv.IsNan %1274 : vector<4xf16>
%1277 = spirv.IsNan %1273 : vector<4xf16>
%1278 = spirv.Select %1276, %1274, %1275 : vector<4xi1>, vector<4xf16>
%1279 = spirv.Select %1277, %1273, %1278 : vector<4xi1>, vector<4xf16>
%1280 = spirv.CompositeConstruct %198, %429, %660, %891 : (f16, f16, f16, f16) -> vector<4xf16>
%1281 = spirv.GL.FMax %1280, %1279 : vector<4xf16>
%1282 = spirv.IsNan %1280 : vector<4xf16>
%1283 = spirv.IsNan %1279 : vector<4xf16>
%1284 = spirv.Select %1282, %1280, %1281 : vector<4xi1>, vector<4xf16>
%1285 = spirv.Select %1283, %1279, %1284 : vector<4xi1>, vector<4xf16>
%1286 = spirv.CompositeConstruct %201, %432, %663, %894 : (f16, f16, f16, f16) -> vector<4xf16>
%1287 = spirv.GL.FMax %1286, %1285 : vector<4xf16>
%1288 = spirv.IsNan %1286 : vector<4xf16>
%1289 = spirv.IsNan %1285 : vector<4xf16>
%1290 = spirv.Select %1288, %1286, %1287 : vector<4xi1>, vector<4xf16>
%1291 = spirv.Select %1289, %1285, %1290 : vector<4xi1>, vector<4xf16>
%1292 = spirv.CompositeConstruct %204, %435, %666, %897 : (f16, f16, f16, f16) -> vector<4xf16>
%1293 = spirv.GL.FMax %1292, %1291 : vector<4xf16>
%1294 = spirv.IsNan %1292 : vector<4xf16>
%1295 = spirv.IsNan %1291 : vector<4xf16>
%1296 = spirv.Select %1294, %1292, %1293 : vector<4xi1>, vector<4xf16>
%1297 = spirv.Select %1295, %1291, %1296 : vector<4xi1>, vector<4xf16>
%1298 = spirv.CompositeConstruct %207, %438, %669, %900 : (f16, f16, f16, f16) -> vector<4xf16>
%1299 = spirv.GL.FMax %1298, %1297 : vector<4xf16>
%1300 = spirv.IsNan %1298 : vector<4xf16>
%1301 = spirv.IsNan %1297 : vector<4xf16>
%1302 = spirv.Select %1300, %1298, %1299 : vector<4xi1>, vector<4xf16>
%1303 = spirv.Select %1301, %1297, %1302 : vector<4xi1>, vector<4xf16>
%1304 = spirv.CompositeConstruct %210, %441, %672, %903 : (f16, f16, f16, f16) -> vector<4xf16>
%1305 = spirv.GL.FMax %1304, %1303 : vector<4xf16>
%1306 = spirv.IsNan %1304 : vector<4xf16>
%1307 = spirv.IsNan %1303 : vector<4xf16>
%1308 = spirv.Select %1306, %1304, %1305 : vector<4xi1>, vector<4xf16>
%1309 = spirv.Select %1307, %1303, %1308 : vector<4xi1>, vector<4xf16>
%1310 = spirv.CompositeConstruct %213, %444, %675, %906 : (f16, f16, f16, f16) -> vector<4xf16>
%1311 = spirv.GL.FMax %1310, %1309 : vector<4xf16>
%1312 = spirv.IsNan %1310 : vector<4xf16>
%1313 = spirv.IsNan %1309 : vector<4xf16>
%1314 = spirv.Select %1312, %1310, %1311 : vector<4xi1>, vector<4xf16>
%1315 = spirv.Select %1313, %1309, %1314 : vector<4xi1>, vector<4xf16>
%1316 = spirv.CompositeConstruct %216, %447, %678, %909 : (f16, f16, f16, f16) -> vector<4xf16>
%1317 = spirv.GL.FMax %1316, %1315 : vector<4xf16>
%1318 = spirv.IsNan %1316 : vector<4xf16>
%1319 = spirv.IsNan %1315 : vector<4xf16>
%1320 = spirv.Select %1318, %1316, %1317 : vector<4xi1>, vector<4xf16>
%1321 = spirv.Select %1319, %1315, %1320 : vector<4xi1>, vector<4xf16>
%1322 = spirv.CompositeConstruct %219, %450, %681, %912 : (f16, f16, f16, f16) -> vector<4xf16>
%1323 = spirv.GL.FMax %1322, %1321 : vector<4xf16>
%1324 = spirv.IsNan %1322 : vector<4xf16>
%1325 = spirv.IsNan %1321 : vector<4xf16>
%1326 = spirv.Select %1324, %1322, %1323 : vector<4xi1>, vector<4xf16>
%1327 = spirv.Select %1325, %1321, %1326 : vector<4xi1>, vector<4xf16>
%1328 = spirv.CompositeConstruct %222, %453, %684, %915 : (f16, f16, f16, f16) -> vector<4xf16>
%1329 = spirv.GL.FMax %1328, %1327 : vector<4xf16>
%1330 = spirv.IsNan %1328 : vector<4xf16>
%1331 = spirv.IsNan %1327 : vector<4xf16>
%1332 = spirv.Select %1330, %1328, %1329 : vector<4xi1>, vector<4xf16>
%1333 = spirv.Select %1331, %1327, %1332 : vector<4xi1>, vector<4xf16>
%1334 = spirv.CompositeConstruct %225, %456, %687, %918 : (f16, f16, f16, f16) -> vector<4xf16>
%1335 = spirv.GL.FMax %1334, %1333 : vector<4xf16>
%1336 = spirv.IsNan %1334 : vector<4xf16>
%1337 = spirv.IsNan %1333 : vector<4xf16>
%1338 = spirv.Select %1336, %1334, %1335 : vector<4xi1>, vector<4xf16>
%1339 = spirv.Select %1337, %1333, %1338 : vector<4xi1>, vector<4xf16>
// NaN-propagating running maximum over vector<4xf16>, fully unrolled.
// Every 6-op step below has the same shape:
//   1. spirv.CompositeConstruct packs four f16 scalars (one per lane) into a vector.
//   2. spirv.GL.FMax combines that vector with the running accumulator.
//   3. spirv.IsNan is evaluated on the new vector and on the accumulator.
//   4. First Select: lanes where the new vector is NaN take the new vector,
//      all other lanes take the FMax result.
//   5. Second Select: lanes where the accumulator is NaN keep the accumulator,
//      all other lanes take the value from (4). When both inputs are NaN the
//      accumulator's lane therefore wins.
// The explicit IsNan/Select pair is needed because GLSL.std.450 FMax leaves the
// result undefined when an operand is NaN.
// The accumulator entering the first step (%1339) and all scalar operands are
// defined earlier in the function; the accumulator leaving the last step is %1411.
%1340 = spirv.CompositeConstruct %228, %459, %690, %921 : (f16, f16, f16, f16) -> vector<4xf16>
%1341 = spirv.GL.FMax %1340, %1339 : vector<4xf16>
%1342 = spirv.IsNan %1340 : vector<4xf16>
%1343 = spirv.IsNan %1339 : vector<4xf16>
%1344 = spirv.Select %1342, %1340, %1341 : vector<4xi1>, vector<4xf16>
%1345 = spirv.Select %1343, %1339, %1344 : vector<4xi1>, vector<4xf16>
%1346 = spirv.CompositeConstruct %231, %462, %693, %924 : (f16, f16, f16, f16) -> vector<4xf16>
%1347 = spirv.GL.FMax %1346, %1345 : vector<4xf16>
%1348 = spirv.IsNan %1346 : vector<4xf16>
%1349 = spirv.IsNan %1345 : vector<4xf16>
%1350 = spirv.Select %1348, %1346, %1347 : vector<4xi1>, vector<4xf16>
%1351 = spirv.Select %1349, %1345, %1350 : vector<4xi1>, vector<4xf16>
%1352 = spirv.CompositeConstruct %234, %465, %696, %927 : (f16, f16, f16, f16) -> vector<4xf16>
%1353 = spirv.GL.FMax %1352, %1351 : vector<4xf16>
%1354 = spirv.IsNan %1352 : vector<4xf16>
%1355 = spirv.IsNan %1351 : vector<4xf16>
%1356 = spirv.Select %1354, %1352, %1353 : vector<4xi1>, vector<4xf16>
%1357 = spirv.Select %1355, %1351, %1356 : vector<4xi1>, vector<4xf16>
%1358 = spirv.CompositeConstruct %237, %468, %699, %930 : (f16, f16, f16, f16) -> vector<4xf16>
%1359 = spirv.GL.FMax %1358, %1357 : vector<4xf16>
%1360 = spirv.IsNan %1358 : vector<4xf16>
%1361 = spirv.IsNan %1357 : vector<4xf16>
%1362 = spirv.Select %1360, %1358, %1359 : vector<4xi1>, vector<4xf16>
%1363 = spirv.Select %1361, %1357, %1362 : vector<4xi1>, vector<4xf16>
%1364 = spirv.CompositeConstruct %240, %471, %702, %933 : (f16, f16, f16, f16) -> vector<4xf16>
%1365 = spirv.GL.FMax %1364, %1363 : vector<4xf16>
%1366 = spirv.IsNan %1364 : vector<4xf16>
%1367 = spirv.IsNan %1363 : vector<4xf16>
%1368 = spirv.Select %1366, %1364, %1365 : vector<4xi1>, vector<4xf16>
%1369 = spirv.Select %1367, %1363, %1368 : vector<4xi1>, vector<4xf16>
%1370 = spirv.CompositeConstruct %243, %474, %705, %936 : (f16, f16, f16, f16) -> vector<4xf16>
%1371 = spirv.GL.FMax %1370, %1369 : vector<4xf16>
%1372 = spirv.IsNan %1370 : vector<4xf16>
%1373 = spirv.IsNan %1369 : vector<4xf16>
%1374 = spirv.Select %1372, %1370, %1371 : vector<4xi1>, vector<4xf16>
%1375 = spirv.Select %1373, %1369, %1374 : vector<4xi1>, vector<4xf16>
%1376 = spirv.CompositeConstruct %246, %477, %708, %939 : (f16, f16, f16, f16) -> vector<4xf16>
%1377 = spirv.GL.FMax %1376, %1375 : vector<4xf16>
%1378 = spirv.IsNan %1376 : vector<4xf16>
%1379 = spirv.IsNan %1375 : vector<4xf16>
%1380 = spirv.Select %1378, %1376, %1377 : vector<4xi1>, vector<4xf16>
%1381 = spirv.Select %1379, %1375, %1380 : vector<4xi1>, vector<4xf16>
%1382 = spirv.CompositeConstruct %249, %480, %711, %942 : (f16, f16, f16, f16) -> vector<4xf16>
%1383 = spirv.GL.FMax %1382, %1381 : vector<4xf16>
%1384 = spirv.IsNan %1382 : vector<4xf16>
%1385 = spirv.IsNan %1381 : vector<4xf16>
%1386 = spirv.Select %1384, %1382, %1383 : vector<4xi1>, vector<4xf16>
%1387 = spirv.Select %1385, %1381, %1386 : vector<4xi1>, vector<4xf16>
%1388 = spirv.CompositeConstruct %252, %483, %714, %945 : (f16, f16, f16, f16) -> vector<4xf16>
%1389 = spirv.GL.FMax %1388, %1387 : vector<4xf16>
%1390 = spirv.IsNan %1388 : vector<4xf16>
%1391 = spirv.IsNan %1387 : vector<4xf16>
%1392 = spirv.Select %1390, %1388, %1389 : vector<4xi1>, vector<4xf16>
%1393 = spirv.Select %1391, %1387, %1392 : vector<4xi1>, vector<4xf16>
%1394 = spirv.CompositeConstruct %255, %486, %717, %948 : (f16, f16, f16, f16) -> vector<4xf16>
%1395 = spirv.GL.FMax %1394, %1393 : vector<4xf16>
%1396 = spirv.IsNan %1394 : vector<4xf16>
%1397 = spirv.IsNan %1393 : vector<4xf16>
%1398 = spirv.Select %1396, %1394, %1395 : vector<4xi1>, vector<4xf16>
%1399 = spirv.Select %1397, %1393, %1398 : vector<4xi1>, vector<4xf16>
%1400 = spirv.CompositeConstruct %258, %489, %720, %951 : (f16, f16, f16, f16) -> vector<4xf16>
%1401 = spirv.GL.FMax %1400, %1399 : vector<4xf16>
%1402 = spirv.IsNan %1400 : vector<4xf16>
%1403 = spirv.IsNan %1399 : vector<4xf16>
%1404 = spirv.Select %1402, %1400, %1401 : vector<4xi1>, vector<4xf16>
%1405 = spirv.Select %1403, %1399, %1404 : vector<4xi1>, vector<4xf16>
%1406 = spirv.CompositeConstruct %261, %492, %723, %954 : (f16, f16, f16, f16) -> vector<4xf16>
%1407 = spirv.GL.FMax %1406, %1405 : vector<4xf16>
%1408 = spirv.IsNan %1406 : vector<4xf16>
%1409 = spirv.IsNan %1405 : vector<4xf16>
%1410 = spirv.Select %1408, %1406, %1407 : vector<4xi1>, vector<4xf16>
%1411 = spirv.Select %1409, %1405, %1410 : vector<4xi1>, vector<4xf16>
// Split the reduced vector %1411 into its four f16 lanes (%1412 .. %1415).
%1412 = spirv.CompositeExtract %1411[0 : i32] : vector<4xf16>
%1413 = spirv.CompositeExtract %1411[1 : i32] : vector<4xf16>
%1414 = spirv.CompositeExtract %1411[2 : i32] : vector<4xf16>
%1415 = spirv.CompositeExtract %1411[3 : i32] : vector<4xf16>
// Subtract each lane's extracted value from 77 f16 scalars, fully unrolled
// (4 groups x 77 = 308 spirv.FSub ops). Within a group the left-hand operands
// advance by three SSA ids per op.
// NOTE(review): subtracting a running maximum and then feeding the differences
// into the f32 expansion that follows looks like the stable-softmax
// "x - max(x)" stage — confirm against the enclosing function, which is
// outside this excerpt.
//
// Group 0: %33 .. %261 minus lane 0 (%1412) -> %1416 .. %1492
%1416 = spirv.FSub %33, %1412 : f16
%1417 = spirv.FSub %36, %1412 : f16
%1418 = spirv.FSub %39, %1412 : f16
%1419 = spirv.FSub %42, %1412 : f16
%1420 = spirv.FSub %45, %1412 : f16
%1421 = spirv.FSub %48, %1412 : f16
%1422 = spirv.FSub %51, %1412 : f16
%1423 = spirv.FSub %54, %1412 : f16
%1424 = spirv.FSub %57, %1412 : f16
%1425 = spirv.FSub %60, %1412 : f16
%1426 = spirv.FSub %63, %1412 : f16
%1427 = spirv.FSub %66, %1412 : f16
%1428 = spirv.FSub %69, %1412 : f16
%1429 = spirv.FSub %72, %1412 : f16
%1430 = spirv.FSub %75, %1412 : f16
%1431 = spirv.FSub %78, %1412 : f16
%1432 = spirv.FSub %81, %1412 : f16
%1433 = spirv.FSub %84, %1412 : f16
%1434 = spirv.FSub %87, %1412 : f16
%1435 = spirv.FSub %90, %1412 : f16
%1436 = spirv.FSub %93, %1412 : f16
%1437 = spirv.FSub %96, %1412 : f16
%1438 = spirv.FSub %99, %1412 : f16
%1439 = spirv.FSub %102, %1412 : f16
%1440 = spirv.FSub %105, %1412 : f16
%1441 = spirv.FSub %108, %1412 : f16
%1442 = spirv.FSub %111, %1412 : f16
%1443 = spirv.FSub %114, %1412 : f16
%1444 = spirv.FSub %117, %1412 : f16
%1445 = spirv.FSub %120, %1412 : f16
%1446 = spirv.FSub %123, %1412 : f16
%1447 = spirv.FSub %126, %1412 : f16
%1448 = spirv.FSub %129, %1412 : f16
%1449 = spirv.FSub %132, %1412 : f16
%1450 = spirv.FSub %135, %1412 : f16
%1451 = spirv.FSub %138, %1412 : f16
%1452 = spirv.FSub %141, %1412 : f16
%1453 = spirv.FSub %144, %1412 : f16
%1454 = spirv.FSub %147, %1412 : f16
%1455 = spirv.FSub %150, %1412 : f16
%1456 = spirv.FSub %153, %1412 : f16
%1457 = spirv.FSub %156, %1412 : f16
%1458 = spirv.FSub %159, %1412 : f16
%1459 = spirv.FSub %162, %1412 : f16
%1460 = spirv.FSub %165, %1412 : f16
%1461 = spirv.FSub %168, %1412 : f16
%1462 = spirv.FSub %171, %1412 : f16
%1463 = spirv.FSub %174, %1412 : f16
%1464 = spirv.FSub %177, %1412 : f16
%1465 = spirv.FSub %180, %1412 : f16
%1466 = spirv.FSub %183, %1412 : f16
%1467 = spirv.FSub %186, %1412 : f16
%1468 = spirv.FSub %189, %1412 : f16
%1469 = spirv.FSub %192, %1412 : f16
%1470 = spirv.FSub %195, %1412 : f16
%1471 = spirv.FSub %198, %1412 : f16
%1472 = spirv.FSub %201, %1412 : f16
%1473 = spirv.FSub %204, %1412 : f16
%1474 = spirv.FSub %207, %1412 : f16
%1475 = spirv.FSub %210, %1412 : f16
%1476 = spirv.FSub %213, %1412 : f16
%1477 = spirv.FSub %216, %1412 : f16
%1478 = spirv.FSub %219, %1412 : f16
%1479 = spirv.FSub %222, %1412 : f16
%1480 = spirv.FSub %225, %1412 : f16
%1481 = spirv.FSub %228, %1412 : f16
%1482 = spirv.FSub %231, %1412 : f16
%1483 = spirv.FSub %234, %1412 : f16
%1484 = spirv.FSub %237, %1412 : f16
%1485 = spirv.FSub %240, %1412 : f16
%1486 = spirv.FSub %243, %1412 : f16
%1487 = spirv.FSub %246, %1412 : f16
%1488 = spirv.FSub %249, %1412 : f16
%1489 = spirv.FSub %252, %1412 : f16
%1490 = spirv.FSub %255, %1412 : f16
%1491 = spirv.FSub %258, %1412 : f16
%1492 = spirv.FSub %261, %1412 : f16
// Group 1: %264 .. %492 minus lane 1 (%1413) -> %1493 .. %1569
%1493 = spirv.FSub %264, %1413 : f16
%1494 = spirv.FSub %267, %1413 : f16
%1495 = spirv.FSub %270, %1413 : f16
%1496 = spirv.FSub %273, %1413 : f16
%1497 = spirv.FSub %276, %1413 : f16
%1498 = spirv.FSub %279, %1413 : f16
%1499 = spirv.FSub %282, %1413 : f16
%1500 = spirv.FSub %285, %1413 : f16
%1501 = spirv.FSub %288, %1413 : f16
%1502 = spirv.FSub %291, %1413 : f16
%1503 = spirv.FSub %294, %1413 : f16
%1504 = spirv.FSub %297, %1413 : f16
%1505 = spirv.FSub %300, %1413 : f16
%1506 = spirv.FSub %303, %1413 : f16
%1507 = spirv.FSub %306, %1413 : f16
%1508 = spirv.FSub %309, %1413 : f16
%1509 = spirv.FSub %312, %1413 : f16
%1510 = spirv.FSub %315, %1413 : f16
%1511 = spirv.FSub %318, %1413 : f16
%1512 = spirv.FSub %321, %1413 : f16
%1513 = spirv.FSub %324, %1413 : f16
%1514 = spirv.FSub %327, %1413 : f16
%1515 = spirv.FSub %330, %1413 : f16
%1516 = spirv.FSub %333, %1413 : f16
%1517 = spirv.FSub %336, %1413 : f16
%1518 = spirv.FSub %339, %1413 : f16
%1519 = spirv.FSub %342, %1413 : f16
%1520 = spirv.FSub %345, %1413 : f16
%1521 = spirv.FSub %348, %1413 : f16
%1522 = spirv.FSub %351, %1413 : f16
%1523 = spirv.FSub %354, %1413 : f16
%1524 = spirv.FSub %357, %1413 : f16
%1525 = spirv.FSub %360, %1413 : f16
%1526 = spirv.FSub %363, %1413 : f16
%1527 = spirv.FSub %366, %1413 : f16
%1528 = spirv.FSub %369, %1413 : f16
%1529 = spirv.FSub %372, %1413 : f16
%1530 = spirv.FSub %375, %1413 : f16
%1531 = spirv.FSub %378, %1413 : f16
%1532 = spirv.FSub %381, %1413 : f16
%1533 = spirv.FSub %384, %1413 : f16
%1534 = spirv.FSub %387, %1413 : f16
%1535 = spirv.FSub %390, %1413 : f16
%1536 = spirv.FSub %393, %1413 : f16
%1537 = spirv.FSub %396, %1413 : f16
%1538 = spirv.FSub %399, %1413 : f16
%1539 = spirv.FSub %402, %1413 : f16
%1540 = spirv.FSub %405, %1413 : f16
%1541 = spirv.FSub %408, %1413 : f16
%1542 = spirv.FSub %411, %1413 : f16
%1543 = spirv.FSub %414, %1413 : f16
%1544 = spirv.FSub %417, %1413 : f16
%1545 = spirv.FSub %420, %1413 : f16
%1546 = spirv.FSub %423, %1413 : f16
%1547 = spirv.FSub %426, %1413 : f16
%1548 = spirv.FSub %429, %1413 : f16
%1549 = spirv.FSub %432, %1413 : f16
%1550 = spirv.FSub %435, %1413 : f16
%1551 = spirv.FSub %438, %1413 : f16
%1552 = spirv.FSub %441, %1413 : f16
%1553 = spirv.FSub %444, %1413 : f16
%1554 = spirv.FSub %447, %1413 : f16
%1555 = spirv.FSub %450, %1413 : f16
%1556 = spirv.FSub %453, %1413 : f16
%1557 = spirv.FSub %456, %1413 : f16
%1558 = spirv.FSub %459, %1413 : f16
%1559 = spirv.FSub %462, %1413 : f16
%1560 = spirv.FSub %465, %1413 : f16
%1561 = spirv.FSub %468, %1413 : f16
%1562 = spirv.FSub %471, %1413 : f16
%1563 = spirv.FSub %474, %1413 : f16
%1564 = spirv.FSub %477, %1413 : f16
%1565 = spirv.FSub %480, %1413 : f16
%1566 = spirv.FSub %483, %1413 : f16
%1567 = spirv.FSub %486, %1413 : f16
%1568 = spirv.FSub %489, %1413 : f16
%1569 = spirv.FSub %492, %1413 : f16
// Group 2: %495 .. %723 minus lane 2 (%1414) -> %1570 .. %1646
%1570 = spirv.FSub %495, %1414 : f16
%1571 = spirv.FSub %498, %1414 : f16
%1572 = spirv.FSub %501, %1414 : f16
%1573 = spirv.FSub %504, %1414 : f16
%1574 = spirv.FSub %507, %1414 : f16
%1575 = spirv.FSub %510, %1414 : f16
%1576 = spirv.FSub %513, %1414 : f16
%1577 = spirv.FSub %516, %1414 : f16
%1578 = spirv.FSub %519, %1414 : f16
%1579 = spirv.FSub %522, %1414 : f16
%1580 = spirv.FSub %525, %1414 : f16
%1581 = spirv.FSub %528, %1414 : f16
%1582 = spirv.FSub %531, %1414 : f16
%1583 = spirv.FSub %534, %1414 : f16
%1584 = spirv.FSub %537, %1414 : f16
%1585 = spirv.FSub %540, %1414 : f16
%1586 = spirv.FSub %543, %1414 : f16
%1587 = spirv.FSub %546, %1414 : f16
%1588 = spirv.FSub %549, %1414 : f16
%1589 = spirv.FSub %552, %1414 : f16
%1590 = spirv.FSub %555, %1414 : f16
%1591 = spirv.FSub %558, %1414 : f16
%1592 = spirv.FSub %561, %1414 : f16
%1593 = spirv.FSub %564, %1414 : f16
%1594 = spirv.FSub %567, %1414 : f16
%1595 = spirv.FSub %570, %1414 : f16
%1596 = spirv.FSub %573, %1414 : f16
%1597 = spirv.FSub %576, %1414 : f16
%1598 = spirv.FSub %579, %1414 : f16
%1599 = spirv.FSub %582, %1414 : f16
%1600 = spirv.FSub %585, %1414 : f16
%1601 = spirv.FSub %588, %1414 : f16
%1602 = spirv.FSub %591, %1414 : f16
%1603 = spirv.FSub %594, %1414 : f16
%1604 = spirv.FSub %597, %1414 : f16
%1605 = spirv.FSub %600, %1414 : f16
%1606 = spirv.FSub %603, %1414 : f16
%1607 = spirv.FSub %606, %1414 : f16
%1608 = spirv.FSub %609, %1414 : f16
%1609 = spirv.FSub %612, %1414 : f16
%1610 = spirv.FSub %615, %1414 : f16
%1611 = spirv.FSub %618, %1414 : f16
%1612 = spirv.FSub %621, %1414 : f16
%1613 = spirv.FSub %624, %1414 : f16
%1614 = spirv.FSub %627, %1414 : f16
%1615 = spirv.FSub %630, %1414 : f16
%1616 = spirv.FSub %633, %1414 : f16
%1617 = spirv.FSub %636, %1414 : f16
%1618 = spirv.FSub %639, %1414 : f16
%1619 = spirv.FSub %642, %1414 : f16
%1620 = spirv.FSub %645, %1414 : f16
%1621 = spirv.FSub %648, %1414 : f16
%1622 = spirv.FSub %651, %1414 : f16
%1623 = spirv.FSub %654, %1414 : f16
%1624 = spirv.FSub %657, %1414 : f16
%1625 = spirv.FSub %660, %1414 : f16
%1626 = spirv.FSub %663, %1414 : f16
%1627 = spirv.FSub %666, %1414 : f16
%1628 = spirv.FSub %669, %1414 : f16
%1629 = spirv.FSub %672, %1414 : f16
%1630 = spirv.FSub %675, %1414 : f16
%1631 = spirv.FSub %678, %1414 : f16
%1632 = spirv.FSub %681, %1414 : f16
%1633 = spirv.FSub %684, %1414 : f16
%1634 = spirv.FSub %687, %1414 : f16
%1635 = spirv.FSub %690, %1414 : f16
%1636 = spirv.FSub %693, %1414 : f16
%1637 = spirv.FSub %696, %1414 : f16
%1638 = spirv.FSub %699, %1414 : f16
%1639 = spirv.FSub %702, %1414 : f16
%1640 = spirv.FSub %705, %1414 : f16
%1641 = spirv.FSub %708, %1414 : f16
%1642 = spirv.FSub %711, %1414 : f16
%1643 = spirv.FSub %714, %1414 : f16
%1644 = spirv.FSub %717, %1414 : f16
%1645 = spirv.FSub %720, %1414 : f16
%1646 = spirv.FSub %723, %1414 : f16
// Group 3: %726 .. %954 minus lane 3 (%1415) -> %1647 .. %1723
%1647 = spirv.FSub %726, %1415 : f16
%1648 = spirv.FSub %729, %1415 : f16
%1649 = spirv.FSub %732, %1415 : f16
%1650 = spirv.FSub %735, %1415 : f16
%1651 = spirv.FSub %738, %1415 : f16
%1652 = spirv.FSub %741, %1415 : f16
%1653 = spirv.FSub %744, %1415 : f16
%1654 = spirv.FSub %747, %1415 : f16
%1655 = spirv.FSub %750, %1415 : f16
%1656 = spirv.FSub %753, %1415 : f16
%1657 = spirv.FSub %756, %1415 : f16
%1658 = spirv.FSub %759, %1415 : f16
%1659 = spirv.FSub %762, %1415 : f16
%1660 = spirv.FSub %765, %1415 : f16
%1661 = spirv.FSub %768, %1415 : f16
%1662 = spirv.FSub %771, %1415 : f16
%1663 = spirv.FSub %774, %1415 : f16
%1664 = spirv.FSub %777, %1415 : f16
%1665 = spirv.FSub %780, %1415 : f16
%1666 = spirv.FSub %783, %1415 : f16
%1667 = spirv.FSub %786, %1415 : f16
%1668 = spirv.FSub %789, %1415 : f16
%1669 = spirv.FSub %792, %1415 : f16
%1670 = spirv.FSub %795, %1415 : f16
%1671 = spirv.FSub %798, %1415 : f16
%1672 = spirv.FSub %801, %1415 : f16
%1673 = spirv.FSub %804, %1415 : f16
%1674 = spirv.FSub %807, %1415 : f16
%1675 = spirv.FSub %810, %1415 : f16
%1676 = spirv.FSub %813, %1415 : f16
%1677 = spirv.FSub %816, %1415 : f16
%1678 = spirv.FSub %819, %1415 : f16
%1679 = spirv.FSub %822, %1415 : f16
%1680 = spirv.FSub %825, %1415 : f16
%1681 = spirv.FSub %828, %1415 : f16
%1682 = spirv.FSub %831, %1415 : f16
%1683 = spirv.FSub %834, %1415 : f16
%1684 = spirv.FSub %837, %1415 : f16
%1685 = spirv.FSub %840, %1415 : f16
%1686 = spirv.FSub %843, %1415 : f16
%1687 = spirv.FSub %846, %1415 : f16
%1688 = spirv.FSub %849, %1415 : f16
%1689 = spirv.FSub %852, %1415 : f16
%1690 = spirv.FSub %855, %1415 : f16
%1691 = spirv.FSub %858, %1415 : f16
%1692 = spirv.FSub %861, %1415 : f16
%1693 = spirv.FSub %864, %1415 : f16
%1694 = spirv.FSub %867, %1415 : f16
%1695 = spirv.FSub %870, %1415 : f16
%1696 = spirv.FSub %873, %1415 : f16
%1697 = spirv.FSub %876, %1415 : f16
%1698 = spirv.FSub %879, %1415 : f16
%1699 = spirv.FSub %882, %1415 : f16
%1700 = spirv.FSub %885, %1415 : f16
%1701 = spirv.FSub %888, %1415 : f16
%1702 = spirv.FSub %891, %1415 : f16
%1703 = spirv.FSub %894, %1415 : f16
%1704 = spirv.FSub %897, %1415 : f16
%1705 = spirv.FSub %900, %1415 : f16
%1706 = spirv.FSub %903, %1415 : f16
%1707 = spirv.FSub %906, %1415 : f16
%1708 = spirv.FSub %909, %1415 : f16
%1709 = spirv.FSub %912, %1415 : f16
%1710 = spirv.FSub %915, %1415 : f16
%1711 = spirv.FSub %918, %1415 : f16
%1712 = spirv.FSub %921, %1415 : f16
%1713 = spirv.FSub %924, %1415 : f16
%1714 = spirv.FSub %927, %1415 : f16
%1715 = spirv.FSub %930, %1415 : f16
%1716 = spirv.FSub %933, %1415 : f16
%1717 = spirv.FSub %936, %1415 : f16
%1718 = spirv.FSub %939, %1415 : f16
%1719 = spirv.FSub %942, %1415 : f16
%1720 = spirv.FSub %945, %1415 : f16
%1721 = spirv.FSub %948, %1415 : f16
%1722 = spirv.FSub %951, %1415 : f16
%1723 = spirv.FSub %954, %1415 : f16
// Scalar f32 expansion applied, one copy per value, to the f16 differences
// produced by the FSub chain; the five copies below take %1416 .. %1420.
// NOTE(review): the op sequence (scale, floor, subtract, degree-5 polynomial,
// multiply by a float assembled from integer bits, special-case selects) looks
// like a polynomial approximation of exp(x). The cst_* values are defined
// outside this excerpt, so confirm against their definitions.
//
// --- copy 1: input %1416, result %1754 ---
// Widen the f16 input x to f32.
%1724 = spirv.FConvert %1416 : f16 to f32
// NaN flag for x; OR-ing the flag with itself leaves it unchanged.
%1725 = spirv.IsNan %1724 : f32
%1726 = spirv.LogicalOr %1725, %1725 : i1
// k = floor(x * cst_f32_0); r = x - k * cst_f32.
// presumably cst_f32_0 = log2(e) and cst_f32 = ln(2) — TODO confirm
%1727 = spirv.FMul %1724, %cst_f32_0 : f32
%1728 = spirv.GL.Floor %1727 : f32
%1729 = spirv.FMul %1728, %cst_f32 : f32
%1730 = spirv.FSub %1724, %1729 : f32
// r^2 and r^4.
%1731 = spirv.FMul %1730, %1730 : f32
%1732 = spirv.FMul %1731, %1731 : f32
// Polynomial in r assembled from three linear FMA terms:
//   (cst_f32_1*r + cst_f32_1) + (cst_f32_3*r + cst_f32_2)*r^2 + (cst_f32_5*r + cst_f32_4)*r^4
%1733 = spirv.GL.Fma %cst_f32_1, %1730, %cst_f32_1 : f32
%1734 = spirv.GL.Fma %cst_f32_3, %1730, %cst_f32_2 : f32
%1735 = spirv.GL.Fma %cst_f32_5, %1730, %cst_f32_4 : f32
%1736 = spirv.GL.Fma %1734, %1731, %1733 : f32
%1737 = spirv.GL.Fma %1735, %1732, %1736 : f32
// Convert k to i32, add cst127_i32, shift left by cst23_i32 and reinterpret the
// bits as f32, then scale the polynomial by that value.
// presumably this builds 2^k from the f32 exponent bias (127) and mantissa
// width (23) — confirm the constant values.
%1738 = spirv.ConvertFToS %1728 : f32 to i32
%1739 = spirv.IAdd %1738, %cst127_i32 : i32
%1740 = spirv.ShiftLeftLogical %1739, %cst23_i32 : i32, i32
%1741 = spirv.Bitcast %1740 : i32 to f32
%1742 = spirv.FMul %1737, %1741 : f32
// Range check on k (cst-127_i32 <= k <= cst127_i32) and comparisons on x that
// drive the fallback selection below.
%1743 = spirv.SLessThanEqual %1738, %cst127_i32 : i32
%1744 = spirv.SGreaterThanEqual %1738, %cst-127_i32 : i32
%1745 = spirv.FOrdEqual %1724, %cst_f32_8 : f32
%1746 = spirv.FOrdEqual %1724, %cst_f32_7 : f32
%1747 = spirv.FOrdGreaterThan %1724, %cst_f32_6 : f32
%1748 = spirv.LogicalAnd %1743, %1744 : i1
// Select chain; later selects take priority over earlier ones:
//   k out of range  -> cst_f32_7 if x > cst_f32_6, else cst_f32_9
//   k in range      -> scaled polynomial (%1742)
//   x == cst_f32_7  -> cst_f32_7
//   x == cst_f32_8  -> cst_f32_6
//   x is NaN        -> x
// presumably cst_f32_6 = 0, cst_f32_7 = +inf, cst_f32_8 = -inf — TODO confirm
%1749 = spirv.Select %1747, %cst_f32_7, %cst_f32_9 : i1, f32
%1750 = spirv.Select %1748, %1742, %1749 : i1, f32
%1751 = spirv.Select %1746, %cst_f32_7, %1750 : i1, f32
%1752 = spirv.Select %1745, %cst_f32_6, %1751 : i1, f32
%1753 = spirv.Select %1726, %1724, %1752 : i1, f32
// Narrow the result back to f16.
%1754 = spirv.FConvert %1753 : f32 to f16
// --- copy 2: same op sequence, input %1417, result %1785 ---
%1755 = spirv.FConvert %1417 : f16 to f32
%1756 = spirv.IsNan %1755 : f32
%1757 = spirv.LogicalOr %1756, %1756 : i1
%1758 = spirv.FMul %1755, %cst_f32_0 : f32
%1759 = spirv.GL.Floor %1758 : f32
%1760 = spirv.FMul %1759, %cst_f32 : f32
%1761 = spirv.FSub %1755, %1760 : f32
%1762 = spirv.FMul %1761, %1761 : f32
%1763 = spirv.FMul %1762, %1762 : f32
%1764 = spirv.GL.Fma %cst_f32_1, %1761, %cst_f32_1 : f32
%1765 = spirv.GL.Fma %cst_f32_3, %1761, %cst_f32_2 : f32
%1766 = spirv.GL.Fma %cst_f32_5, %1761, %cst_f32_4 : f32
%1767 = spirv.GL.Fma %1765, %1762, %1764 : f32
%1768 = spirv.GL.Fma %1766, %1763, %1767 : f32
%1769 = spirv.ConvertFToS %1759 : f32 to i32
%1770 = spirv.IAdd %1769, %cst127_i32 : i32
%1771 = spirv.ShiftLeftLogical %1770, %cst23_i32 : i32, i32
%1772 = spirv.Bitcast %1771 : i32 to f32
%1773 = spirv.FMul %1768, %1772 : f32
%1774 = spirv.SLessThanEqual %1769, %cst127_i32 : i32
%1775 = spirv.SGreaterThanEqual %1769, %cst-127_i32 : i32
%1776 = spirv.FOrdEqual %1755, %cst_f32_8 : f32
%1777 = spirv.FOrdEqual %1755, %cst_f32_7 : f32
%1778 = spirv.FOrdGreaterThan %1755, %cst_f32_6 : f32
%1779 = spirv.LogicalAnd %1774, %1775 : i1
%1780 = spirv.Select %1778, %cst_f32_7, %cst_f32_9 : i1, f32
%1781 = spirv.Select %1779, %1773, %1780 : i1, f32
%1782 = spirv.Select %1777, %cst_f32_7, %1781 : i1, f32
%1783 = spirv.Select %1776, %cst_f32_6, %1782 : i1, f32
%1784 = spirv.Select %1757, %1755, %1783 : i1, f32
%1785 = spirv.FConvert %1784 : f32 to f16
// --- copy 3: same op sequence, input %1418, result %1816 ---
%1786 = spirv.FConvert %1418 : f16 to f32
%1787 = spirv.IsNan %1786 : f32
%1788 = spirv.LogicalOr %1787, %1787 : i1
%1789 = spirv.FMul %1786, %cst_f32_0 : f32
%1790 = spirv.GL.Floor %1789 : f32
%1791 = spirv.FMul %1790, %cst_f32 : f32
%1792 = spirv.FSub %1786, %1791 : f32
%1793 = spirv.FMul %1792, %1792 : f32
%1794 = spirv.FMul %1793, %1793 : f32
%1795 = spirv.GL.Fma %cst_f32_1, %1792, %cst_f32_1 : f32
%1796 = spirv.GL.Fma %cst_f32_3, %1792, %cst_f32_2 : f32
%1797 = spirv.GL.Fma %cst_f32_5, %1792, %cst_f32_4 : f32
%1798 = spirv.GL.Fma %1796, %1793, %1795 : f32
%1799 = spirv.GL.Fma %1797, %1794, %1798 : f32
%1800 = spirv.ConvertFToS %1790 : f32 to i32
%1801 = spirv.IAdd %1800, %cst127_i32 : i32
%1802 = spirv.ShiftLeftLogical %1801, %cst23_i32 : i32, i32
%1803 = spirv.Bitcast %1802 : i32 to f32
%1804 = spirv.FMul %1799, %1803 : f32
%1805 = spirv.SLessThanEqual %1800, %cst127_i32 : i32
%1806 = spirv.SGreaterThanEqual %1800, %cst-127_i32 : i32
%1807 = spirv.FOrdEqual %1786, %cst_f32_8 : f32
%1808 = spirv.FOrdEqual %1786, %cst_f32_7 : f32
%1809 = spirv.FOrdGreaterThan %1786, %cst_f32_6 : f32
%1810 = spirv.LogicalAnd %1805, %1806 : i1
%1811 = spirv.Select %1809, %cst_f32_7, %cst_f32_9 : i1, f32
%1812 = spirv.Select %1810, %1804, %1811 : i1, f32
%1813 = spirv.Select %1808, %cst_f32_7, %1812 : i1, f32
%1814 = spirv.Select %1807, %cst_f32_6, %1813 : i1, f32
%1815 = spirv.Select %1788, %1786, %1814 : i1, f32
%1816 = spirv.FConvert %1815 : f32 to f16
// --- copy 4: same op sequence, input %1419, result %1847 ---
%1817 = spirv.FConvert %1419 : f16 to f32
%1818 = spirv.IsNan %1817 : f32
%1819 = spirv.LogicalOr %1818, %1818 : i1
%1820 = spirv.FMul %1817, %cst_f32_0 : f32
%1821 = spirv.GL.Floor %1820 : f32
%1822 = spirv.FMul %1821, %cst_f32 : f32
%1823 = spirv.FSub %1817, %1822 : f32
%1824 = spirv.FMul %1823, %1823 : f32
%1825 = spirv.FMul %1824, %1824 : f32
%1826 = spirv.GL.Fma %cst_f32_1, %1823, %cst_f32_1 : f32
%1827 = spirv.GL.Fma %cst_f32_3, %1823, %cst_f32_2 : f32
%1828 = spirv.GL.Fma %cst_f32_5, %1823, %cst_f32_4 : f32
%1829 = spirv.GL.Fma %1827, %1824, %1826 : f32
%1830 = spirv.GL.Fma %1828, %1825, %1829 : f32
%1831 = spirv.ConvertFToS %1821 : f32 to i32
%1832 = spirv.IAdd %1831, %cst127_i32 : i32
%1833 = spirv.ShiftLeftLogical %1832, %cst23_i32 : i32, i32
%1834 = spirv.Bitcast %1833 : i32 to f32
%1835 = spirv.FMul %1830, %1834 : f32
%1836 = spirv.SLessThanEqual %1831, %cst127_i32 : i32
%1837 = spirv.SGreaterThanEqual %1831, %cst-127_i32 : i32
%1838 = spirv.FOrdEqual %1817, %cst_f32_8 : f32
%1839 = spirv.FOrdEqual %1817, %cst_f32_7 : f32
%1840 = spirv.FOrdGreaterThan %1817, %cst_f32_6 : f32
%1841 = spirv.LogicalAnd %1836, %1837 : i1
%1842 = spirv.Select %1840, %cst_f32_7, %cst_f32_9 : i1, f32
%1843 = spirv.Select %1841, %1835, %1842 : i1, f32
%1844 = spirv.Select %1839, %cst_f32_7, %1843 : i1, f32
%1845 = spirv.Select %1838, %cst_f32_6, %1844 : i1, f32
%1846 = spirv.Select %1819, %1817, %1845 : i1, f32
%1847 = spirv.FConvert %1846 : f32 to f16
// --- copy 5: same op sequence, input %1420, result %1878 ---
%1848 = spirv.FConvert %1420 : f16 to f32
%1849 = spirv.IsNan %1848 : f32
%1850 = spirv.LogicalOr %1849, %1849 : i1
%1851 = spirv.FMul %1848, %cst_f32_0 : f32
%1852 = spirv.GL.Floor %1851 : f32
%1853 = spirv.FMul %1852, %cst_f32 : f32
%1854 = spirv.FSub %1848, %1853 : f32
%1855 = spirv.FMul %1854, %1854 : f32
%1856 = spirv.FMul %1855, %1855 : f32
%1857 = spirv.GL.Fma %cst_f32_1, %1854, %cst_f32_1 : f32
%1858 = spirv.GL.Fma %cst_f32_3, %1854, %cst_f32_2 : f32
%1859 = spirv.GL.Fma %cst_f32_5, %1854, %cst_f32_4 : f32
%1860 = spirv.GL.Fma %1858, %1855, %1857 : f32
%1861 = spirv.GL.Fma %1859, %1856, %1860 : f32
%1862 = spirv.ConvertFToS %1852 : f32 to i32
%1863 = spirv.IAdd %1862, %cst127_i32 : i32
%1864 = spirv.ShiftLeftLogical %1863, %cst23_i32 : i32, i32
%1865 = spirv.Bitcast %1864 : i32 to f32
%1866 = spirv.FMul %1861, %1865 : f32
%1867 = spirv.SLessThanEqual %1862, %cst127_i32 : i32
%1868 = spirv.SGreaterThanEqual %1862, %cst-127_i32 : i32
%1869 = spirv.FOrdEqual %1848, %cst_f32_8 : f32
%1870 = spirv.FOrdEqual %1848, %cst_f32_7 : f32
%1871 = spirv.FOrdGreaterThan %1848, %cst_f32_6 : f32
%1872 = spirv.LogicalAnd %1867, %1868 : i1
%1873 = spirv.Select %1871, %cst_f32_7, %cst_f32_9 : i1, f32
%1874 = spirv.Select %1872, %1866, %1873 : i1, f32
%1875 = spirv.Select %1870, %cst_f32_7, %1874 : i1, f32
%1876 = spirv.Select %1869, %cst_f32_6, %1875 : i1, f32
%1877 = spirv.Select %1850, %1848, %1876 : i1, f32
%1878 = spirv.FConvert %1877 : f32 to f16
%1879 = spirv.FConvert %1421 : f16 to f32
%1880 = spirv.IsNan %1879 : f32
%1881 = spirv.LogicalOr %1880, %1880 : i1
%1882 = spirv.FMul %1879, %cst_f32_0 : f32
%1883 = spirv.GL.Floor %1882 : f32
%1884 = spirv.FMul %1883, %cst_f32 : f32
%1885 = spirv.FSub %1879, %1884 : f32
%1886 = spirv.FMul %1885, %1885 : f32
%1887 = spirv.FMul %1886, %1886 : f32
%1888 = spirv.GL.Fma %cst_f32_1, %1885, %cst_f32_1 : f32
%1889 = spirv.GL.Fma %cst_f32_3, %1885, %cst_f32_2 : f32
%1890 = spirv.GL.Fma %cst_f32_5, %1885, %cst_f32_4 : f32
%1891 = spirv.GL.Fma %1889, %1886, %1888 : f32
%1892 = spirv.GL.Fma %1890, %1887, %1891 : f32
%1893 = spirv.ConvertFToS %1883 : f32 to i32
%1894 = spirv.IAdd %1893, %cst127_i32 : i32
%1895 = spirv.ShiftLeftLogical %1894, %cst23_i32 : i32, i32
%1896 = spirv.Bitcast %1895 : i32 to f32
%1897 = spirv.FMul %1892, %1896 : f32
%1898 = spirv.SLessThanEqual %1893, %cst127_i32 : i32
%1899 =
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment