Last active
August 9, 2021 21:46
-
-
Save antiagainst/ebadbfa490d38636f2eea4b1164c7726 to your computer and use it in GitHub Desktop.
This file has been truncated, but you can view the full file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // -----// IR Dump After mlir::iree_compiler::IREE::ABI::WrapEntryPointsPass //----- // | |
| builtin.module { | |
| flow.variable @"__iree_flow_bert/embeddings/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/embeddings/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/embeddings/embedding_transformation/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/embeddings/embedding_transformation/kernel" opaque<"_", "0xDEADBEEF"> : tensor<384x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/embeddings/position_embeddings" opaque<"_", "0xDEADBEEF"> : tensor<512x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/embeddings/token_type_embeddings" opaque<"_", "0xDEADBEEF"> : tensor<2x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/embeddings/word_embeddings" opaque<"_", "0xDEADBEEF"> : tensor<30522x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_10/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_11/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_12/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_13/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_14/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_15/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_16/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_17/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_18/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_19/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_20/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_21/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_22/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_23/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_3/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_4/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_5/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_6/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_7/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_8/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
// Module-level flow.variable declarations (continued): encoder layer_9 parameters.
// Every symbol here is private and typed as an f32 tensor with extents 128 and/or 512.
// The initial values are printed as opaque<"_", "0xDEADBEEF">.
// NOTE(review): the opaque payload presumably stands in for weight data elided by the IR printer — confirm.
| flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_bert/encoder/layer_9/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"} | |
// cls/squad variables: output_bias is the only initializer in this run printed as literal dense values (two f32 elements).
| flow.variable @"__iree_flow_cls/squad/output_bias" dense<[0.0287729427, 0.0297581609]> : tensor<2xf32> attributes {sym_visibility = "private"} | |
| flow.variable @"__iree_flow_cls/squad/output_weights" opaque<"_", "0xDEADBEEF"> : tensor<2x512xf32> attributes {sym_visibility = "private"} | |
// @serving_default: entry function carrying the iree.abi.stub attribute; unlike the two functions that follow it, it is not marked private.
// Signature: three !hal.buffer_view arguments -> two !hal.buffer_view results.
// The iree.reflection attribute holds an iree.abi string listing the named inputs segment_ids, input_mask and input_ids
// (each ["ndarray","i32",2,1,384]) and an "sdict" result with end_logits and start_logits (each ["ndarray","f32",2,1,384]).
// NOTE(review): the trailing "2,1,384" presumably encodes rank 2 with shape 1x384 — confirm against the IREE ABI spec.
| builtin.func @serving_default(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> (!hal.buffer_view, !hal.buffer_view) attributes {iree.abi.stub, iree.reflection = {iree.abi = "{\22a\22:[[\22named\22,\22segment_ids\22,[\22ndarray\22,\22i32\22,2,1,384]],[\22named\22,\22input_mask\22,[\22ndarray\22,\22i32\22,2,1,384]],[\22named\22,\22input_ids\22,[\22ndarray\22,\22i32\22,2,1,384]]],\22r\22:[[\22sdict\22,[\22end_logits\22,[\22ndarray\22,\22f32\22,2,1,384]],[\22start_logits\22,[\22ndarray\22,\22f32\22,2,1,384]]]],\22v\22:1}"}} { | |
// Forward all three arguments, unchanged and in order, to the private @_serving_default.
| %0:2 = call @_serving_default(%arg0, %arg1, %arg2) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> (!hal.buffer_view, !hal.buffer_view) | |
// Return both callee results in the order they were produced.
| return %0#0, %0#1 : !hal.buffer_view, !hal.buffer_view | |
| } | |
// @_serving_default: private shim between the !hal.buffer_view signature and the tensor-typed @serving_default__ireesm.
// It carries the same iree.abi string as the wrapper above, here as a direct iree.abi attribute rather than nested under iree.reflection.
| builtin.func private @_serving_default(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> (!hal.buffer_view, !hal.buffer_view) attributes {iree.abi = "{\22a\22:[[\22named\22,\22segment_ids\22,[\22ndarray\22,\22i32\22,2,1,384]],[\22named\22,\22input_mask\22,[\22ndarray\22,\22i32\22,2,1,384]],[\22named\22,\22input_ids\22,[\22ndarray\22,\22i32\22,2,1,384]]],\22r\22:[[\22sdict\22,[\22end_logits\22,[\22ndarray\22,\22f32\22,2,1,384]],[\22start_logits\22,[\22ndarray\22,\22f32\22,2,1,384]]]],\22v\22:1}"} { | |
// Cast each incoming buffer view to a statically shaped tensor<1x384xi32>.
| %0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x384xi32> | |
| %1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<1x384xi32> | |
| %2 = hal.tensor.cast %arg2 : !hal.buffer_view -> tensor<1x384xi32> | |
// Call the tensor-typed function; it yields two tensor<1x384xf32> results.
| %3:2 = call @serving_default__ireesm(%0, %1, %2) : (tensor<1x384xi32>, tensor<1x384xi32>, tensor<1x384xi32>) -> (tensor<1x384xf32>, tensor<1x384xf32>) | |
// Cast both results back to !hal.buffer_view, preserving result order.
| %4 = hal.tensor.cast %3#0 : tensor<1x384xf32> -> !hal.buffer_view | |
| %5 = hal.tensor.cast %3#1 : tensor<1x384xf32> -> !hal.buffer_view | |
| return %4, %5 : !hal.buffer_view, !hal.buffer_view | |
| } | |
| builtin.func private @serving_default__ireesm(%arg0: tensor<1x384xi32>, %arg1: tensor<1x384xi32>, %arg2: tensor<1x384xi32>) -> (tensor<1x384xf32>, tensor<1x384xf32>) attributes {tf.entry_function = {control_outputs = "", inputs = "segment_ids:0,input_mask:0,input_ids:0", outputs = "end_logits:0,start_logits:0"}} { | |
| %cst = constant opaque<"_", "0xDEADBEEF"> : tensor<2x512xf32> | |
| %cst_0 = constant dense<[0.0287729427, 0.0297581609]> : tensor<2xf32> | |
| %cst_1 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_2 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_3 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_4 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_5 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_6 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_7 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_8 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_9 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_10 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_11 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_12 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_13 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_14 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_15 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_16 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_17 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_18 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_19 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_20 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_21 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_22 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_23 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_24 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_25 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_26 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_27 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_28 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_29 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_30 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_31 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_32 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_33 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_34 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_35 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_36 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_37 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_38 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_39 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_40 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_41 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_42 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_43 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_44 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_45 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_46 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_47 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_48 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_49 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_50 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_51 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_52 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_53 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_54 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_55 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_56 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_57 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_58 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_59 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_60 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_61 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_62 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_63 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_64 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_65 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_66 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_67 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_68 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_69 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_70 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_71 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_72 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_73 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_74 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_75 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_76 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_77 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_78 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_79 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_80 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_81 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_82 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_83 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_84 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_85 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_86 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_87 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_88 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_89 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_90 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_91 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_92 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_93 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_94 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_95 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_96 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_97 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_98 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_99 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_100 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_101 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_102 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_103 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_104 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_105 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_106 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_107 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_108 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_109 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_110 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_111 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_112 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_113 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_114 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_115 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_116 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_117 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_118 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_119 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_120 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_121 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_122 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_123 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_124 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_125 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_126 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_127 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_128 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_129 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_130 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_131 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_132 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_133 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_134 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_135 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_136 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_137 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_138 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_139 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_140 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_141 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_142 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_143 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_144 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_145 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_146 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_147 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_148 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_149 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_150 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_151 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_152 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_153 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_154 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_155 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_156 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_157 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_158 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_159 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_160 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_161 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_162 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_163 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_164 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_165 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_166 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_167 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_168 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_169 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_170 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_171 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_172 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_173 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_174 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_175 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_176 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_177 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_178 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_179 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_180 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_181 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_182 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_183 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_184 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_185 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_186 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_187 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_188 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_189 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_190 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_191 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_192 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_193 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_194 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_195 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_196 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_197 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_198 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_199 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_200 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_201 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_202 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_203 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_204 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_205 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_206 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_207 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_208 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_209 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_210 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_211 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_212 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_213 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_214 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_215 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_216 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_217 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_218 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_219 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_220 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_221 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_222 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_223 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_224 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_225 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_226 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_227 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_228 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_229 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_230 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_231 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_232 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_233 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_234 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_235 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_236 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_237 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_238 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_239 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_240 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_241 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_242 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_243 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_244 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_245 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_246 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_247 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_248 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_249 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_250 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_251 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_252 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_253 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_254 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_255 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_256 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_257 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_258 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_259 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_260 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_261 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_262 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_263 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_264 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_265 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_266 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_267 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_268 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_269 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_270 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_271 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_272 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_273 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_274 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_275 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_276 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_277 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_278 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_279 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_280 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_281 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_282 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_283 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_284 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_285 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_286 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_287 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_288 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_289 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_290 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_291 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_292 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_293 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_294 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_295 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_296 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_297 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_298 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_299 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_300 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_301 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_302 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_303 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_304 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_305 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_306 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_307 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_308 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_309 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_310 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_311 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_312 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_313 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_314 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_315 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_316 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_317 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_318 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_319 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_320 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_321 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_322 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_323 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_324 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_325 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_326 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_327 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_328 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_329 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_330 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_331 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_332 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_333 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_334 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_335 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_336 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_337 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_338 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_339 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_340 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_341 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_342 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_343 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_344 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_345 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_346 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_347 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_348 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_349 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_350 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_351 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_352 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_353 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_354 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_355 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_356 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_357 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_358 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_359 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_360 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_361 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_362 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_363 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_364 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_365 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_366 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_367 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_368 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_369 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_370 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_371 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_372 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_373 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_374 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_375 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_376 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_377 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_378 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_379 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_380 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_381 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_382 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_383 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_384 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_385 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_386 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_387 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_388 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_389 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_390 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_391 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_392 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_393 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_394 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_395 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_396 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_397 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_398 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_399 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_400 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_401 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_402 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_403 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_404 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_405 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_406 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_407 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_408 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_409 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_410 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_411 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_412 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_413 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_414 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_415 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_416 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_417 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_418 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_419 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_420 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_421 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_422 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_423 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_424 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_425 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_426 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_427 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_428 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_429 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_430 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_431 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_432 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_433 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_434 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_435 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_436 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_437 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_438 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_439 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_440 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_441 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_442 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_443 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_444 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_445 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_446 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_447 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_448 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_449 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_450 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_451 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_452 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_453 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_454 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_455 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_456 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_457 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_458 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_459 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_460 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_461 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_462 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_463 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_464 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_465 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_466 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_467 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_468 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_469 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_470 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_471 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_472 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_473 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_474 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_475 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_476 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_477 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_478 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_479 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_480 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_481 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_482 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_483 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_484 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_485 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_486 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_487 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_488 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_489 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_490 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_491 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_492 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_493 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_494 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_495 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_496 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_497 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_498 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_499 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_500 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_501 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_502 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_503 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_504 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_505 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_506 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_507 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_508 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_509 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_510 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_511 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_512 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_513 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_514 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_515 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_516 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_517 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_518 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_519 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_520 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_521 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_522 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_523 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_524 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_525 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_526 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_527 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_528 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_529 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_530 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_531 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_532 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_533 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_534 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_535 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_536 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_537 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_538 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_539 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_540 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_541 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_542 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_543 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_544 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_545 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_546 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_547 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_548 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_549 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_550 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_551 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_552 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_553 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_554 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_555 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_556 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_557 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_558 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_559 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_560 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_561 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_562 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_563 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_564 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_565 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_566 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_567 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_568 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_569 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_570 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_571 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_572 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_573 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_574 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_575 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_576 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_577 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_578 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_579 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_580 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_581 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_582 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_583 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_584 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_585 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_586 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_587 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_588 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_589 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_590 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_591 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_592 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_593 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_594 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_595 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_596 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_597 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_598 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_599 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_600 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_601 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_602 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_603 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_604 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_605 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_606 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_607 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_608 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_609 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_610 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_611 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_612 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_613 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_614 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_615 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_616 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_617 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_618 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_619 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_620 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_621 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_622 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_623 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_624 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_625 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_626 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_627 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_628 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_629 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_630 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_631 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_632 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_633 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_634 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_635 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_636 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_637 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_638 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_639 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_640 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_641 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_642 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_643 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_644 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_645 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_646 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_647 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_648 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_649 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_650 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_651 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_652 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_653 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_654 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_655 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_656 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_657 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_658 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_659 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_660 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_661 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_662 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_663 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_664 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_665 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_666 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_667 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_668 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_669 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_670 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_671 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_672 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_673 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_674 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_675 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_676 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_677 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_678 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_679 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_680 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_681 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_682 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_683 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_684 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_685 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_686 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_687 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_688 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_689 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_690 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_691 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_692 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_693 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_694 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_695 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_696 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_697 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_698 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_699 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_700 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_701 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_702 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_703 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_704 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_705 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_706 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_707 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_708 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_709 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_710 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_711 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_712 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_713 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_714 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_715 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_716 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_717 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_718 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_719 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_720 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_721 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_722 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_723 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_724 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_725 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_726 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_727 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_728 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_729 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_730 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_731 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_732 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_733 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_734 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_735 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_736 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_737 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_738 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_739 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_740 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_741 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_742 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_743 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_744 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_745 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_746 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_747 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_748 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_749 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_750 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_751 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_752 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_753 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_754 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_755 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_756 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_757 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_758 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_759 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_760 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_761 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_762 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_763 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_764 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_765 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_766 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_767 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_768 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_769 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_770 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_771 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_772 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_773 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_774 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_775 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_776 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_777 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_778 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_779 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_780 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_781 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_782 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_783 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_784 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_785 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_786 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_787 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_788 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_789 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_790 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_791 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_792 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_793 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_794 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_795 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_796 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_797 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_798 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_799 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_800 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_801 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_802 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_803 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_804 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_805 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_806 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_807 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_808 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_809 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_810 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_811 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_812 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_813 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_814 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_815 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_816 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_817 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_818 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_819 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_820 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_821 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_822 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_823 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_824 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_825 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_826 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_827 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_828 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_829 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_830 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_831 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_832 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_833 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_834 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_835 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_836 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_837 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_838 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_839 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_840 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_841 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_842 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_843 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_844 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_845 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_846 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_847 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_848 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_849 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_850 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_851 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_852 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_853 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_854 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_855 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_856 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_857 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_858 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_859 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_860 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_861 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_862 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_863 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_864 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_865 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_866 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_867 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_868 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_869 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_870 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_871 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_872 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_873 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_874 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_875 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_876 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_877 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_878 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_879 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_880 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_881 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_882 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_883 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_884 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_885 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_886 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_887 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_888 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_889 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_890 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_891 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_892 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_893 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_894 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_895 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_896 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_897 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_898 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_899 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_900 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_901 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_902 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_903 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_904 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_905 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_906 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_907 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_908 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_909 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_910 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_911 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_912 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_913 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_914 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_915 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_916 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_917 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_918 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_919 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_920 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_921 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_922 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_923 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_924 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_925 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_926 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_927 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_928 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_929 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_930 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_931 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_932 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_933 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_934 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_935 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_936 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_937 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_938 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_939 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_940 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_941 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_942 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_943 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_944 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_945 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_946 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_947 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_948 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_949 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_950 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_951 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_952 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_953 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_954 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_955 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_956 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_957 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_958 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_959 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_960 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_961 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_962 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_963 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_964 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_965 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_966 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_967 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_968 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_969 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_970 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_971 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_972 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_973 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_974 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_975 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_976 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_977 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_978 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_979 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_980 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_981 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_982 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_983 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_984 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_985 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_986 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_987 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_988 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_989 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_990 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_991 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_992 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_993 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_994 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_995 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_996 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_997 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_998 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_999 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_1000 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1001 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1002 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1003 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1004 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1005 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_1006 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1007 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1008 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1009 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1010 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1011 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_1012 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1013 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1014 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1015 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1016 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1017 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_1018 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1019 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1020 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1021 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1022 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1023 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1024 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1025 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1026 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1027 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1028 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_1029 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1030 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_1031 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1032 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_1033 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1034 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1035 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1036 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1037 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1038 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_1039 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1040 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1041 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1042 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1043 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1044 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_1045 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1046 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1047 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1048 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1049 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1050 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_1051 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1052 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1053 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1054 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1055 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1056 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_1057 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1058 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1059 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1060 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1061 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1062 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_1063 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1064 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1065 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1066 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1067 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1068 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1069 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1070 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1071 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1072 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1073 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_1074 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1075 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_1076 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1077 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_1078 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1079 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1080 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1081 = constant opaque<"_", "0xDEADBEEF"> : tensor<30522x128xf32> | |
| %cst_1082 = constant opaque<"_", "0xDEADBEEF"> : tensor<2x512xf32> | |
| %0 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<1x384x512xf32> | |
| %cst_1083 = constant opaque<"_", "0xDEADBEEF"> : tensor<384x512xf32> | |
| %cst_1084 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1085 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1086 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %1 = mhlo.constant dense<0.000000e+00> : tensor<f32> | |
| %2 = mhlo.constant dense<0xFF800000> : tensor<f32> | |
| %3 = mhlo.constant dense<-1.000000e+04> : tensor<f32> | |
| %4 = mhlo.constant dense<0.176776692> : tensor<f32> | |
| %5 = mhlo.constant dense<1.000000e+04> : tensor<f32> | |
| %6 = mhlo.constant dense<1.000000e+00> : tensor<1x384x1xf32> | |
| %7 = linalg.tensor_expand_shape %arg2 [[0], [1, 2]] : tensor<1x384xi32> into tensor<1x384x1xi32> | |
| %8 = "mhlo.torch_index_select"(%cst_1081, %7) {batch_dims = 0 : i64, dim = 0 : i64} : (tensor<30522x128xf32>, tensor<1x384x1xi32>) -> tensor<1x384x1x128xf32> | |
| %9 = "mhlo.reshape"(%8) : (tensor<1x384x1x128xf32>) -> tensor<1x384x128xf32> | |
| %10 = "mhlo.slice"(%9) {limit_indices = dense<[1, 384, 128]> : tensor<3xi64>, start_indices = dense<[0, 1, 0]> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} : (tensor<1x384x128xf32>) -> tensor<1x383x128xf32> | |
| %11 = "mhlo.pad"(%10, %1) {edge_padding_high = dense<[0, 1, 0]> : tensor<3xi64>, edge_padding_low = dense<0> : tensor<3xi64>, interior_padding = dense<0> : tensor<3xi64>} : (tensor<1x383x128xf32>, tensor<f32>) -> tensor<1x384x128xf32> | |
| %12 = "mhlo.slice"(%9) {limit_indices = dense<[1, 383, 128]> : tensor<3xi64>, start_indices = dense<0> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} : (tensor<1x384x128xf32>) -> tensor<1x383x128xf32> | |
| %13 = "mhlo.pad"(%12, %1) {edge_padding_high = dense<0> : tensor<3xi64>, edge_padding_low = dense<[0, 1, 0]> : tensor<3xi64>, interior_padding = dense<0> : tensor<3xi64>} : (tensor<1x383x128xf32>, tensor<f32>) -> tensor<1x384x128xf32> | |
| %14 = "mhlo.concatenate"(%11, %9, %13) {dimension = 2 : i64} : (tensor<1x384x128xf32>, tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x384xf32> | |
| %15 = "mhlo.reshape"(%14) : (tensor<1x384x384xf32>) -> tensor<384x384xf32> | |
| %16 = "mhlo.dot"(%15, %cst_1083) : (tensor<384x384xf32>, tensor<384x512xf32>) -> tensor<384x512xf32> | |
| %17 = chlo.broadcast_add %16, %cst_1084 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %18 = "mhlo.reshape"(%17) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %19 = "mhlo.convert"(%arg1) : (tensor<1x384xi32>) -> tensor<1x384xf32> | |
| %20 = "mhlo.reshape"(%19) : (tensor<1x384xf32>) -> tensor<1x1x384xf32> | |
| %21 = chlo.broadcast_multiply %20, %6 : (tensor<1x1x384xf32>, tensor<1x384x1xf32>) -> tensor<1x384x384xf32> | |
| %22 = linalg.tensor_expand_shape %21 [[0], [1, 2], [3]] : tensor<1x384x384xf32> into tensor<1x1x384x384xf32> | |
| %23 = chlo.broadcast_multiply %22, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x1x384x384xf32>, tensor<f32>) -> tensor<1x1x384x384xf32> | |
| %24 = chlo.broadcast_add %23, %3 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x1x384x384xf32>, tensor<f32>) -> tensor<1x1x384x384xf32> | |
| %25 = "mhlo.torch_index_select"(%cst_1082, %arg0) {batch_dims = 0 : i64, dim = 0 : i64} : (tensor<2x512xf32>, tensor<1x384xi32>) -> tensor<1x384x512xf32> | |
| %26 = chlo.broadcast_add %18, %25 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %27 = chlo.broadcast_add %26, %0 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %28 = chlo.broadcast_multiply %27, %cst_1085 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %29 = chlo.broadcast_add %28, %cst_1086 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %30 = "mhlo.reshape"(%29) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %31 = "mhlo.dot"(%30, %cst_1071) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %32 = chlo.broadcast_add %31, %cst_1072 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %33 = "mhlo.reshape"(%32) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %34 = "mhlo.transpose"(%33) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %35 = "mhlo.dot"(%30, %cst_1067) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %36 = "mhlo.reshape"(%35) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %37 = "mhlo.broadcast_in_dim"(%cst_1068) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %38 = mhlo.add %36, %37 : tensor<1x384x128xf32> | |
| %39 = chlo.broadcast_multiply %38, %cst_1069 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %40 = chlo.broadcast_add %39, %cst_1070 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %41 = "mhlo.reshape"(%40) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %42 = "mhlo.dot"(%41, %cst_1075) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %43 = chlo.broadcast_add %42, %cst_1076 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %44 = "mhlo.reshape"(%43) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %45 = "mhlo.transpose"(%44) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %46 = "mhlo.dot"(%41, %cst_1073) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %47 = chlo.broadcast_add %46, %cst_1074 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %48 = "mhlo.reshape"(%47) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %49 = "mhlo.transpose"(%48) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %50 = "mhlo.dot_general"(%49, %45) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %51 = chlo.broadcast_multiply %50, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %52 = chlo.broadcast_add %51, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %53 = "mhlo.reduce"(%52, %2) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %54 = linalg.tensor_expand_shape %53 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %55 = chlo.broadcast_subtract %52, %54 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %56 = "mhlo.exponential"(%55) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %57 = "mhlo.reduce"(%56, %1) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %58 = linalg.tensor_expand_shape %57 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %59 = chlo.broadcast_divide %56, %58 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %60 = "mhlo.dot_general"(%59, %34) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %61 = "mhlo.transpose"(%60) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %62 = "mhlo.reshape"(%61) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %63 = "mhlo.dot"(%62, %cst_1077) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %64 = chlo.broadcast_add %63, %cst_1078 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %65 = "mhlo.reshape"(%64) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %66 = "mhlo.dot"(%30, %cst_1064) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %67 = chlo.broadcast_add %66, %cst_1065 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %68 = "mhlo.reshape"(%67) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %69 = chlo.broadcast_multiply %68, %cst_1066 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %70 = chlo.broadcast_add %69, %cst_1078 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %71 = chlo.broadcast_add %65, %70 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %72 = chlo.broadcast_multiply %71, %cst_1079 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %73 = chlo.broadcast_add %72, %cst_1080 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %74 = "mhlo.reshape"(%73) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %75 = "mhlo.dot"(%74, %cst_1062) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %76 = chlo.broadcast_add %75, %cst_1063 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %77 = "mhlo.reshape"(%76) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %78 = chlo.broadcast_maximum %77, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %79 = "mhlo.reshape"(%78) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %80 = "mhlo.dot"(%79, %cst_1058) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %81 = chlo.broadcast_add %80, %cst_1059 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %82 = "mhlo.reshape"(%81) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %83 = chlo.broadcast_add %82, %73 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %84 = chlo.broadcast_multiply %83, %cst_1060 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %85 = chlo.broadcast_add %84, %cst_1061 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %86 = "mhlo.reshape"(%85) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %87 = "mhlo.dot"(%86, %cst_1056) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %88 = chlo.broadcast_add %87, %cst_1057 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %89 = "mhlo.reshape"(%88) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %90 = chlo.broadcast_maximum %89, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %91 = "mhlo.reshape"(%90) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %92 = "mhlo.dot"(%91, %cst_1052) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %93 = chlo.broadcast_add %92, %cst_1053 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %94 = "mhlo.reshape"(%93) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %95 = chlo.broadcast_add %94, %85 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %96 = chlo.broadcast_multiply %95, %cst_1054 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %97 = chlo.broadcast_add %96, %cst_1055 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %98 = "mhlo.reshape"(%97) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %99 = "mhlo.dot"(%98, %cst_1050) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %100 = chlo.broadcast_add %99, %cst_1051 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %101 = "mhlo.reshape"(%100) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %102 = chlo.broadcast_maximum %101, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %103 = "mhlo.reshape"(%102) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %104 = "mhlo.dot"(%103, %cst_1046) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %105 = chlo.broadcast_add %104, %cst_1047 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %106 = "mhlo.reshape"(%105) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %107 = chlo.broadcast_add %106, %97 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %108 = chlo.broadcast_multiply %107, %cst_1048 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %109 = chlo.broadcast_add %108, %cst_1049 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %110 = "mhlo.reshape"(%109) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %111 = "mhlo.dot"(%110, %cst_1044) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %112 = chlo.broadcast_add %111, %cst_1045 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %113 = "mhlo.reshape"(%112) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %114 = chlo.broadcast_maximum %113, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %115 = "mhlo.reshape"(%114) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %116 = "mhlo.dot"(%115, %cst_1036) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %117 = chlo.broadcast_add %116, %cst_1037 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %118 = "mhlo.reshape"(%117) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %119 = chlo.broadcast_add %118, %109 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %120 = chlo.broadcast_multiply %119, %cst_1042 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %121 = chlo.broadcast_add %120, %cst_1043 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %122 = "mhlo.reshape"(%121) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %123 = "mhlo.dot"(%122, %cst_1038) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %124 = chlo.broadcast_add %123, %cst_1039 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %125 = "mhlo.reshape"(%124) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %126 = chlo.broadcast_add %125, %29 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %127 = chlo.broadcast_multiply %126, %cst_1040 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %128 = chlo.broadcast_add %127, %cst_1041 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %129 = "mhlo.reshape"(%128) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %130 = "mhlo.dot"(%129, %cst_1026) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %131 = chlo.broadcast_add %130, %cst_1027 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %132 = "mhlo.reshape"(%131) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %133 = "mhlo.transpose"(%132) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %134 = "mhlo.dot"(%129, %cst_1022) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %135 = "mhlo.reshape"(%134) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %136 = "mhlo.broadcast_in_dim"(%cst_1023) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %137 = mhlo.add %135, %136 : tensor<1x384x128xf32> | |
| %138 = chlo.broadcast_multiply %137, %cst_1024 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %139 = chlo.broadcast_add %138, %cst_1025 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %140 = "mhlo.reshape"(%139) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %141 = "mhlo.dot"(%140, %cst_1030) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %142 = chlo.broadcast_add %141, %cst_1031 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %143 = "mhlo.reshape"(%142) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %144 = "mhlo.transpose"(%143) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %145 = "mhlo.dot"(%140, %cst_1028) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %146 = chlo.broadcast_add %145, %cst_1029 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %147 = "mhlo.reshape"(%146) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %148 = "mhlo.transpose"(%147) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %149 = "mhlo.dot_general"(%148, %144) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %150 = chlo.broadcast_multiply %149, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %151 = chlo.broadcast_add %150, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %152 = "mhlo.reduce"(%151, %2) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %153 = linalg.tensor_expand_shape %152 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %154 = chlo.broadcast_subtract %151, %153 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %155 = "mhlo.exponential"(%154) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %156 = "mhlo.reduce"(%155, %1) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %157 = linalg.tensor_expand_shape %156 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %158 = chlo.broadcast_divide %155, %157 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %159 = "mhlo.dot_general"(%158, %133) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %160 = "mhlo.transpose"(%159) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %161 = "mhlo.reshape"(%160) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %162 = "mhlo.dot"(%161, %cst_1032) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %163 = chlo.broadcast_add %162, %cst_1033 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %164 = "mhlo.reshape"(%163) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %165 = "mhlo.dot"(%129, %cst_1019) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %166 = chlo.broadcast_add %165, %cst_1020 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %167 = "mhlo.reshape"(%166) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %168 = chlo.broadcast_multiply %167, %cst_1021 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %169 = chlo.broadcast_add %168, %cst_1033 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %170 = chlo.broadcast_add %164, %169 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %171 = chlo.broadcast_multiply %170, %cst_1034 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %172 = chlo.broadcast_add %171, %cst_1035 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %173 = "mhlo.reshape"(%172) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %174 = "mhlo.dot"(%173, %cst_1017) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %175 = chlo.broadcast_add %174, %cst_1018 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %176 = "mhlo.reshape"(%175) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %177 = chlo.broadcast_maximum %176, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %178 = "mhlo.reshape"(%177) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %179 = "mhlo.dot"(%178, %cst_1013) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %180 = chlo.broadcast_add %179, %cst_1014 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %181 = "mhlo.reshape"(%180) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %182 = chlo.broadcast_add %181, %172 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %183 = chlo.broadcast_multiply %182, %cst_1015 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %184 = chlo.broadcast_add %183, %cst_1016 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %185 = "mhlo.reshape"(%184) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %186 = "mhlo.dot"(%185, %cst_1011) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %187 = chlo.broadcast_add %186, %cst_1012 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %188 = "mhlo.reshape"(%187) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %189 = chlo.broadcast_maximum %188, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %190 = "mhlo.reshape"(%189) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %191 = "mhlo.dot"(%190, %cst_1007) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %192 = chlo.broadcast_add %191, %cst_1008 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %193 = "mhlo.reshape"(%192) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %194 = chlo.broadcast_add %193, %184 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %195 = chlo.broadcast_multiply %194, %cst_1009 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %196 = chlo.broadcast_add %195, %cst_1010 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %197 = "mhlo.reshape"(%196) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %198 = "mhlo.dot"(%197, %cst_1005) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %199 = chlo.broadcast_add %198, %cst_1006 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %200 = "mhlo.reshape"(%199) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %201 = chlo.broadcast_maximum %200, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %202 = "mhlo.reshape"(%201) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %203 = "mhlo.dot"(%202, %cst_1001) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %204 = chlo.broadcast_add %203, %cst_1002 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %205 = "mhlo.reshape"(%204) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %206 = chlo.broadcast_add %205, %196 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %207 = chlo.broadcast_multiply %206, %cst_1003 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %208 = chlo.broadcast_add %207, %cst_1004 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %209 = "mhlo.reshape"(%208) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %210 = "mhlo.dot"(%209, %cst_999) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %211 = chlo.broadcast_add %210, %cst_1000 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %212 = "mhlo.reshape"(%211) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %213 = chlo.broadcast_maximum %212, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %214 = "mhlo.reshape"(%213) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %215 = "mhlo.dot"(%214, %cst_991) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %216 = chlo.broadcast_add %215, %cst_992 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %217 = "mhlo.reshape"(%216) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %218 = chlo.broadcast_add %217, %208 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %219 = chlo.broadcast_multiply %218, %cst_997 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %220 = chlo.broadcast_add %219, %cst_998 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %221 = "mhlo.reshape"(%220) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %222 = "mhlo.dot"(%221, %cst_993) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %223 = chlo.broadcast_add %222, %cst_994 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %224 = "mhlo.reshape"(%223) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %225 = chlo.broadcast_add %224, %128 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %226 = chlo.broadcast_multiply %225, %cst_995 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %227 = chlo.broadcast_add %226, %cst_996 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %228 = "mhlo.reshape"(%227) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %229 = "mhlo.dot"(%228, %cst_531) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %230 = chlo.broadcast_add %229, %cst_532 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %231 = "mhlo.reshape"(%230) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %232 = "mhlo.transpose"(%231) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %233 = "mhlo.dot"(%228, %cst_527) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %234 = "mhlo.reshape"(%233) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %235 = "mhlo.broadcast_in_dim"(%cst_528) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %236 = mhlo.add %234, %235 : tensor<1x384x128xf32> | |
| %237 = chlo.broadcast_multiply %236, %cst_529 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %238 = chlo.broadcast_add %237, %cst_530 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %239 = "mhlo.reshape"(%238) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %240 = "mhlo.dot"(%239, %cst_535) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %241 = chlo.broadcast_add %240, %cst_536 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %242 = "mhlo.reshape"(%241) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %243 = "mhlo.transpose"(%242) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %244 = "mhlo.dot"(%239, %cst_533) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %245 = chlo.broadcast_add %244, %cst_534 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %246 = "mhlo.reshape"(%245) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %247 = "mhlo.transpose"(%246) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %248 = "mhlo.dot_general"(%247, %243) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %249 = chlo.broadcast_multiply %248, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %250 = chlo.broadcast_add %249, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %251 = "mhlo.reduce"(%250, %2) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %252 = linalg.tensor_expand_shape %251 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %253 = chlo.broadcast_subtract %250, %252 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %254 = "mhlo.exponential"(%253) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %255 = "mhlo.reduce"(%254, %1) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %256 = linalg.tensor_expand_shape %255 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %257 = chlo.broadcast_divide %254, %256 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %258 = "mhlo.dot_general"(%257, %232) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %259 = "mhlo.transpose"(%258) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %260 = "mhlo.reshape"(%259) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %261 = "mhlo.dot"(%260, %cst_537) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %262 = chlo.broadcast_add %261, %cst_538 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %263 = "mhlo.reshape"(%262) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %264 = "mhlo.dot"(%228, %cst_524) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %265 = chlo.broadcast_add %264, %cst_525 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %266 = "mhlo.reshape"(%265) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %267 = chlo.broadcast_multiply %266, %cst_526 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %268 = chlo.broadcast_add %267, %cst_538 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %269 = chlo.broadcast_add %263, %268 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %270 = chlo.broadcast_multiply %269, %cst_539 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %271 = chlo.broadcast_add %270, %cst_540 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %272 = "mhlo.reshape"(%271) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %273 = "mhlo.dot"(%272, %cst_522) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %274 = chlo.broadcast_add %273, %cst_523 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %275 = "mhlo.reshape"(%274) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %276 = chlo.broadcast_maximum %275, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %277 = "mhlo.reshape"(%276) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %278 = "mhlo.dot"(%277, %cst_518) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %279 = chlo.broadcast_add %278, %cst_519 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %280 = "mhlo.reshape"(%279) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %281 = chlo.broadcast_add %280, %271 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %282 = chlo.broadcast_multiply %281, %cst_520 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %283 = chlo.broadcast_add %282, %cst_521 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %284 = "mhlo.reshape"(%283) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %285 = "mhlo.dot"(%284, %cst_516) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %286 = chlo.broadcast_add %285, %cst_517 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %287 = "mhlo.reshape"(%286) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %288 = chlo.broadcast_maximum %287, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %289 = "mhlo.reshape"(%288) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %290 = "mhlo.dot"(%289, %cst_512) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %291 = chlo.broadcast_add %290, %cst_513 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %292 = "mhlo.reshape"(%291) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %293 = chlo.broadcast_add %292, %283 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %294 = chlo.broadcast_multiply %293, %cst_514 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %295 = chlo.broadcast_add %294, %cst_515 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %296 = "mhlo.reshape"(%295) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %297 = "mhlo.dot"(%296, %cst_510) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %298 = chlo.broadcast_add %297, %cst_511 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %299 = "mhlo.reshape"(%298) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %300 = chlo.broadcast_maximum %299, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %301 = "mhlo.reshape"(%300) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %302 = "mhlo.dot"(%301, %cst_506) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %303 = chlo.broadcast_add %302, %cst_507 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %304 = "mhlo.reshape"(%303) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %305 = chlo.broadcast_add %304, %295 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %306 = chlo.broadcast_multiply %305, %cst_508 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %307 = chlo.broadcast_add %306, %cst_509 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %308 = "mhlo.reshape"(%307) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %309 = "mhlo.dot"(%308, %cst_504) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %310 = chlo.broadcast_add %309, %cst_505 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %311 = "mhlo.reshape"(%310) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %312 = chlo.broadcast_maximum %311, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %313 = "mhlo.reshape"(%312) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %314 = "mhlo.dot"(%313, %cst_496) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %315 = chlo.broadcast_add %314, %cst_497 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %316 = "mhlo.reshape"(%315) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %317 = chlo.broadcast_add %316, %307 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %318 = chlo.broadcast_multiply %317, %cst_502 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %319 = chlo.broadcast_add %318, %cst_503 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %320 = "mhlo.reshape"(%319) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %321 = "mhlo.dot"(%320, %cst_498) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %322 = chlo.broadcast_add %321, %cst_499 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %323 = "mhlo.reshape"(%322) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %324 = chlo.broadcast_add %323, %227 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %325 = chlo.broadcast_multiply %324, %cst_500 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %326 = chlo.broadcast_add %325, %cst_501 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %327 = "mhlo.reshape"(%326) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %328 = "mhlo.dot"(%327, %cst_306) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %329 = chlo.broadcast_add %328, %cst_307 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %330 = "mhlo.reshape"(%329) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %331 = "mhlo.transpose"(%330) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %332 = "mhlo.dot"(%327, %cst_302) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %333 = "mhlo.reshape"(%332) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %334 = "mhlo.broadcast_in_dim"(%cst_303) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %335 = mhlo.add %333, %334 : tensor<1x384x128xf32> | |
| %336 = chlo.broadcast_multiply %335, %cst_304 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %337 = chlo.broadcast_add %336, %cst_305 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %338 = "mhlo.reshape"(%337) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %339 = "mhlo.dot"(%338, %cst_310) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %340 = chlo.broadcast_add %339, %cst_311 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %341 = "mhlo.reshape"(%340) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %342 = "mhlo.transpose"(%341) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %343 = "mhlo.dot"(%338, %cst_308) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %344 = chlo.broadcast_add %343, %cst_309 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %345 = "mhlo.reshape"(%344) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %346 = "mhlo.transpose"(%345) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %347 = "mhlo.dot_general"(%346, %342) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %348 = chlo.broadcast_multiply %347, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %349 = chlo.broadcast_add %348, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %350 = "mhlo.reduce"(%349, %2) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %351 = linalg.tensor_expand_shape %350 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %352 = chlo.broadcast_subtract %349, %351 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %353 = "mhlo.exponential"(%352) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %354 = "mhlo.reduce"(%353, %1) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %355 = linalg.tensor_expand_shape %354 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %356 = chlo.broadcast_divide %353, %355 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %357 = "mhlo.dot_general"(%356, %331) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %358 = "mhlo.transpose"(%357) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %359 = "mhlo.reshape"(%358) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %360 = "mhlo.dot"(%359, %cst_312) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %361 = chlo.broadcast_add %360, %cst_313 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %362 = "mhlo.reshape"(%361) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %363 = "mhlo.dot"(%327, %cst_299) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %364 = chlo.broadcast_add %363, %cst_300 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %365 = "mhlo.reshape"(%364) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %366 = chlo.broadcast_multiply %365, %cst_301 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %367 = chlo.broadcast_add %366, %cst_313 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %368 = chlo.broadcast_add %362, %367 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %369 = chlo.broadcast_multiply %368, %cst_314 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %370 = chlo.broadcast_add %369, %cst_315 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %371 = "mhlo.reshape"(%370) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %372 = "mhlo.dot"(%371, %cst_297) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %373 = chlo.broadcast_add %372, %cst_298 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %374 = "mhlo.reshape"(%373) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %375 = chlo.broadcast_maximum %374, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %376 = "mhlo.reshape"(%375) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %377 = "mhlo.dot"(%376, %cst_293) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %378 = chlo.broadcast_add %377, %cst_294 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %379 = "mhlo.reshape"(%378) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %380 = chlo.broadcast_add %379, %370 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %381 = chlo.broadcast_multiply %380, %cst_295 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %382 = chlo.broadcast_add %381, %cst_296 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %383 = "mhlo.reshape"(%382) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %384 = "mhlo.dot"(%383, %cst_291) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %385 = chlo.broadcast_add %384, %cst_292 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %386 = "mhlo.reshape"(%385) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %387 = chlo.broadcast_maximum %386, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %388 = "mhlo.reshape"(%387) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %389 = "mhlo.dot"(%388, %cst_287) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %390 = chlo.broadcast_add %389, %cst_288 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %391 = "mhlo.reshape"(%390) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %392 = chlo.broadcast_add %391, %382 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %393 = chlo.broadcast_multiply %392, %cst_289 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %394 = chlo.broadcast_add %393, %cst_290 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %395 = "mhlo.reshape"(%394) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %396 = "mhlo.dot"(%395, %cst_285) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %397 = chlo.broadcast_add %396, %cst_286 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %398 = "mhlo.reshape"(%397) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %399 = chlo.broadcast_maximum %398, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %400 = "mhlo.reshape"(%399) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %401 = "mhlo.dot"(%400, %cst_281) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %402 = chlo.broadcast_add %401, %cst_282 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %403 = "mhlo.reshape"(%402) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %404 = chlo.broadcast_add %403, %394 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %405 = chlo.broadcast_multiply %404, %cst_283 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %406 = chlo.broadcast_add %405, %cst_284 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %407 = "mhlo.reshape"(%406) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %408 = "mhlo.dot"(%407, %cst_279) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %409 = chlo.broadcast_add %408, %cst_280 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %410 = "mhlo.reshape"(%409) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %411 = chlo.broadcast_maximum %410, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %412 = "mhlo.reshape"(%411) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %413 = "mhlo.dot"(%412, %cst_271) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %414 = chlo.broadcast_add %413, %cst_272 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %415 = "mhlo.reshape"(%414) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %416 = chlo.broadcast_add %415, %406 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %417 = chlo.broadcast_multiply %416, %cst_277 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %418 = chlo.broadcast_add %417, %cst_278 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %419 = "mhlo.reshape"(%418) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %420 = "mhlo.dot"(%419, %cst_273) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %421 = chlo.broadcast_add %420, %cst_274 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %422 = "mhlo.reshape"(%421) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %423 = chlo.broadcast_add %422, %326 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %424 = chlo.broadcast_multiply %423, %cst_275 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %425 = chlo.broadcast_add %424, %cst_276 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %426 = "mhlo.reshape"(%425) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %427 = "mhlo.dot"(%426, %cst_261) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %428 = chlo.broadcast_add %427, %cst_262 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %429 = "mhlo.reshape"(%428) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %430 = "mhlo.transpose"(%429) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %431 = "mhlo.dot"(%426, %cst_257) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %432 = "mhlo.reshape"(%431) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %433 = "mhlo.broadcast_in_dim"(%cst_258) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %434 = mhlo.add %432, %433 : tensor<1x384x128xf32> | |
| %435 = chlo.broadcast_multiply %434, %cst_259 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %436 = chlo.broadcast_add %435, %cst_260 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %437 = "mhlo.reshape"(%436) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %438 = "mhlo.dot"(%437, %cst_265) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %439 = chlo.broadcast_add %438, %cst_266 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %440 = "mhlo.reshape"(%439) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %441 = "mhlo.transpose"(%440) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %442 = "mhlo.dot"(%437, %cst_263) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %443 = chlo.broadcast_add %442, %cst_264 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %444 = "mhlo.reshape"(%443) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %445 = "mhlo.transpose"(%444) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %446 = "mhlo.dot_general"(%445, %441) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %447 = chlo.broadcast_multiply %446, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %448 = chlo.broadcast_add %447, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %449 = "mhlo.reduce"(%448, %2) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %450 = linalg.tensor_expand_shape %449 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %451 = chlo.broadcast_subtract %448, %450 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %452 = "mhlo.exponential"(%451) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %453 = "mhlo.reduce"(%452, %1) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %454 = linalg.tensor_expand_shape %453 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %455 = chlo.broadcast_divide %452, %454 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %456 = "mhlo.dot_general"(%455, %430) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %457 = "mhlo.transpose"(%456) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %458 = "mhlo.reshape"(%457) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %459 = "mhlo.dot"(%458, %cst_267) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %460 = chlo.broadcast_add %459, %cst_268 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %461 = "mhlo.reshape"(%460) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %462 = "mhlo.dot"(%426, %cst_254) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %463 = chlo.broadcast_add %462, %cst_255 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %464 = "mhlo.reshape"(%463) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %465 = chlo.broadcast_multiply %464, %cst_256 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %466 = chlo.broadcast_add %465, %cst_268 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %467 = chlo.broadcast_add %461, %466 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %468 = chlo.broadcast_multiply %467, %cst_269 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %469 = chlo.broadcast_add %468, %cst_270 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %470 = "mhlo.reshape"(%469) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %471 = "mhlo.dot"(%470, %cst_252) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %472 = chlo.broadcast_add %471, %cst_253 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %473 = "mhlo.reshape"(%472) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %474 = chlo.broadcast_maximum %473, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %475 = "mhlo.reshape"(%474) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %476 = "mhlo.dot"(%475, %cst_248) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %477 = chlo.broadcast_add %476, %cst_249 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %478 = "mhlo.reshape"(%477) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %479 = chlo.broadcast_add %478, %469 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %480 = chlo.broadcast_multiply %479, %cst_250 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %481 = chlo.broadcast_add %480, %cst_251 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %482 = "mhlo.reshape"(%481) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %483 = "mhlo.dot"(%482, %cst_246) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %484 = chlo.broadcast_add %483, %cst_247 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %485 = "mhlo.reshape"(%484) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %486 = chlo.broadcast_maximum %485, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %487 = "mhlo.reshape"(%486) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %488 = "mhlo.dot"(%487, %cst_242) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %489 = chlo.broadcast_add %488, %cst_243 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %490 = "mhlo.reshape"(%489) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %491 = chlo.broadcast_add %490, %481 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %492 = chlo.broadcast_multiply %491, %cst_244 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %493 = chlo.broadcast_add %492, %cst_245 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %494 = "mhlo.reshape"(%493) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %495 = "mhlo.dot"(%494, %cst_240) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %496 = chlo.broadcast_add %495, %cst_241 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %497 = "mhlo.reshape"(%496) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %498 = chlo.broadcast_maximum %497, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %499 = "mhlo.reshape"(%498) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %500 = "mhlo.dot"(%499, %cst_236) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %501 = chlo.broadcast_add %500, %cst_237 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %502 = "mhlo.reshape"(%501) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %503 = chlo.broadcast_add %502, %493 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %504 = chlo.broadcast_multiply %503, %cst_238 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %505 = chlo.broadcast_add %504, %cst_239 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %506 = "mhlo.reshape"(%505) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %507 = "mhlo.dot"(%506, %cst_234) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %508 = chlo.broadcast_add %507, %cst_235 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %509 = "mhlo.reshape"(%508) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %510 = chlo.broadcast_maximum %509, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %511 = "mhlo.reshape"(%510) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %512 = "mhlo.dot"(%511, %cst_226) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %513 = chlo.broadcast_add %512, %cst_227 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %514 = "mhlo.reshape"(%513) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %515 = chlo.broadcast_add %514, %505 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %516 = chlo.broadcast_multiply %515, %cst_232 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %517 = chlo.broadcast_add %516, %cst_233 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %518 = "mhlo.reshape"(%517) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %519 = "mhlo.dot"(%518, %cst_228) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %520 = chlo.broadcast_add %519, %cst_229 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %521 = "mhlo.reshape"(%520) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %522 = chlo.broadcast_add %521, %425 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %523 = chlo.broadcast_multiply %522, %cst_230 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %524 = chlo.broadcast_add %523, %cst_231 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %525 = "mhlo.reshape"(%524) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %526 = "mhlo.dot"(%525, %cst_216) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %527 = chlo.broadcast_add %526, %cst_217 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %528 = "mhlo.reshape"(%527) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %529 = "mhlo.transpose"(%528) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %530 = "mhlo.dot"(%525, %cst_212) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %531 = "mhlo.reshape"(%530) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %532 = "mhlo.broadcast_in_dim"(%cst_213) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %533 = mhlo.add %531, %532 : tensor<1x384x128xf32> | |
| %534 = chlo.broadcast_multiply %533, %cst_214 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %535 = chlo.broadcast_add %534, %cst_215 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %536 = "mhlo.reshape"(%535) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %537 = "mhlo.dot"(%536, %cst_220) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %538 = chlo.broadcast_add %537, %cst_221 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %539 = "mhlo.reshape"(%538) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %540 = "mhlo.transpose"(%539) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %541 = "mhlo.dot"(%536, %cst_218) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %542 = chlo.broadcast_add %541, %cst_219 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %543 = "mhlo.reshape"(%542) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %544 = "mhlo.transpose"(%543) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %545 = "mhlo.dot_general"(%544, %540) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %546 = chlo.broadcast_multiply %545, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %547 = chlo.broadcast_add %546, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %548 = "mhlo.reduce"(%547, %2) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %549 = linalg.tensor_expand_shape %548 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %550 = chlo.broadcast_subtract %547, %549 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %551 = "mhlo.exponential"(%550) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %552 = "mhlo.reduce"(%551, %1) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %553 = linalg.tensor_expand_shape %552 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %554 = chlo.broadcast_divide %551, %553 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %555 = "mhlo.dot_general"(%554, %529) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %556 = "mhlo.transpose"(%555) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %557 = "mhlo.reshape"(%556) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %558 = "mhlo.dot"(%557, %cst_222) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %559 = chlo.broadcast_add %558, %cst_223 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %560 = "mhlo.reshape"(%559) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %561 = "mhlo.dot"(%525, %cst_209) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %562 = chlo.broadcast_add %561, %cst_210 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %563 = "mhlo.reshape"(%562) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %564 = chlo.broadcast_multiply %563, %cst_211 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %565 = chlo.broadcast_add %564, %cst_223 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %566 = chlo.broadcast_add %560, %565 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %567 = chlo.broadcast_multiply %566, %cst_224 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %568 = chlo.broadcast_add %567, %cst_225 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %569 = "mhlo.reshape"(%568) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %570 = "mhlo.dot"(%569, %cst_207) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %571 = chlo.broadcast_add %570, %cst_208 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %572 = "mhlo.reshape"(%571) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %573 = chlo.broadcast_maximum %572, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %574 = "mhlo.reshape"(%573) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %575 = "mhlo.dot"(%574, %cst_203) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %576 = chlo.broadcast_add %575, %cst_204 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %577 = "mhlo.reshape"(%576) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %578 = chlo.broadcast_add %577, %568 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %579 = chlo.broadcast_multiply %578, %cst_205 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %580 = chlo.broadcast_add %579, %cst_206 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %581 = "mhlo.reshape"(%580) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %582 = "mhlo.dot"(%581, %cst_201) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %583 = chlo.broadcast_add %582, %cst_202 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %584 = "mhlo.reshape"(%583) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %585 = chlo.broadcast_maximum %584, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %586 = "mhlo.reshape"(%585) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %587 = "mhlo.dot"(%586, %cst_197) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %588 = chlo.broadcast_add %587, %cst_198 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %589 = "mhlo.reshape"(%588) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %590 = chlo.broadcast_add %589, %580 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %591 = chlo.broadcast_multiply %590, %cst_199 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %592 = chlo.broadcast_add %591, %cst_200 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %593 = "mhlo.reshape"(%592) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %594 = "mhlo.dot"(%593, %cst_195) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %595 = chlo.broadcast_add %594, %cst_196 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %596 = "mhlo.reshape"(%595) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %597 = chlo.broadcast_maximum %596, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %598 = "mhlo.reshape"(%597) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %599 = "mhlo.dot"(%598, %cst_191) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %600 = chlo.broadcast_add %599, %cst_192 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %601 = "mhlo.reshape"(%600) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %602 = chlo.broadcast_add %601, %592 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %603 = chlo.broadcast_multiply %602, %cst_193 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %604 = chlo.broadcast_add %603, %cst_194 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %605 = "mhlo.reshape"(%604) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %606 = "mhlo.dot"(%605, %cst_189) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %607 = chlo.broadcast_add %606, %cst_190 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %608 = "mhlo.reshape"(%607) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %609 = chlo.broadcast_maximum %608, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %610 = "mhlo.reshape"(%609) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %611 = "mhlo.dot"(%610, %cst_181) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %612 = chlo.broadcast_add %611, %cst_182 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %613 = "mhlo.reshape"(%612) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %614 = chlo.broadcast_add %613, %604 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %615 = chlo.broadcast_multiply %614, %cst_187 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %616 = chlo.broadcast_add %615, %cst_188 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %617 = "mhlo.reshape"(%616) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %618 = "mhlo.dot"(%617, %cst_183) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %619 = chlo.broadcast_add %618, %cst_184 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %620 = "mhlo.reshape"(%619) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %621 = chlo.broadcast_add %620, %524 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %622 = chlo.broadcast_multiply %621, %cst_185 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %623 = chlo.broadcast_add %622, %cst_186 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %624 = "mhlo.reshape"(%623) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %625 = "mhlo.dot"(%624, %cst_171) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %626 = chlo.broadcast_add %625, %cst_172 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %627 = "mhlo.reshape"(%626) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %628 = "mhlo.transpose"(%627) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %629 = "mhlo.dot"(%624, %cst_167) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %630 = "mhlo.reshape"(%629) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %631 = "mhlo.broadcast_in_dim"(%cst_168) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %632 = mhlo.add %630, %631 : tensor<1x384x128xf32> | |
| %633 = chlo.broadcast_multiply %632, %cst_169 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %634 = chlo.broadcast_add %633, %cst_170 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %635 = "mhlo.reshape"(%634) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %636 = "mhlo.dot"(%635, %cst_175) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %637 = chlo.broadcast_add %636, %cst_176 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %638 = "mhlo.reshape"(%637) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %639 = "mhlo.transpose"(%638) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %640 = "mhlo.dot"(%635, %cst_173) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %641 = chlo.broadcast_add %640, %cst_174 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %642 = "mhlo.reshape"(%641) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %643 = "mhlo.transpose"(%642) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %644 = "mhlo.dot_general"(%643, %639) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %645 = chlo.broadcast_multiply %644, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %646 = chlo.broadcast_add %645, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %647 = "mhlo.reduce"(%646, %2) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %648 = linalg.tensor_expand_shape %647 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %649 = chlo.broadcast_subtract %646, %648 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %650 = "mhlo.exponential"(%649) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %651 = "mhlo.reduce"(%650, %1) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %652 = linalg.tensor_expand_shape %651 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %653 = chlo.broadcast_divide %650, %652 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %654 = "mhlo.dot_general"(%653, %628) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %655 = "mhlo.transpose"(%654) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %656 = "mhlo.reshape"(%655) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %657 = "mhlo.dot"(%656, %cst_177) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %658 = chlo.broadcast_add %657, %cst_178 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %659 = "mhlo.reshape"(%658) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %660 = "mhlo.dot"(%624, %cst_164) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %661 = chlo.broadcast_add %660, %cst_165 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %662 = "mhlo.reshape"(%661) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %663 = chlo.broadcast_multiply %662, %cst_166 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %664 = chlo.broadcast_add %663, %cst_178 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %665 = chlo.broadcast_add %659, %664 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %666 = chlo.broadcast_multiply %665, %cst_179 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %667 = chlo.broadcast_add %666, %cst_180 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %668 = "mhlo.reshape"(%667) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %669 = "mhlo.dot"(%668, %cst_162) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %670 = chlo.broadcast_add %669, %cst_163 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %671 = "mhlo.reshape"(%670) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %672 = chlo.broadcast_maximum %671, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %673 = "mhlo.reshape"(%672) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %674 = "mhlo.dot"(%673, %cst_158) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %675 = chlo.broadcast_add %674, %cst_159 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %676 = "mhlo.reshape"(%675) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %677 = chlo.broadcast_add %676, %667 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %678 = chlo.broadcast_multiply %677, %cst_160 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %679 = chlo.broadcast_add %678, %cst_161 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %680 = "mhlo.reshape"(%679) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %681 = "mhlo.dot"(%680, %cst_156) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %682 = chlo.broadcast_add %681, %cst_157 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %683 = "mhlo.reshape"(%682) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %684 = chlo.broadcast_maximum %683, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %685 = "mhlo.reshape"(%684) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %686 = "mhlo.dot"(%685, %cst_152) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %687 = chlo.broadcast_add %686, %cst_153 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %688 = "mhlo.reshape"(%687) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %689 = chlo.broadcast_add %688, %679 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %690 = chlo.broadcast_multiply %689, %cst_154 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %691 = chlo.broadcast_add %690, %cst_155 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %692 = "mhlo.reshape"(%691) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %693 = "mhlo.dot"(%692, %cst_150) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %694 = chlo.broadcast_add %693, %cst_151 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %695 = "mhlo.reshape"(%694) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %696 = chlo.broadcast_maximum %695, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %697 = "mhlo.reshape"(%696) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %698 = "mhlo.dot"(%697, %cst_146) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %699 = chlo.broadcast_add %698, %cst_147 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %700 = "mhlo.reshape"(%699) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %701 = chlo.broadcast_add %700, %691 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %702 = chlo.broadcast_multiply %701, %cst_148 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %703 = chlo.broadcast_add %702, %cst_149 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %704 = "mhlo.reshape"(%703) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %705 = "mhlo.dot"(%704, %cst_144) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %706 = chlo.broadcast_add %705, %cst_145 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %707 = "mhlo.reshape"(%706) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %708 = chlo.broadcast_maximum %707, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %709 = "mhlo.reshape"(%708) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %710 = "mhlo.dot"(%709, %cst_136) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %711 = chlo.broadcast_add %710, %cst_137 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %712 = "mhlo.reshape"(%711) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %713 = chlo.broadcast_add %712, %703 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %714 = chlo.broadcast_multiply %713, %cst_142 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %715 = chlo.broadcast_add %714, %cst_143 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %716 = "mhlo.reshape"(%715) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %717 = "mhlo.dot"(%716, %cst_138) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %718 = chlo.broadcast_add %717, %cst_139 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %719 = "mhlo.reshape"(%718) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %720 = chlo.broadcast_add %719, %623 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %721 = chlo.broadcast_multiply %720, %cst_140 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %722 = chlo.broadcast_add %721, %cst_141 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %723 = "mhlo.reshape"(%722) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %724 = "mhlo.dot"(%723, %cst_126) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %725 = chlo.broadcast_add %724, %cst_127 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %726 = "mhlo.reshape"(%725) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %727 = "mhlo.transpose"(%726) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %728 = "mhlo.dot"(%723, %cst_122) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %729 = "mhlo.reshape"(%728) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %730 = "mhlo.broadcast_in_dim"(%cst_123) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %731 = mhlo.add %729, %730 : tensor<1x384x128xf32> | |
| %732 = chlo.broadcast_multiply %731, %cst_124 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %733 = chlo.broadcast_add %732, %cst_125 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %734 = "mhlo.reshape"(%733) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %735 = "mhlo.dot"(%734, %cst_130) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %736 = chlo.broadcast_add %735, %cst_131 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %737 = "mhlo.reshape"(%736) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %738 = "mhlo.transpose"(%737) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %739 = "mhlo.dot"(%734, %cst_128) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %740 = chlo.broadcast_add %739, %cst_129 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %741 = "mhlo.reshape"(%740) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %742 = "mhlo.transpose"(%741) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %743 = "mhlo.dot_general"(%742, %738) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %744 = chlo.broadcast_multiply %743, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %745 = chlo.broadcast_add %744, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %746 = "mhlo.reduce"(%745, %2) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %747 = linalg.tensor_expand_shape %746 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %748 = chlo.broadcast_subtract %745, %747 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %749 = "mhlo.exponential"(%748) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %750 = "mhlo.reduce"(%749, %1) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %751 = linalg.tensor_expand_shape %750 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %752 = chlo.broadcast_divide %749, %751 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %753 = "mhlo.dot_general"(%752, %727) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %754 = "mhlo.transpose"(%753) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %755 = "mhlo.reshape"(%754) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %756 = "mhlo.dot"(%755, %cst_132) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %757 = chlo.broadcast_add %756, %cst_133 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %758 = "mhlo.reshape"(%757) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %759 = "mhlo.dot"(%723, %cst_119) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %760 = chlo.broadcast_add %759, %cst_120 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %761 = "mhlo.reshape"(%760) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %762 = chlo.broadcast_multiply %761, %cst_121 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %763 = chlo.broadcast_add %762, %cst_133 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %764 = chlo.broadcast_add %758, %763 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %765 = chlo.broadcast_multiply %764, %cst_134 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %766 = chlo.broadcast_add %765, %cst_135 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %767 = "mhlo.reshape"(%766) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %768 = "mhlo.dot"(%767, %cst_117) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %769 = chlo.broadcast_add %768, %cst_118 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %770 = "mhlo.reshape"(%769) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %771 = chlo.broadcast_maximum %770, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %772 = "mhlo.reshape"(%771) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %773 = "mhlo.dot"(%772, %cst_113) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %774 = chlo.broadcast_add %773, %cst_114 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %775 = "mhlo.reshape"(%774) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %776 = chlo.broadcast_add %775, %766 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %777 = chlo.broadcast_multiply %776, %cst_115 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %778 = chlo.broadcast_add %777, %cst_116 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %779 = "mhlo.reshape"(%778) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %780 = "mhlo.dot"(%779, %cst_111) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %781 = chlo.broadcast_add %780, %cst_112 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %782 = "mhlo.reshape"(%781) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %783 = chlo.broadcast_maximum %782, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %784 = "mhlo.reshape"(%783) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %785 = "mhlo.dot"(%784, %cst_107) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %786 = chlo.broadcast_add %785, %cst_108 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %787 = "mhlo.reshape"(%786) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %788 = chlo.broadcast_add %787, %778 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %789 = chlo.broadcast_multiply %788, %cst_109 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %790 = chlo.broadcast_add %789, %cst_110 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %791 = "mhlo.reshape"(%790) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %792 = "mhlo.dot"(%791, %cst_105) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %793 = chlo.broadcast_add %792, %cst_106 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %794 = "mhlo.reshape"(%793) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %795 = chlo.broadcast_maximum %794, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %796 = "mhlo.reshape"(%795) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %797 = "mhlo.dot"(%796, %cst_101) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %798 = chlo.broadcast_add %797, %cst_102 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %799 = "mhlo.reshape"(%798) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %800 = chlo.broadcast_add %799, %790 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %801 = chlo.broadcast_multiply %800, %cst_103 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %802 = chlo.broadcast_add %801, %cst_104 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %803 = "mhlo.reshape"(%802) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %804 = "mhlo.dot"(%803, %cst_99) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %805 = chlo.broadcast_add %804, %cst_100 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %806 = "mhlo.reshape"(%805) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %807 = chlo.broadcast_maximum %806, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %808 = "mhlo.reshape"(%807) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %809 = "mhlo.dot"(%808, %cst_91) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %810 = chlo.broadcast_add %809, %cst_92 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %811 = "mhlo.reshape"(%810) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %812 = chlo.broadcast_add %811, %802 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %813 = chlo.broadcast_multiply %812, %cst_97 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %814 = chlo.broadcast_add %813, %cst_98 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %815 = "mhlo.reshape"(%814) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %816 = "mhlo.dot"(%815, %cst_93) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %817 = chlo.broadcast_add %816, %cst_94 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %818 = "mhlo.reshape"(%817) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %819 = chlo.broadcast_add %818, %722 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %820 = chlo.broadcast_multiply %819, %cst_95 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %821 = chlo.broadcast_add %820, %cst_96 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %822 = "mhlo.reshape"(%821) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %823 = "mhlo.dot"(%822, %cst_81) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %824 = chlo.broadcast_add %823, %cst_82 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %825 = "mhlo.reshape"(%824) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %826 = "mhlo.transpose"(%825) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %827 = "mhlo.dot"(%822, %cst_77) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %828 = "mhlo.reshape"(%827) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %829 = "mhlo.broadcast_in_dim"(%cst_78) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %830 = mhlo.add %828, %829 : tensor<1x384x128xf32> | |
| %831 = chlo.broadcast_multiply %830, %cst_79 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %832 = chlo.broadcast_add %831, %cst_80 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %833 = "mhlo.reshape"(%832) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %834 = "mhlo.dot"(%833, %cst_85) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %835 = chlo.broadcast_add %834, %cst_86 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %836 = "mhlo.reshape"(%835) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %837 = "mhlo.transpose"(%836) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %838 = "mhlo.dot"(%833, %cst_83) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %839 = chlo.broadcast_add %838, %cst_84 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %840 = "mhlo.reshape"(%839) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %841 = "mhlo.transpose"(%840) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %842 = "mhlo.dot_general"(%841, %837) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %843 = chlo.broadcast_multiply %842, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %844 = chlo.broadcast_add %843, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %845 = "mhlo.reduce"(%844, %2) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %846 = linalg.tensor_expand_shape %845 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %847 = chlo.broadcast_subtract %844, %846 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %848 = "mhlo.exponential"(%847) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %849 = "mhlo.reduce"(%848, %1) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %850 = linalg.tensor_expand_shape %849 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %851 = chlo.broadcast_divide %848, %850 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %852 = "mhlo.dot_general"(%851, %826) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %853 = "mhlo.transpose"(%852) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %854 = "mhlo.reshape"(%853) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %855 = "mhlo.dot"(%854, %cst_87) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %856 = chlo.broadcast_add %855, %cst_88 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %857 = "mhlo.reshape"(%856) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %858 = "mhlo.dot"(%822, %cst_74) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %859 = chlo.broadcast_add %858, %cst_75 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %860 = "mhlo.reshape"(%859) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %861 = chlo.broadcast_multiply %860, %cst_76 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %862 = chlo.broadcast_add %861, %cst_88 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %863 = chlo.broadcast_add %857, %862 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %864 = chlo.broadcast_multiply %863, %cst_89 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %865 = chlo.broadcast_add %864, %cst_90 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %866 = "mhlo.reshape"(%865) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %867 = "mhlo.dot"(%866, %cst_72) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %868 = chlo.broadcast_add %867, %cst_73 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %869 = "mhlo.reshape"(%868) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %870 = chlo.broadcast_maximum %869, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %871 = "mhlo.reshape"(%870) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %872 = "mhlo.dot"(%871, %cst_68) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %873 = chlo.broadcast_add %872, %cst_69 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %874 = "mhlo.reshape"(%873) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %875 = chlo.broadcast_add %874, %865 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %876 = chlo.broadcast_multiply %875, %cst_70 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %877 = chlo.broadcast_add %876, %cst_71 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %878 = "mhlo.reshape"(%877) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %879 = "mhlo.dot"(%878, %cst_66) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %880 = chlo.broadcast_add %879, %cst_67 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %881 = "mhlo.reshape"(%880) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %882 = chlo.broadcast_maximum %881, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %883 = "mhlo.reshape"(%882) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %884 = "mhlo.dot"(%883, %cst_62) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %885 = chlo.broadcast_add %884, %cst_63 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %886 = "mhlo.reshape"(%885) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %887 = chlo.broadcast_add %886, %877 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %888 = chlo.broadcast_multiply %887, %cst_64 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %889 = chlo.broadcast_add %888, %cst_65 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %890 = "mhlo.reshape"(%889) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %891 = "mhlo.dot"(%890, %cst_60) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %892 = chlo.broadcast_add %891, %cst_61 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %893 = "mhlo.reshape"(%892) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %894 = chlo.broadcast_maximum %893, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %895 = "mhlo.reshape"(%894) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %896 = "mhlo.dot"(%895, %cst_56) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %897 = chlo.broadcast_add %896, %cst_57 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %898 = "mhlo.reshape"(%897) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %899 = chlo.broadcast_add %898, %889 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %900 = chlo.broadcast_multiply %899, %cst_58 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %901 = chlo.broadcast_add %900, %cst_59 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %902 = "mhlo.reshape"(%901) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %903 = "mhlo.dot"(%902, %cst_54) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %904 = chlo.broadcast_add %903, %cst_55 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %905 = "mhlo.reshape"(%904) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %906 = chlo.broadcast_maximum %905, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %907 = "mhlo.reshape"(%906) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %908 = "mhlo.dot"(%907, %cst_46) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %909 = chlo.broadcast_add %908, %cst_47 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %910 = "mhlo.reshape"(%909) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %911 = chlo.broadcast_add %910, %901 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %912 = chlo.broadcast_multiply %911, %cst_52 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %913 = chlo.broadcast_add %912, %cst_53 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %914 = "mhlo.reshape"(%913) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %915 = "mhlo.dot"(%914, %cst_48) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %916 = chlo.broadcast_add %915, %cst_49 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %917 = "mhlo.reshape"(%916) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %918 = chlo.broadcast_add %917, %821 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %919 = chlo.broadcast_multiply %918, %cst_50 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %920 = chlo.broadcast_add %919, %cst_51 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %921 = "mhlo.reshape"(%920) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %922 = "mhlo.dot"(%921, %cst_36) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %923 = chlo.broadcast_add %922, %cst_37 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %924 = "mhlo.reshape"(%923) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %925 = "mhlo.transpose"(%924) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %926 = "mhlo.dot"(%921, %cst_32) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %927 = "mhlo.reshape"(%926) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %928 = "mhlo.broadcast_in_dim"(%cst_33) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %929 = mhlo.add %927, %928 : tensor<1x384x128xf32> | |
| %930 = chlo.broadcast_multiply %929, %cst_34 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %931 = chlo.broadcast_add %930, %cst_35 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %932 = "mhlo.reshape"(%931) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %933 = "mhlo.dot"(%932, %cst_40) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %934 = chlo.broadcast_add %933, %cst_41 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %935 = "mhlo.reshape"(%934) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %936 = "mhlo.transpose"(%935) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %937 = "mhlo.dot"(%932, %cst_38) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %938 = chlo.broadcast_add %937, %cst_39 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %939 = "mhlo.reshape"(%938) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %940 = "mhlo.transpose"(%939) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %941 = "mhlo.dot_general"(%940, %936) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %942 = chlo.broadcast_multiply %941, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %943 = chlo.broadcast_add %942, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %944 = "mhlo.reduce"(%943, %2) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %945 = linalg.tensor_expand_shape %944 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %946 = chlo.broadcast_subtract %943, %945 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %947 = "mhlo.exponential"(%946) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %948 = "mhlo.reduce"(%947, %1) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %949 = linalg.tensor_expand_shape %948 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %950 = chlo.broadcast_divide %947, %949 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %951 = "mhlo.dot_general"(%950, %925) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %952 = "mhlo.transpose"(%951) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %953 = "mhlo.reshape"(%952) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %954 = "mhlo.dot"(%953, %cst_42) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %955 = chlo.broadcast_add %954, %cst_43 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %956 = "mhlo.reshape"(%955) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %957 = "mhlo.dot"(%921, %cst_29) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %958 = chlo.broadcast_add %957, %cst_30 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %959 = "mhlo.reshape"(%958) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %960 = chlo.broadcast_multiply %959, %cst_31 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %961 = chlo.broadcast_add %960, %cst_43 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %962 = chlo.broadcast_add %956, %961 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %963 = chlo.broadcast_multiply %962, %cst_44 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %964 = chlo.broadcast_add %963, %cst_45 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %965 = "mhlo.reshape"(%964) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %966 = "mhlo.dot"(%965, %cst_27) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %967 = chlo.broadcast_add %966, %cst_28 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %968 = "mhlo.reshape"(%967) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %969 = chlo.broadcast_maximum %968, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %970 = "mhlo.reshape"(%969) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %971 = "mhlo.dot"(%970, %cst_23) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %972 = chlo.broadcast_add %971, %cst_24 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %973 = "mhlo.reshape"(%972) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %974 = chlo.broadcast_add %973, %964 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %975 = chlo.broadcast_multiply %974, %cst_25 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %976 = chlo.broadcast_add %975, %cst_26 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %977 = "mhlo.reshape"(%976) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %978 = "mhlo.dot"(%977, %cst_21) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %979 = chlo.broadcast_add %978, %cst_22 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %980 = "mhlo.reshape"(%979) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %981 = chlo.broadcast_maximum %980, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %982 = "mhlo.reshape"(%981) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %983 = "mhlo.dot"(%982, %cst_17) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %984 = chlo.broadcast_add %983, %cst_18 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %985 = "mhlo.reshape"(%984) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %986 = chlo.broadcast_add %985, %976 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %987 = chlo.broadcast_multiply %986, %cst_19 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %988 = chlo.broadcast_add %987, %cst_20 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %989 = "mhlo.reshape"(%988) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %990 = "mhlo.dot"(%989, %cst_15) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %991 = chlo.broadcast_add %990, %cst_16 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %992 = "mhlo.reshape"(%991) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %993 = chlo.broadcast_maximum %992, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %994 = "mhlo.reshape"(%993) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %995 = "mhlo.dot"(%994, %cst_11) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %996 = chlo.broadcast_add %995, %cst_12 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %997 = "mhlo.reshape"(%996) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %998 = chlo.broadcast_add %997, %988 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %999 = chlo.broadcast_multiply %998, %cst_13 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1000 = chlo.broadcast_add %999, %cst_14 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1001 = "mhlo.reshape"(%1000) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1002 = "mhlo.dot"(%1001, %cst_9) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1003 = chlo.broadcast_add %1002, %cst_10 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1004 = "mhlo.reshape"(%1003) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1005 = chlo.broadcast_maximum %1004, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1006 = "mhlo.reshape"(%1005) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1007 = "mhlo.dot"(%1006, %cst_1) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1008 = chlo.broadcast_add %1007, %cst_2 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1009 = "mhlo.reshape"(%1008) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1010 = chlo.broadcast_add %1009, %1000 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1011 = chlo.broadcast_multiply %1010, %cst_7 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1012 = chlo.broadcast_add %1011, %cst_8 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1013 = "mhlo.reshape"(%1012) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1014 = "mhlo.dot"(%1013, %cst_3) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1015 = chlo.broadcast_add %1014, %cst_4 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1016 = "mhlo.reshape"(%1015) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1017 = chlo.broadcast_add %1016, %920 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %1018 = chlo.broadcast_multiply %1017, %cst_5 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1019 = chlo.broadcast_add %1018, %cst_6 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1020 = "mhlo.reshape"(%1019) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1021 = "mhlo.dot"(%1020, %cst_981) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1022 = chlo.broadcast_add %1021, %cst_982 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1023 = "mhlo.reshape"(%1022) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1024 = "mhlo.transpose"(%1023) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1025 = "mhlo.dot"(%1020, %cst_977) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1026 = "mhlo.reshape"(%1025) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1027 = "mhlo.broadcast_in_dim"(%cst_978) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1028 = mhlo.add %1026, %1027 : tensor<1x384x128xf32> | |
| %1029 = chlo.broadcast_multiply %1028, %cst_979 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1030 = chlo.broadcast_add %1029, %cst_980 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1031 = "mhlo.reshape"(%1030) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1032 = "mhlo.dot"(%1031, %cst_985) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1033 = chlo.broadcast_add %1032, %cst_986 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1034 = "mhlo.reshape"(%1033) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1035 = "mhlo.transpose"(%1034) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1036 = "mhlo.dot"(%1031, %cst_983) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1037 = chlo.broadcast_add %1036, %cst_984 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1038 = "mhlo.reshape"(%1037) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1039 = "mhlo.transpose"(%1038) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1040 = "mhlo.dot_general"(%1039, %1035) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %1041 = chlo.broadcast_multiply %1040, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %1042 = chlo.broadcast_add %1041, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1043 = "mhlo.reduce"(%1042, %2) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1044 = linalg.tensor_expand_shape %1043 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1045 = chlo.broadcast_subtract %1042, %1044 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1046 = "mhlo.exponential"(%1045) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1047 = "mhlo.reduce"(%1046, %1) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1048 = linalg.tensor_expand_shape %1047 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1049 = chlo.broadcast_divide %1046, %1048 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1050 = "mhlo.dot_general"(%1049, %1024) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1051 = "mhlo.transpose"(%1050) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %1052 = "mhlo.reshape"(%1051) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %1053 = "mhlo.dot"(%1052, %cst_987) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1054 = chlo.broadcast_add %1053, %cst_988 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1055 = "mhlo.reshape"(%1054) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1056 = "mhlo.dot"(%1020, %cst_974) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1057 = chlo.broadcast_add %1056, %cst_975 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1058 = "mhlo.reshape"(%1057) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1059 = chlo.broadcast_multiply %1058, %cst_976 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1060 = chlo.broadcast_add %1059, %cst_988 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1061 = chlo.broadcast_add %1055, %1060 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1062 = chlo.broadcast_multiply %1061, %cst_989 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1063 = chlo.broadcast_add %1062, %cst_990 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1064 = "mhlo.reshape"(%1063) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1065 = "mhlo.dot"(%1064, %cst_972) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1066 = chlo.broadcast_add %1065, %cst_973 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1067 = "mhlo.reshape"(%1066) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1068 = chlo.broadcast_maximum %1067, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1069 = "mhlo.reshape"(%1068) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1070 = "mhlo.dot"(%1069, %cst_968) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1071 = chlo.broadcast_add %1070, %cst_969 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1072 = "mhlo.reshape"(%1071) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1073 = chlo.broadcast_add %1072, %1063 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1074 = chlo.broadcast_multiply %1073, %cst_970 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1075 = chlo.broadcast_add %1074, %cst_971 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1076 = "mhlo.reshape"(%1075) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1077 = "mhlo.dot"(%1076, %cst_966) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1078 = chlo.broadcast_add %1077, %cst_967 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1079 = "mhlo.reshape"(%1078) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1080 = chlo.broadcast_maximum %1079, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1081 = "mhlo.reshape"(%1080) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1082 = "mhlo.dot"(%1081, %cst_962) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1083 = chlo.broadcast_add %1082, %cst_963 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1084 = "mhlo.reshape"(%1083) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1085 = chlo.broadcast_add %1084, %1075 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1086 = chlo.broadcast_multiply %1085, %cst_964 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1087 = chlo.broadcast_add %1086, %cst_965 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1088 = "mhlo.reshape"(%1087) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1089 = "mhlo.dot"(%1088, %cst_960) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1090 = chlo.broadcast_add %1089, %cst_961 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1091 = "mhlo.reshape"(%1090) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1092 = chlo.broadcast_maximum %1091, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1093 = "mhlo.reshape"(%1092) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1094 = "mhlo.dot"(%1093, %cst_956) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1095 = chlo.broadcast_add %1094, %cst_957 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1096 = "mhlo.reshape"(%1095) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1097 = chlo.broadcast_add %1096, %1087 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1098 = chlo.broadcast_multiply %1097, %cst_958 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1099 = chlo.broadcast_add %1098, %cst_959 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1100 = "mhlo.reshape"(%1099) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1101 = "mhlo.dot"(%1100, %cst_954) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1102 = chlo.broadcast_add %1101, %cst_955 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1103 = "mhlo.reshape"(%1102) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1104 = chlo.broadcast_maximum %1103, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1105 = "mhlo.reshape"(%1104) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1106 = "mhlo.dot"(%1105, %cst_946) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1107 = chlo.broadcast_add %1106, %cst_947 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1108 = "mhlo.reshape"(%1107) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1109 = chlo.broadcast_add %1108, %1099 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1110 = chlo.broadcast_multiply %1109, %cst_952 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1111 = chlo.broadcast_add %1110, %cst_953 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1112 = "mhlo.reshape"(%1111) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1113 = "mhlo.dot"(%1112, %cst_948) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1114 = chlo.broadcast_add %1113, %cst_949 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1115 = "mhlo.reshape"(%1114) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1116 = chlo.broadcast_add %1115, %1019 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %1117 = chlo.broadcast_multiply %1116, %cst_950 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1118 = chlo.broadcast_add %1117, %cst_951 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1119 = "mhlo.reshape"(%1118) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1120 = "mhlo.dot"(%1119, %cst_936) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1121 = chlo.broadcast_add %1120, %cst_937 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1122 = "mhlo.reshape"(%1121) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1123 = "mhlo.transpose"(%1122) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1124 = "mhlo.dot"(%1119, %cst_932) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1125 = "mhlo.reshape"(%1124) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1126 = "mhlo.broadcast_in_dim"(%cst_933) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1127 = mhlo.add %1125, %1126 : tensor<1x384x128xf32> | |
| %1128 = chlo.broadcast_multiply %1127, %cst_934 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1129 = chlo.broadcast_add %1128, %cst_935 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1130 = "mhlo.reshape"(%1129) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1131 = "mhlo.dot"(%1130, %cst_940) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1132 = chlo.broadcast_add %1131, %cst_941 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1133 = "mhlo.reshape"(%1132) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1134 = "mhlo.transpose"(%1133) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1135 = "mhlo.dot"(%1130, %cst_938) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1136 = chlo.broadcast_add %1135, %cst_939 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1137 = "mhlo.reshape"(%1136) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1138 = "mhlo.transpose"(%1137) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1139 = "mhlo.dot_general"(%1138, %1134) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %1140 = chlo.broadcast_multiply %1139, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %1141 = chlo.broadcast_add %1140, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1142 = "mhlo.reduce"(%1141, %2) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1143 = linalg.tensor_expand_shape %1142 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1144 = chlo.broadcast_subtract %1141, %1143 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1145 = "mhlo.exponential"(%1144) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1146 = "mhlo.reduce"(%1145, %1) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1147 = linalg.tensor_expand_shape %1146 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1148 = chlo.broadcast_divide %1145, %1147 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1149 = "mhlo.dot_general"(%1148, %1123) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1150 = "mhlo.transpose"(%1149) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %1151 = "mhlo.reshape"(%1150) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %1152 = "mhlo.dot"(%1151, %cst_942) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1153 = chlo.broadcast_add %1152, %cst_943 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1154 = "mhlo.reshape"(%1153) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1155 = "mhlo.dot"(%1119, %cst_929) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1156 = chlo.broadcast_add %1155, %cst_930 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1157 = "mhlo.reshape"(%1156) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1158 = chlo.broadcast_multiply %1157, %cst_931 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1159 = chlo.broadcast_add %1158, %cst_943 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1160 = chlo.broadcast_add %1154, %1159 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1161 = chlo.broadcast_multiply %1160, %cst_944 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1162 = chlo.broadcast_add %1161, %cst_945 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1163 = "mhlo.reshape"(%1162) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1164 = "mhlo.dot"(%1163, %cst_927) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1165 = chlo.broadcast_add %1164, %cst_928 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1166 = "mhlo.reshape"(%1165) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1167 = chlo.broadcast_maximum %1166, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1168 = "mhlo.reshape"(%1167) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1169 = "mhlo.dot"(%1168, %cst_923) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1170 = chlo.broadcast_add %1169, %cst_924 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1171 = "mhlo.reshape"(%1170) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1172 = chlo.broadcast_add %1171, %1162 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1173 = chlo.broadcast_multiply %1172, %cst_925 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1174 = chlo.broadcast_add %1173, %cst_926 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1175 = "mhlo.reshape"(%1174) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1176 = "mhlo.dot"(%1175, %cst_921) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1177 = chlo.broadcast_add %1176, %cst_922 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1178 = "mhlo.reshape"(%1177) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1179 = chlo.broadcast_maximum %1178, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1180 = "mhlo.reshape"(%1179) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1181 = "mhlo.dot"(%1180, %cst_917) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1182 = chlo.broadcast_add %1181, %cst_918 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1183 = "mhlo.reshape"(%1182) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1184 = chlo.broadcast_add %1183, %1174 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1185 = chlo.broadcast_multiply %1184, %cst_919 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1186 = chlo.broadcast_add %1185, %cst_920 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1187 = "mhlo.reshape"(%1186) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1188 = "mhlo.dot"(%1187, %cst_915) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1189 = chlo.broadcast_add %1188, %cst_916 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1190 = "mhlo.reshape"(%1189) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1191 = chlo.broadcast_maximum %1190, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1192 = "mhlo.reshape"(%1191) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1193 = "mhlo.dot"(%1192, %cst_911) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1194 = chlo.broadcast_add %1193, %cst_912 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1195 = "mhlo.reshape"(%1194) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1196 = chlo.broadcast_add %1195, %1186 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1197 = chlo.broadcast_multiply %1196, %cst_913 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1198 = chlo.broadcast_add %1197, %cst_914 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1199 = "mhlo.reshape"(%1198) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1200 = "mhlo.dot"(%1199, %cst_909) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1201 = chlo.broadcast_add %1200, %cst_910 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1202 = "mhlo.reshape"(%1201) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1203 = chlo.broadcast_maximum %1202, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1204 = "mhlo.reshape"(%1203) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1205 = "mhlo.dot"(%1204, %cst_901) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1206 = chlo.broadcast_add %1205, %cst_902 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1207 = "mhlo.reshape"(%1206) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1208 = chlo.broadcast_add %1207, %1198 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1209 = chlo.broadcast_multiply %1208, %cst_907 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1210 = chlo.broadcast_add %1209, %cst_908 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1211 = "mhlo.reshape"(%1210) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1212 = "mhlo.dot"(%1211, %cst_903) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1213 = chlo.broadcast_add %1212, %cst_904 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1214 = "mhlo.reshape"(%1213) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1215 = chlo.broadcast_add %1214, %1118 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %1216 = chlo.broadcast_multiply %1215, %cst_905 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1217 = chlo.broadcast_add %1216, %cst_906 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1218 = "mhlo.reshape"(%1217) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1219 = "mhlo.dot"(%1218, %cst_891) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1220 = chlo.broadcast_add %1219, %cst_892 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1221 = "mhlo.reshape"(%1220) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1222 = "mhlo.transpose"(%1221) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1223 = "mhlo.dot"(%1218, %cst_887) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1224 = "mhlo.reshape"(%1223) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1225 = "mhlo.broadcast_in_dim"(%cst_888) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1226 = mhlo.add %1224, %1225 : tensor<1x384x128xf32> | |
| %1227 = chlo.broadcast_multiply %1226, %cst_889 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1228 = chlo.broadcast_add %1227, %cst_890 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1229 = "mhlo.reshape"(%1228) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1230 = "mhlo.dot"(%1229, %cst_895) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1231 = chlo.broadcast_add %1230, %cst_896 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1232 = "mhlo.reshape"(%1231) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1233 = "mhlo.transpose"(%1232) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1234 = "mhlo.dot"(%1229, %cst_893) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1235 = chlo.broadcast_add %1234, %cst_894 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1236 = "mhlo.reshape"(%1235) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1237 = "mhlo.transpose"(%1236) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1238 = "mhlo.dot_general"(%1237, %1233) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %1239 = chlo.broadcast_multiply %1238, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %1240 = chlo.broadcast_add %1239, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1241 = "mhlo.reduce"(%1240, %2) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1242 = linalg.tensor_expand_shape %1241 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1243 = chlo.broadcast_subtract %1240, %1242 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1244 = "mhlo.exponential"(%1243) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1245 = "mhlo.reduce"(%1244, %1) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1246 = linalg.tensor_expand_shape %1245 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1247 = chlo.broadcast_divide %1244, %1246 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1248 = "mhlo.dot_general"(%1247, %1222) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1249 = "mhlo.transpose"(%1248) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %1250 = "mhlo.reshape"(%1249) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %1251 = "mhlo.dot"(%1250, %cst_897) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1252 = chlo.broadcast_add %1251, %cst_898 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1253 = "mhlo.reshape"(%1252) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1254 = "mhlo.dot"(%1218, %cst_884) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1255 = chlo.broadcast_add %1254, %cst_885 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1256 = "mhlo.reshape"(%1255) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1257 = chlo.broadcast_multiply %1256, %cst_886 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1258 = chlo.broadcast_add %1257, %cst_898 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1259 = chlo.broadcast_add %1253, %1258 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1260 = chlo.broadcast_multiply %1259, %cst_899 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1261 = chlo.broadcast_add %1260, %cst_900 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1262 = "mhlo.reshape"(%1261) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1263 = "mhlo.dot"(%1262, %cst_882) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1264 = chlo.broadcast_add %1263, %cst_883 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1265 = "mhlo.reshape"(%1264) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1266 = chlo.broadcast_maximum %1265, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1267 = "mhlo.reshape"(%1266) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1268 = "mhlo.dot"(%1267, %cst_878) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1269 = chlo.broadcast_add %1268, %cst_879 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1270 = "mhlo.reshape"(%1269) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1271 = chlo.broadcast_add %1270, %1261 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1272 = chlo.broadcast_multiply %1271, %cst_880 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1273 = chlo.broadcast_add %1272, %cst_881 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1274 = "mhlo.reshape"(%1273) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1275 = "mhlo.dot"(%1274, %cst_876) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1276 = chlo.broadcast_add %1275, %cst_877 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1277 = "mhlo.reshape"(%1276) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1278 = chlo.broadcast_maximum %1277, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1279 = "mhlo.reshape"(%1278) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1280 = "mhlo.dot"(%1279, %cst_872) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1281 = chlo.broadcast_add %1280, %cst_873 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1282 = "mhlo.reshape"(%1281) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1283 = chlo.broadcast_add %1282, %1273 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1284 = chlo.broadcast_multiply %1283, %cst_874 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1285 = chlo.broadcast_add %1284, %cst_875 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1286 = "mhlo.reshape"(%1285) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1287 = "mhlo.dot"(%1286, %cst_870) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1288 = chlo.broadcast_add %1287, %cst_871 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1289 = "mhlo.reshape"(%1288) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1290 = chlo.broadcast_maximum %1289, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1291 = "mhlo.reshape"(%1290) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1292 = "mhlo.dot"(%1291, %cst_866) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1293 = chlo.broadcast_add %1292, %cst_867 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1294 = "mhlo.reshape"(%1293) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1295 = chlo.broadcast_add %1294, %1285 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1296 = chlo.broadcast_multiply %1295, %cst_868 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1297 = chlo.broadcast_add %1296, %cst_869 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1298 = "mhlo.reshape"(%1297) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1299 = "mhlo.dot"(%1298, %cst_864) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1300 = chlo.broadcast_add %1299, %cst_865 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1301 = "mhlo.reshape"(%1300) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1302 = chlo.broadcast_maximum %1301, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1303 = "mhlo.reshape"(%1302) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1304 = "mhlo.dot"(%1303, %cst_856) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1305 = chlo.broadcast_add %1304, %cst_857 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1306 = "mhlo.reshape"(%1305) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1307 = chlo.broadcast_add %1306, %1297 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1308 = chlo.broadcast_multiply %1307, %cst_862 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1309 = chlo.broadcast_add %1308, %cst_863 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1310 = "mhlo.reshape"(%1309) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1311 = "mhlo.dot"(%1310, %cst_858) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1312 = chlo.broadcast_add %1311, %cst_859 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1313 = "mhlo.reshape"(%1312) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1314 = chlo.broadcast_add %1313, %1217 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %1315 = chlo.broadcast_multiply %1314, %cst_860 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1316 = chlo.broadcast_add %1315, %cst_861 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1317 = "mhlo.reshape"(%1316) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1318 = "mhlo.dot"(%1317, %cst_846) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1319 = chlo.broadcast_add %1318, %cst_847 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1320 = "mhlo.reshape"(%1319) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1321 = "mhlo.transpose"(%1320) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1322 = "mhlo.dot"(%1317, %cst_842) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1323 = "mhlo.reshape"(%1322) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1324 = "mhlo.broadcast_in_dim"(%cst_843) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1325 = mhlo.add %1323, %1324 : tensor<1x384x128xf32> | |
| %1326 = chlo.broadcast_multiply %1325, %cst_844 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1327 = chlo.broadcast_add %1326, %cst_845 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1328 = "mhlo.reshape"(%1327) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1329 = "mhlo.dot"(%1328, %cst_850) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1330 = chlo.broadcast_add %1329, %cst_851 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1331 = "mhlo.reshape"(%1330) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1332 = "mhlo.transpose"(%1331) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1333 = "mhlo.dot"(%1328, %cst_848) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1334 = chlo.broadcast_add %1333, %cst_849 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1335 = "mhlo.reshape"(%1334) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1336 = "mhlo.transpose"(%1335) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1337 = "mhlo.dot_general"(%1336, %1332) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %1338 = chlo.broadcast_multiply %1337, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %1339 = chlo.broadcast_add %1338, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1340 = "mhlo.reduce"(%1339, %2) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1341 = linalg.tensor_expand_shape %1340 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1342 = chlo.broadcast_subtract %1339, %1341 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1343 = "mhlo.exponential"(%1342) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1344 = "mhlo.reduce"(%1343, %1) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1345 = linalg.tensor_expand_shape %1344 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1346 = chlo.broadcast_divide %1343, %1345 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1347 = "mhlo.dot_general"(%1346, %1321) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1348 = "mhlo.transpose"(%1347) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %1349 = "mhlo.reshape"(%1348) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %1350 = "mhlo.dot"(%1349, %cst_852) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1351 = chlo.broadcast_add %1350, %cst_853 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1352 = "mhlo.reshape"(%1351) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1353 = "mhlo.dot"(%1317, %cst_839) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1354 = chlo.broadcast_add %1353, %cst_840 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1355 = "mhlo.reshape"(%1354) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1356 = chlo.broadcast_multiply %1355, %cst_841 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1357 = chlo.broadcast_add %1356, %cst_853 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1358 = chlo.broadcast_add %1352, %1357 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1359 = chlo.broadcast_multiply %1358, %cst_854 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1360 = chlo.broadcast_add %1359, %cst_855 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1361 = "mhlo.reshape"(%1360) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1362 = "mhlo.dot"(%1361, %cst_837) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1363 = chlo.broadcast_add %1362, %cst_838 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1364 = "mhlo.reshape"(%1363) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1365 = chlo.broadcast_maximum %1364, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1366 = "mhlo.reshape"(%1365) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1367 = "mhlo.dot"(%1366, %cst_833) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1368 = chlo.broadcast_add %1367, %cst_834 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1369 = "mhlo.reshape"(%1368) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1370 = chlo.broadcast_add %1369, %1360 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1371 = chlo.broadcast_multiply %1370, %cst_835 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1372 = chlo.broadcast_add %1371, %cst_836 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1373 = "mhlo.reshape"(%1372) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1374 = "mhlo.dot"(%1373, %cst_831) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1375 = chlo.broadcast_add %1374, %cst_832 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1376 = "mhlo.reshape"(%1375) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1377 = chlo.broadcast_maximum %1376, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1378 = "mhlo.reshape"(%1377) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1379 = "mhlo.dot"(%1378, %cst_827) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1380 = chlo.broadcast_add %1379, %cst_828 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1381 = "mhlo.reshape"(%1380) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1382 = chlo.broadcast_add %1381, %1372 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1383 = chlo.broadcast_multiply %1382, %cst_829 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1384 = chlo.broadcast_add %1383, %cst_830 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1385 = "mhlo.reshape"(%1384) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1386 = "mhlo.dot"(%1385, %cst_825) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1387 = chlo.broadcast_add %1386, %cst_826 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1388 = "mhlo.reshape"(%1387) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1389 = chlo.broadcast_maximum %1388, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1390 = "mhlo.reshape"(%1389) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1391 = "mhlo.dot"(%1390, %cst_821) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1392 = chlo.broadcast_add %1391, %cst_822 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1393 = "mhlo.reshape"(%1392) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1394 = chlo.broadcast_add %1393, %1384 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1395 = chlo.broadcast_multiply %1394, %cst_823 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1396 = chlo.broadcast_add %1395, %cst_824 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1397 = "mhlo.reshape"(%1396) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1398 = "mhlo.dot"(%1397, %cst_819) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1399 = chlo.broadcast_add %1398, %cst_820 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1400 = "mhlo.reshape"(%1399) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1401 = chlo.broadcast_maximum %1400, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1402 = "mhlo.reshape"(%1401) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1403 = "mhlo.dot"(%1402, %cst_811) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1404 = chlo.broadcast_add %1403, %cst_812 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1405 = "mhlo.reshape"(%1404) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1406 = chlo.broadcast_add %1405, %1396 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1407 = chlo.broadcast_multiply %1406, %cst_817 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1408 = chlo.broadcast_add %1407, %cst_818 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1409 = "mhlo.reshape"(%1408) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1410 = "mhlo.dot"(%1409, %cst_813) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1411 = chlo.broadcast_add %1410, %cst_814 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1412 = "mhlo.reshape"(%1411) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1413 = chlo.broadcast_add %1412, %1316 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %1414 = chlo.broadcast_multiply %1413, %cst_815 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1415 = chlo.broadcast_add %1414, %cst_816 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1416 = "mhlo.reshape"(%1415) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1417 = "mhlo.dot"(%1416, %cst_801) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1418 = chlo.broadcast_add %1417, %cst_802 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1419 = "mhlo.reshape"(%1418) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1420 = "mhlo.transpose"(%1419) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1421 = "mhlo.dot"(%1416, %cst_797) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1422 = "mhlo.reshape"(%1421) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1423 = "mhlo.broadcast_in_dim"(%cst_798) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1424 = mhlo.add %1422, %1423 : tensor<1x384x128xf32> | |
| %1425 = chlo.broadcast_multiply %1424, %cst_799 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1426 = chlo.broadcast_add %1425, %cst_800 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1427 = "mhlo.reshape"(%1426) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1428 = "mhlo.dot"(%1427, %cst_805) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1429 = chlo.broadcast_add %1428, %cst_806 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1430 = "mhlo.reshape"(%1429) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1431 = "mhlo.transpose"(%1430) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1432 = "mhlo.dot"(%1427, %cst_803) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1433 = chlo.broadcast_add %1432, %cst_804 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1434 = "mhlo.reshape"(%1433) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1435 = "mhlo.transpose"(%1434) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1436 = "mhlo.dot_general"(%1435, %1431) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %1437 = chlo.broadcast_multiply %1436, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %1438 = chlo.broadcast_add %1437, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1439 = "mhlo.reduce"(%1438, %2) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1440 = linalg.tensor_expand_shape %1439 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1441 = chlo.broadcast_subtract %1438, %1440 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1442 = "mhlo.exponential"(%1441) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1443 = "mhlo.reduce"(%1442, %1) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1444 = linalg.tensor_expand_shape %1443 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1445 = chlo.broadcast_divide %1442, %1444 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1446 = "mhlo.dot_general"(%1445, %1420) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1447 = "mhlo.transpose"(%1446) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %1448 = "mhlo.reshape"(%1447) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %1449 = "mhlo.dot"(%1448, %cst_807) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1450 = chlo.broadcast_add %1449, %cst_808 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1451 = "mhlo.reshape"(%1450) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1452 = "mhlo.dot"(%1416, %cst_794) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1453 = chlo.broadcast_add %1452, %cst_795 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1454 = "mhlo.reshape"(%1453) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1455 = chlo.broadcast_multiply %1454, %cst_796 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1456 = chlo.broadcast_add %1455, %cst_808 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1457 = chlo.broadcast_add %1451, %1456 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1458 = chlo.broadcast_multiply %1457, %cst_809 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1459 = chlo.broadcast_add %1458, %cst_810 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1460 = "mhlo.reshape"(%1459) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1461 = "mhlo.dot"(%1460, %cst_792) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1462 = chlo.broadcast_add %1461, %cst_793 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1463 = "mhlo.reshape"(%1462) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1464 = chlo.broadcast_maximum %1463, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1465 = "mhlo.reshape"(%1464) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1466 = "mhlo.dot"(%1465, %cst_788) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1467 = chlo.broadcast_add %1466, %cst_789 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1468 = "mhlo.reshape"(%1467) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1469 = chlo.broadcast_add %1468, %1459 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1470 = chlo.broadcast_multiply %1469, %cst_790 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1471 = chlo.broadcast_add %1470, %cst_791 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1472 = "mhlo.reshape"(%1471) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1473 = "mhlo.dot"(%1472, %cst_786) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1474 = chlo.broadcast_add %1473, %cst_787 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1475 = "mhlo.reshape"(%1474) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1476 = chlo.broadcast_maximum %1475, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1477 = "mhlo.reshape"(%1476) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1478 = "mhlo.dot"(%1477, %cst_782) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1479 = chlo.broadcast_add %1478, %cst_783 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1480 = "mhlo.reshape"(%1479) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1481 = chlo.broadcast_add %1480, %1471 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1482 = chlo.broadcast_multiply %1481, %cst_784 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1483 = chlo.broadcast_add %1482, %cst_785 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1484 = "mhlo.reshape"(%1483) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1485 = "mhlo.dot"(%1484, %cst_780) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1486 = chlo.broadcast_add %1485, %cst_781 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1487 = "mhlo.reshape"(%1486) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1488 = chlo.broadcast_maximum %1487, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1489 = "mhlo.reshape"(%1488) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1490 = "mhlo.dot"(%1489, %cst_776) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1491 = chlo.broadcast_add %1490, %cst_777 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1492 = "mhlo.reshape"(%1491) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1493 = chlo.broadcast_add %1492, %1483 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1494 = chlo.broadcast_multiply %1493, %cst_778 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1495 = chlo.broadcast_add %1494, %cst_779 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1496 = "mhlo.reshape"(%1495) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1497 = "mhlo.dot"(%1496, %cst_774) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1498 = chlo.broadcast_add %1497, %cst_775 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1499 = "mhlo.reshape"(%1498) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1500 = chlo.broadcast_maximum %1499, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1501 = "mhlo.reshape"(%1500) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1502 = "mhlo.dot"(%1501, %cst_766) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1503 = chlo.broadcast_add %1502, %cst_767 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1504 = "mhlo.reshape"(%1503) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1505 = chlo.broadcast_add %1504, %1495 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1506 = chlo.broadcast_multiply %1505, %cst_772 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1507 = chlo.broadcast_add %1506, %cst_773 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1508 = "mhlo.reshape"(%1507) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1509 = "mhlo.dot"(%1508, %cst_768) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1510 = chlo.broadcast_add %1509, %cst_769 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1511 = "mhlo.reshape"(%1510) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1512 = chlo.broadcast_add %1511, %1415 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %1513 = chlo.broadcast_multiply %1512, %cst_770 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1514 = chlo.broadcast_add %1513, %cst_771 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1515 = "mhlo.reshape"(%1514) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1516 = "mhlo.dot"(%1515, %cst_756) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1517 = chlo.broadcast_add %1516, %cst_757 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1518 = "mhlo.reshape"(%1517) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1519 = "mhlo.transpose"(%1518) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1520 = "mhlo.dot"(%1515, %cst_752) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1521 = "mhlo.reshape"(%1520) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1522 = "mhlo.broadcast_in_dim"(%cst_753) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1523 = mhlo.add %1521, %1522 : tensor<1x384x128xf32> | |
| %1524 = chlo.broadcast_multiply %1523, %cst_754 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1525 = chlo.broadcast_add %1524, %cst_755 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1526 = "mhlo.reshape"(%1525) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1527 = "mhlo.dot"(%1526, %cst_760) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1528 = chlo.broadcast_add %1527, %cst_761 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1529 = "mhlo.reshape"(%1528) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1530 = "mhlo.transpose"(%1529) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1531 = "mhlo.dot"(%1526, %cst_758) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1532 = chlo.broadcast_add %1531, %cst_759 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1533 = "mhlo.reshape"(%1532) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1534 = "mhlo.transpose"(%1533) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1535 = "mhlo.dot_general"(%1534, %1530) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %1536 = chlo.broadcast_multiply %1535, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %1537 = chlo.broadcast_add %1536, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1538 = "mhlo.reduce"(%1537, %2) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1539 = linalg.tensor_expand_shape %1538 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1540 = chlo.broadcast_subtract %1537, %1539 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1541 = "mhlo.exponential"(%1540) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1542 = "mhlo.reduce"(%1541, %1) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1543 = linalg.tensor_expand_shape %1542 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1544 = chlo.broadcast_divide %1541, %1543 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1545 = "mhlo.dot_general"(%1544, %1519) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1546 = "mhlo.transpose"(%1545) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %1547 = "mhlo.reshape"(%1546) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %1548 = "mhlo.dot"(%1547, %cst_762) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1549 = chlo.broadcast_add %1548, %cst_763 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1550 = "mhlo.reshape"(%1549) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1551 = "mhlo.dot"(%1515, %cst_749) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1552 = chlo.broadcast_add %1551, %cst_750 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1553 = "mhlo.reshape"(%1552) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1554 = chlo.broadcast_multiply %1553, %cst_751 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1555 = chlo.broadcast_add %1554, %cst_763 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1556 = chlo.broadcast_add %1550, %1555 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1557 = chlo.broadcast_multiply %1556, %cst_764 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1558 = chlo.broadcast_add %1557, %cst_765 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1559 = "mhlo.reshape"(%1558) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1560 = "mhlo.dot"(%1559, %cst_747) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1561 = chlo.broadcast_add %1560, %cst_748 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1562 = "mhlo.reshape"(%1561) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1563 = chlo.broadcast_maximum %1562, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1564 = "mhlo.reshape"(%1563) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1565 = "mhlo.dot"(%1564, %cst_743) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1566 = chlo.broadcast_add %1565, %cst_744 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1567 = "mhlo.reshape"(%1566) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1568 = chlo.broadcast_add %1567, %1558 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1569 = chlo.broadcast_multiply %1568, %cst_745 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1570 = chlo.broadcast_add %1569, %cst_746 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1571 = "mhlo.reshape"(%1570) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1572 = "mhlo.dot"(%1571, %cst_741) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1573 = chlo.broadcast_add %1572, %cst_742 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1574 = "mhlo.reshape"(%1573) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1575 = chlo.broadcast_maximum %1574, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1576 = "mhlo.reshape"(%1575) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1577 = "mhlo.dot"(%1576, %cst_737) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1578 = chlo.broadcast_add %1577, %cst_738 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1579 = "mhlo.reshape"(%1578) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1580 = chlo.broadcast_add %1579, %1570 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1581 = chlo.broadcast_multiply %1580, %cst_739 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1582 = chlo.broadcast_add %1581, %cst_740 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1583 = "mhlo.reshape"(%1582) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1584 = "mhlo.dot"(%1583, %cst_735) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1585 = chlo.broadcast_add %1584, %cst_736 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1586 = "mhlo.reshape"(%1585) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1587 = chlo.broadcast_maximum %1586, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1588 = "mhlo.reshape"(%1587) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1589 = "mhlo.dot"(%1588, %cst_731) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1590 = chlo.broadcast_add %1589, %cst_732 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1591 = "mhlo.reshape"(%1590) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1592 = chlo.broadcast_add %1591, %1582 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1593 = chlo.broadcast_multiply %1592, %cst_733 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1594 = chlo.broadcast_add %1593, %cst_734 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1595 = "mhlo.reshape"(%1594) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1596 = "mhlo.dot"(%1595, %cst_729) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1597 = chlo.broadcast_add %1596, %cst_730 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1598 = "mhlo.reshape"(%1597) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1599 = chlo.broadcast_maximum %1598, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1600 = "mhlo.reshape"(%1599) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1601 = "mhlo.dot"(%1600, %cst_721) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1602 = chlo.broadcast_add %1601, %cst_722 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1603 = "mhlo.reshape"(%1602) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1604 = chlo.broadcast_add %1603, %1594 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1605 = chlo.broadcast_multiply %1604, %cst_727 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1606 = chlo.broadcast_add %1605, %cst_728 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1607 = "mhlo.reshape"(%1606) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1608 = "mhlo.dot"(%1607, %cst_723) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1609 = chlo.broadcast_add %1608, %cst_724 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1610 = "mhlo.reshape"(%1609) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1611 = chlo.broadcast_add %1610, %1514 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %1612 = chlo.broadcast_multiply %1611, %cst_725 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1613 = chlo.broadcast_add %1612, %cst_726 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1614 = "mhlo.reshape"(%1613) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1615 = "mhlo.dot"(%1614, %cst_711) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1616 = chlo.broadcast_add %1615, %cst_712 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1617 = "mhlo.reshape"(%1616) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1618 = "mhlo.transpose"(%1617) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1619 = "mhlo.dot"(%1614, %cst_707) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1620 = "mhlo.reshape"(%1619) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1621 = "mhlo.broadcast_in_dim"(%cst_708) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1622 = mhlo.add %1620, %1621 : tensor<1x384x128xf32> | |
| %1623 = chlo.broadcast_multiply %1622, %cst_709 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1624 = chlo.broadcast_add %1623, %cst_710 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1625 = "mhlo.reshape"(%1624) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1626 = "mhlo.dot"(%1625, %cst_715) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1627 = chlo.broadcast_add %1626, %cst_716 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1628 = "mhlo.reshape"(%1627) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1629 = "mhlo.transpose"(%1628) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1630 = "mhlo.dot"(%1625, %cst_713) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1631 = chlo.broadcast_add %1630, %cst_714 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1632 = "mhlo.reshape"(%1631) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1633 = "mhlo.transpose"(%1632) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1634 = "mhlo.dot_general"(%1633, %1629) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %1635 = chlo.broadcast_multiply %1634, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %1636 = chlo.broadcast_add %1635, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1637 = "mhlo.reduce"(%1636, %2) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1638 = linalg.tensor_expand_shape %1637 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1639 = chlo.broadcast_subtract %1636, %1638 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1640 = "mhlo.exponential"(%1639) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1641 = "mhlo.reduce"(%1640, %1) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1642 = linalg.tensor_expand_shape %1641 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1643 = chlo.broadcast_divide %1640, %1642 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1644 = "mhlo.dot_general"(%1643, %1618) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1645 = "mhlo.transpose"(%1644) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %1646 = "mhlo.reshape"(%1645) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %1647 = "mhlo.dot"(%1646, %cst_717) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1648 = chlo.broadcast_add %1647, %cst_718 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1649 = "mhlo.reshape"(%1648) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1650 = "mhlo.dot"(%1614, %cst_704) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1651 = chlo.broadcast_add %1650, %cst_705 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1652 = "mhlo.reshape"(%1651) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1653 = chlo.broadcast_multiply %1652, %cst_706 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1654 = chlo.broadcast_add %1653, %cst_718 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1655 = chlo.broadcast_add %1649, %1654 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1656 = chlo.broadcast_multiply %1655, %cst_719 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1657 = chlo.broadcast_add %1656, %cst_720 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1658 = "mhlo.reshape"(%1657) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1659 = "mhlo.dot"(%1658, %cst_702) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1660 = chlo.broadcast_add %1659, %cst_703 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1661 = "mhlo.reshape"(%1660) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1662 = chlo.broadcast_maximum %1661, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1663 = "mhlo.reshape"(%1662) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1664 = "mhlo.dot"(%1663, %cst_698) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1665 = chlo.broadcast_add %1664, %cst_699 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1666 = "mhlo.reshape"(%1665) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1667 = chlo.broadcast_add %1666, %1657 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1668 = chlo.broadcast_multiply %1667, %cst_700 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1669 = chlo.broadcast_add %1668, %cst_701 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1670 = "mhlo.reshape"(%1669) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1671 = "mhlo.dot"(%1670, %cst_696) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1672 = chlo.broadcast_add %1671, %cst_697 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1673 = "mhlo.reshape"(%1672) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1674 = chlo.broadcast_maximum %1673, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1675 = "mhlo.reshape"(%1674) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1676 = "mhlo.dot"(%1675, %cst_692) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1677 = chlo.broadcast_add %1676, %cst_693 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1678 = "mhlo.reshape"(%1677) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1679 = chlo.broadcast_add %1678, %1669 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1680 = chlo.broadcast_multiply %1679, %cst_694 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1681 = chlo.broadcast_add %1680, %cst_695 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1682 = "mhlo.reshape"(%1681) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1683 = "mhlo.dot"(%1682, %cst_690) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1684 = chlo.broadcast_add %1683, %cst_691 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1685 = "mhlo.reshape"(%1684) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1686 = chlo.broadcast_maximum %1685, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1687 = "mhlo.reshape"(%1686) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1688 = "mhlo.dot"(%1687, %cst_686) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1689 = chlo.broadcast_add %1688, %cst_687 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1690 = "mhlo.reshape"(%1689) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1691 = chlo.broadcast_add %1690, %1681 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1692 = chlo.broadcast_multiply %1691, %cst_688 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1693 = chlo.broadcast_add %1692, %cst_689 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1694 = "mhlo.reshape"(%1693) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1695 = "mhlo.dot"(%1694, %cst_684) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1696 = chlo.broadcast_add %1695, %cst_685 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1697 = "mhlo.reshape"(%1696) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1698 = chlo.broadcast_maximum %1697, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1699 = "mhlo.reshape"(%1698) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1700 = "mhlo.dot"(%1699, %cst_676) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1701 = chlo.broadcast_add %1700, %cst_677 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1702 = "mhlo.reshape"(%1701) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1703 = chlo.broadcast_add %1702, %1693 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1704 = chlo.broadcast_multiply %1703, %cst_682 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1705 = chlo.broadcast_add %1704, %cst_683 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1706 = "mhlo.reshape"(%1705) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1707 = "mhlo.dot"(%1706, %cst_678) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1708 = chlo.broadcast_add %1707, %cst_679 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1709 = "mhlo.reshape"(%1708) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1710 = chlo.broadcast_add %1709, %1613 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %1711 = chlo.broadcast_multiply %1710, %cst_680 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1712 = chlo.broadcast_add %1711, %cst_681 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1713 = "mhlo.reshape"(%1712) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1714 = "mhlo.dot"(%1713, %cst_666) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1715 = chlo.broadcast_add %1714, %cst_667 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1716 = "mhlo.reshape"(%1715) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1717 = "mhlo.transpose"(%1716) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1718 = "mhlo.dot"(%1713, %cst_662) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1719 = "mhlo.reshape"(%1718) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1720 = "mhlo.broadcast_in_dim"(%cst_663) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1721 = mhlo.add %1719, %1720 : tensor<1x384x128xf32> | |
| %1722 = chlo.broadcast_multiply %1721, %cst_664 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1723 = chlo.broadcast_add %1722, %cst_665 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1724 = "mhlo.reshape"(%1723) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1725 = "mhlo.dot"(%1724, %cst_670) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1726 = chlo.broadcast_add %1725, %cst_671 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1727 = "mhlo.reshape"(%1726) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1728 = "mhlo.transpose"(%1727) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1729 = "mhlo.dot"(%1724, %cst_668) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1730 = chlo.broadcast_add %1729, %cst_669 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1731 = "mhlo.reshape"(%1730) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1732 = "mhlo.transpose"(%1731) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1733 = "mhlo.dot_general"(%1732, %1728) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %1734 = chlo.broadcast_multiply %1733, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %1735 = chlo.broadcast_add %1734, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1736 = "mhlo.reduce"(%1735, %2) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1737 = linalg.tensor_expand_shape %1736 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1738 = chlo.broadcast_subtract %1735, %1737 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1739 = "mhlo.exponential"(%1738) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1740 = "mhlo.reduce"(%1739, %1) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1741 = linalg.tensor_expand_shape %1740 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1742 = chlo.broadcast_divide %1739, %1741 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1743 = "mhlo.dot_general"(%1742, %1717) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1744 = "mhlo.transpose"(%1743) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %1745 = "mhlo.reshape"(%1744) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %1746 = "mhlo.dot"(%1745, %cst_672) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1747 = chlo.broadcast_add %1746, %cst_673 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1748 = "mhlo.reshape"(%1747) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1749 = "mhlo.dot"(%1713, %cst_659) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1750 = chlo.broadcast_add %1749, %cst_660 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1751 = "mhlo.reshape"(%1750) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1752 = chlo.broadcast_multiply %1751, %cst_661 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1753 = chlo.broadcast_add %1752, %cst_673 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1754 = chlo.broadcast_add %1748, %1753 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1755 = chlo.broadcast_multiply %1754, %cst_674 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1756 = chlo.broadcast_add %1755, %cst_675 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1757 = "mhlo.reshape"(%1756) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1758 = "mhlo.dot"(%1757, %cst_657) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1759 = chlo.broadcast_add %1758, %cst_658 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1760 = "mhlo.reshape"(%1759) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1761 = chlo.broadcast_maximum %1760, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1762 = "mhlo.reshape"(%1761) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1763 = "mhlo.dot"(%1762, %cst_653) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1764 = chlo.broadcast_add %1763, %cst_654 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1765 = "mhlo.reshape"(%1764) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1766 = chlo.broadcast_add %1765, %1756 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1767 = chlo.broadcast_multiply %1766, %cst_655 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1768 = chlo.broadcast_add %1767, %cst_656 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1769 = "mhlo.reshape"(%1768) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1770 = "mhlo.dot"(%1769, %cst_651) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1771 = chlo.broadcast_add %1770, %cst_652 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1772 = "mhlo.reshape"(%1771) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1773 = chlo.broadcast_maximum %1772, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1774 = "mhlo.reshape"(%1773) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1775 = "mhlo.dot"(%1774, %cst_647) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1776 = chlo.broadcast_add %1775, %cst_648 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1777 = "mhlo.reshape"(%1776) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1778 = chlo.broadcast_add %1777, %1768 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1779 = chlo.broadcast_multiply %1778, %cst_649 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1780 = chlo.broadcast_add %1779, %cst_650 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1781 = "mhlo.reshape"(%1780) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1782 = "mhlo.dot"(%1781, %cst_645) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1783 = chlo.broadcast_add %1782, %cst_646 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1784 = "mhlo.reshape"(%1783) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1785 = chlo.broadcast_maximum %1784, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1786 = "mhlo.reshape"(%1785) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1787 = "mhlo.dot"(%1786, %cst_641) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1788 = chlo.broadcast_add %1787, %cst_642 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1789 = "mhlo.reshape"(%1788) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1790 = chlo.broadcast_add %1789, %1780 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1791 = chlo.broadcast_multiply %1790, %cst_643 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1792 = chlo.broadcast_add %1791, %cst_644 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1793 = "mhlo.reshape"(%1792) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1794 = "mhlo.dot"(%1793, %cst_639) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1795 = chlo.broadcast_add %1794, %cst_640 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1796 = "mhlo.reshape"(%1795) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1797 = chlo.broadcast_maximum %1796, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1798 = "mhlo.reshape"(%1797) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1799 = "mhlo.dot"(%1798, %cst_631) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1800 = chlo.broadcast_add %1799, %cst_632 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1801 = "mhlo.reshape"(%1800) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1802 = chlo.broadcast_add %1801, %1792 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1803 = chlo.broadcast_multiply %1802, %cst_637 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1804 = chlo.broadcast_add %1803, %cst_638 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1805 = "mhlo.reshape"(%1804) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1806 = "mhlo.dot"(%1805, %cst_633) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1807 = chlo.broadcast_add %1806, %cst_634 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1808 = "mhlo.reshape"(%1807) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1809 = chlo.broadcast_add %1808, %1712 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %1810 = chlo.broadcast_multiply %1809, %cst_635 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1811 = chlo.broadcast_add %1810, %cst_636 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1812 = "mhlo.reshape"(%1811) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1813 = "mhlo.dot"(%1812, %cst_621) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1814 = chlo.broadcast_add %1813, %cst_622 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1815 = "mhlo.reshape"(%1814) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1816 = "mhlo.transpose"(%1815) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1817 = "mhlo.dot"(%1812, %cst_617) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1818 = "mhlo.reshape"(%1817) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1819 = "mhlo.broadcast_in_dim"(%cst_618) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1820 = mhlo.add %1818, %1819 : tensor<1x384x128xf32> | |
| %1821 = chlo.broadcast_multiply %1820, %cst_619 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1822 = chlo.broadcast_add %1821, %cst_620 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1823 = "mhlo.reshape"(%1822) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1824 = "mhlo.dot"(%1823, %cst_625) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1825 = chlo.broadcast_add %1824, %cst_626 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1826 = "mhlo.reshape"(%1825) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1827 = "mhlo.transpose"(%1826) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1828 = "mhlo.dot"(%1823, %cst_623) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1829 = chlo.broadcast_add %1828, %cst_624 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1830 = "mhlo.reshape"(%1829) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1831 = "mhlo.transpose"(%1830) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1832 = "mhlo.dot_general"(%1831, %1827) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %1833 = chlo.broadcast_multiply %1832, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %1834 = chlo.broadcast_add %1833, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1835 = "mhlo.reduce"(%1834, %2) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1836 = linalg.tensor_expand_shape %1835 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1837 = chlo.broadcast_subtract %1834, %1836 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1838 = "mhlo.exponential"(%1837) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1839 = "mhlo.reduce"(%1838, %1) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1840 = linalg.tensor_expand_shape %1839 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1841 = chlo.broadcast_divide %1838, %1840 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1842 = "mhlo.dot_general"(%1841, %1816) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1843 = "mhlo.transpose"(%1842) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %1844 = "mhlo.reshape"(%1843) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %1845 = "mhlo.dot"(%1844, %cst_627) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1846 = chlo.broadcast_add %1845, %cst_628 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1847 = "mhlo.reshape"(%1846) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1848 = "mhlo.dot"(%1812, %cst_614) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1849 = chlo.broadcast_add %1848, %cst_615 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1850 = "mhlo.reshape"(%1849) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1851 = chlo.broadcast_multiply %1850, %cst_616 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1852 = chlo.broadcast_add %1851, %cst_628 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1853 = chlo.broadcast_add %1847, %1852 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1854 = chlo.broadcast_multiply %1853, %cst_629 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1855 = chlo.broadcast_add %1854, %cst_630 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1856 = "mhlo.reshape"(%1855) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1857 = "mhlo.dot"(%1856, %cst_612) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1858 = chlo.broadcast_add %1857, %cst_613 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1859 = "mhlo.reshape"(%1858) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1860 = chlo.broadcast_maximum %1859, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1861 = "mhlo.reshape"(%1860) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1862 = "mhlo.dot"(%1861, %cst_608) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1863 = chlo.broadcast_add %1862, %cst_609 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1864 = "mhlo.reshape"(%1863) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1865 = chlo.broadcast_add %1864, %1855 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1866 = chlo.broadcast_multiply %1865, %cst_610 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1867 = chlo.broadcast_add %1866, %cst_611 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1868 = "mhlo.reshape"(%1867) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1869 = "mhlo.dot"(%1868, %cst_606) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1870 = chlo.broadcast_add %1869, %cst_607 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1871 = "mhlo.reshape"(%1870) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1872 = chlo.broadcast_maximum %1871, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1873 = "mhlo.reshape"(%1872) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1874 = "mhlo.dot"(%1873, %cst_602) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1875 = chlo.broadcast_add %1874, %cst_603 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1876 = "mhlo.reshape"(%1875) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1877 = chlo.broadcast_add %1876, %1867 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1878 = chlo.broadcast_multiply %1877, %cst_604 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1879 = chlo.broadcast_add %1878, %cst_605 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1880 = "mhlo.reshape"(%1879) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1881 = "mhlo.dot"(%1880, %cst_600) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1882 = chlo.broadcast_add %1881, %cst_601 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1883 = "mhlo.reshape"(%1882) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1884 = chlo.broadcast_maximum %1883, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1885 = "mhlo.reshape"(%1884) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1886 = "mhlo.dot"(%1885, %cst_596) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1887 = chlo.broadcast_add %1886, %cst_597 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1888 = "mhlo.reshape"(%1887) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1889 = chlo.broadcast_add %1888, %1879 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1890 = chlo.broadcast_multiply %1889, %cst_598 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1891 = chlo.broadcast_add %1890, %cst_599 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1892 = "mhlo.reshape"(%1891) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1893 = "mhlo.dot"(%1892, %cst_594) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1894 = chlo.broadcast_add %1893, %cst_595 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1895 = "mhlo.reshape"(%1894) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1896 = chlo.broadcast_maximum %1895, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1897 = "mhlo.reshape"(%1896) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1898 = "mhlo.dot"(%1897, %cst_586) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1899 = chlo.broadcast_add %1898, %cst_587 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1900 = "mhlo.reshape"(%1899) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1901 = chlo.broadcast_add %1900, %1891 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1902 = chlo.broadcast_multiply %1901, %cst_592 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1903 = chlo.broadcast_add %1902, %cst_593 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1904 = "mhlo.reshape"(%1903) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1905 = "mhlo.dot"(%1904, %cst_588) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1906 = chlo.broadcast_add %1905, %cst_589 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1907 = "mhlo.reshape"(%1906) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1908 = chlo.broadcast_add %1907, %1811 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %1909 = chlo.broadcast_multiply %1908, %cst_590 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1910 = chlo.broadcast_add %1909, %cst_591 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1911 = "mhlo.reshape"(%1910) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1912 = "mhlo.dot"(%1911, %cst_576) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1913 = chlo.broadcast_add %1912, %cst_577 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1914 = "mhlo.reshape"(%1913) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1915 = "mhlo.transpose"(%1914) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1916 = "mhlo.dot"(%1911, %cst_572) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1917 = "mhlo.reshape"(%1916) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1918 = "mhlo.broadcast_in_dim"(%cst_573) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1919 = mhlo.add %1917, %1918 : tensor<1x384x128xf32> | |
| %1920 = chlo.broadcast_multiply %1919, %cst_574 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1921 = chlo.broadcast_add %1920, %cst_575 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1922 = "mhlo.reshape"(%1921) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1923 = "mhlo.dot"(%1922, %cst_580) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1924 = chlo.broadcast_add %1923, %cst_581 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1925 = "mhlo.reshape"(%1924) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1926 = "mhlo.transpose"(%1925) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1927 = "mhlo.dot"(%1922, %cst_578) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1928 = chlo.broadcast_add %1927, %cst_579 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1929 = "mhlo.reshape"(%1928) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1930 = "mhlo.transpose"(%1929) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1931 = "mhlo.dot_general"(%1930, %1926) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %1932 = chlo.broadcast_multiply %1931, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %1933 = chlo.broadcast_add %1932, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1934 = "mhlo.reduce"(%1933, %2) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1935 = linalg.tensor_expand_shape %1934 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1936 = chlo.broadcast_subtract %1933, %1935 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1937 = "mhlo.exponential"(%1936) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1938 = "mhlo.reduce"(%1937, %1) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1939 = linalg.tensor_expand_shape %1938 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1940 = chlo.broadcast_divide %1937, %1939 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1941 = "mhlo.dot_general"(%1940, %1915) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1942 = "mhlo.transpose"(%1941) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %1943 = "mhlo.reshape"(%1942) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %1944 = "mhlo.dot"(%1943, %cst_582) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1945 = chlo.broadcast_add %1944, %cst_583 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1946 = "mhlo.reshape"(%1945) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1947 = "mhlo.dot"(%1911, %cst_569) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1948 = chlo.broadcast_add %1947, %cst_570 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1949 = "mhlo.reshape"(%1948) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1950 = chlo.broadcast_multiply %1949, %cst_571 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1951 = chlo.broadcast_add %1950, %cst_583 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1952 = chlo.broadcast_add %1946, %1951 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1953 = chlo.broadcast_multiply %1952, %cst_584 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1954 = chlo.broadcast_add %1953, %cst_585 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1955 = "mhlo.reshape"(%1954) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1956 = "mhlo.dot"(%1955, %cst_567) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1957 = chlo.broadcast_add %1956, %cst_568 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1958 = "mhlo.reshape"(%1957) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1959 = chlo.broadcast_maximum %1958, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1960 = "mhlo.reshape"(%1959) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1961 = "mhlo.dot"(%1960, %cst_563) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1962 = chlo.broadcast_add %1961, %cst_564 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1963 = "mhlo.reshape"(%1962) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1964 = chlo.broadcast_add %1963, %1954 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1965 = chlo.broadcast_multiply %1964, %cst_565 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1966 = chlo.broadcast_add %1965, %cst_566 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1967 = "mhlo.reshape"(%1966) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1968 = "mhlo.dot"(%1967, %cst_561) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1969 = chlo.broadcast_add %1968, %cst_562 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1970 = "mhlo.reshape"(%1969) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1971 = chlo.broadcast_maximum %1970, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1972 = "mhlo.reshape"(%1971) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1973 = "mhlo.dot"(%1972, %cst_557) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1974 = chlo.broadcast_add %1973, %cst_558 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1975 = "mhlo.reshape"(%1974) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1976 = chlo.broadcast_add %1975, %1966 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1977 = chlo.broadcast_multiply %1976, %cst_559 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1978 = chlo.broadcast_add %1977, %cst_560 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1979 = "mhlo.reshape"(%1978) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1980 = "mhlo.dot"(%1979, %cst_555) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1981 = chlo.broadcast_add %1980, %cst_556 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1982 = "mhlo.reshape"(%1981) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1983 = chlo.broadcast_maximum %1982, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1984 = "mhlo.reshape"(%1983) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1985 = "mhlo.dot"(%1984, %cst_551) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1986 = chlo.broadcast_add %1985, %cst_552 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1987 = "mhlo.reshape"(%1986) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1988 = chlo.broadcast_add %1987, %1978 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1989 = chlo.broadcast_multiply %1988, %cst_553 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1990 = chlo.broadcast_add %1989, %cst_554 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1991 = "mhlo.reshape"(%1990) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1992 = "mhlo.dot"(%1991, %cst_549) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1993 = chlo.broadcast_add %1992, %cst_550 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1994 = "mhlo.reshape"(%1993) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1995 = chlo.broadcast_maximum %1994, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1996 = "mhlo.reshape"(%1995) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1997 = "mhlo.dot"(%1996, %cst_541) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1998 = chlo.broadcast_add %1997, %cst_542 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1999 = "mhlo.reshape"(%1998) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2000 = chlo.broadcast_add %1999, %1990 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %2001 = chlo.broadcast_multiply %2000, %cst_547 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2002 = chlo.broadcast_add %2001, %cst_548 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2003 = "mhlo.reshape"(%2002) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2004 = "mhlo.dot"(%2003, %cst_543) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %2005 = chlo.broadcast_add %2004, %cst_544 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %2006 = "mhlo.reshape"(%2005) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %2007 = chlo.broadcast_add %2006, %1910 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %2008 = chlo.broadcast_multiply %2007, %cst_545 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %2009 = chlo.broadcast_add %2008, %cst_546 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %2010 = "mhlo.reshape"(%2009) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %2011 = "mhlo.dot"(%2010, %cst_486) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2012 = chlo.broadcast_add %2011, %cst_487 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2013 = "mhlo.reshape"(%2012) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %2014 = "mhlo.transpose"(%2013) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %2015 = "mhlo.dot"(%2010, %cst_482) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2016 = "mhlo.reshape"(%2015) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2017 = "mhlo.broadcast_in_dim"(%cst_483) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2018 = mhlo.add %2016, %2017 : tensor<1x384x128xf32> | |
| %2019 = chlo.broadcast_multiply %2018, %cst_484 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2020 = chlo.broadcast_add %2019, %cst_485 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2021 = "mhlo.reshape"(%2020) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2022 = "mhlo.dot"(%2021, %cst_490) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %2023 = chlo.broadcast_add %2022, %cst_491 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2024 = "mhlo.reshape"(%2023) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %2025 = "mhlo.transpose"(%2024) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %2026 = "mhlo.dot"(%2021, %cst_488) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %2027 = chlo.broadcast_add %2026, %cst_489 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2028 = "mhlo.reshape"(%2027) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %2029 = "mhlo.transpose"(%2028) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %2030 = "mhlo.dot_general"(%2029, %2025) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %2031 = chlo.broadcast_multiply %2030, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %2032 = chlo.broadcast_add %2031, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %2033 = "mhlo.reduce"(%2032, %2) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %2034 = linalg.tensor_expand_shape %2033 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %2035 = chlo.broadcast_subtract %2032, %2034 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %2036 = "mhlo.exponential"(%2035) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %2037 = "mhlo.reduce"(%2036, %1) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %2038 = linalg.tensor_expand_shape %2037 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %2039 = chlo.broadcast_divide %2036, %2038 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %2040 = "mhlo.dot_general"(%2039, %2014) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %2041 = "mhlo.transpose"(%2040) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %2042 = "mhlo.reshape"(%2041) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %2043 = "mhlo.dot"(%2042, %cst_492) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %2044 = chlo.broadcast_add %2043, %cst_493 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2045 = "mhlo.reshape"(%2044) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2046 = "mhlo.dot"(%2010, %cst_479) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2047 = chlo.broadcast_add %2046, %cst_480 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2048 = "mhlo.reshape"(%2047) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2049 = chlo.broadcast_multiply %2048, %cst_481 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2050 = chlo.broadcast_add %2049, %cst_493 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2051 = chlo.broadcast_add %2045, %2050 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %2052 = chlo.broadcast_multiply %2051, %cst_494 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2053 = chlo.broadcast_add %2052, %cst_495 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2054 = "mhlo.reshape"(%2053) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2055 = "mhlo.dot"(%2054, %cst_477) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %2056 = chlo.broadcast_add %2055, %cst_478 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %2057 = "mhlo.reshape"(%2056) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %2058 = chlo.broadcast_maximum %2057, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %2059 = "mhlo.reshape"(%2058) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %2060 = "mhlo.dot"(%2059, %cst_473) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2061 = chlo.broadcast_add %2060, %cst_474 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2062 = "mhlo.reshape"(%2061) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2063 = chlo.broadcast_add %2062, %2053 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %2064 = chlo.broadcast_multiply %2063, %cst_475 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2065 = chlo.broadcast_add %2064, %cst_476 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2066 = "mhlo.reshape"(%2065) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2067 = "mhlo.dot"(%2066, %cst_471) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %2068 = chlo.broadcast_add %2067, %cst_472 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %2069 = "mhlo.reshape"(%2068) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %2070 = chlo.broadcast_maximum %2069, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %2071 = "mhlo.reshape"(%2070) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %2072 = "mhlo.dot"(%2071, %cst_467) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2073 = chlo.broadcast_add %2072, %cst_468 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2074 = "mhlo.reshape"(%2073) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2075 = chlo.broadcast_add %2074, %2065 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %2076 = chlo.broadcast_multiply %2075, %cst_469 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2077 = chlo.broadcast_add %2076, %cst_470 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2078 = "mhlo.reshape"(%2077) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2079 = "mhlo.dot"(%2078, %cst_465) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %2080 = chlo.broadcast_add %2079, %cst_466 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %2081 = "mhlo.reshape"(%2080) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %2082 = chlo.broadcast_maximum %2081, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %2083 = "mhlo.reshape"(%2082) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %2084 = "mhlo.dot"(%2083, %cst_461) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2085 = chlo.broadcast_add %2084, %cst_462 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2086 = "mhlo.reshape"(%2085) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2087 = chlo.broadcast_add %2086, %2077 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %2088 = chlo.broadcast_multiply %2087, %cst_463 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2089 = chlo.broadcast_add %2088, %cst_464 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2090 = "mhlo.reshape"(%2089) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2091 = "mhlo.dot"(%2090, %cst_459) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %2092 = chlo.broadcast_add %2091, %cst_460 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %2093 = "mhlo.reshape"(%2092) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %2094 = chlo.broadcast_maximum %2093, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %2095 = "mhlo.reshape"(%2094) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %2096 = "mhlo.dot"(%2095, %cst_451) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2097 = chlo.broadcast_add %2096, %cst_452 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2098 = "mhlo.reshape"(%2097) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2099 = chlo.broadcast_add %2098, %2089 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %2100 = chlo.broadcast_multiply %2099, %cst_457 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2101 = chlo.broadcast_add %2100, %cst_458 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2102 = "mhlo.reshape"(%2101) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2103 = "mhlo.dot"(%2102, %cst_453) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %2104 = chlo.broadcast_add %2103, %cst_454 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %2105 = "mhlo.reshape"(%2104) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %2106 = chlo.broadcast_add %2105, %2009 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %2107 = chlo.broadcast_multiply %2106, %cst_455 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %2108 = chlo.broadcast_add %2107, %cst_456 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %2109 = "mhlo.reshape"(%2108) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %2110 = "mhlo.dot"(%2109, %cst_441) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2111 = chlo.broadcast_add %2110, %cst_442 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2112 = "mhlo.reshape"(%2111) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %2113 = "mhlo.transpose"(%2112) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %2114 = "mhlo.dot"(%2109, %cst_437) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2115 = "mhlo.reshape"(%2114) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2116 = "mhlo.broadcast_in_dim"(%cst_438) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2117 = mhlo.add %2115, %2116 : tensor<1x384x128xf32> | |
| %2118 = chlo.broadcast_multiply %2117, %cst_439 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2119 = chlo.broadcast_add %2118, %cst_440 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2120 = "mhlo.reshape"(%2119) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2121 = "mhlo.dot"(%2120, %cst_445) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %2122 = chlo.broadcast_add %2121, %cst_446 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2123 = "mhlo.reshape"(%2122) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %2124 = "mhlo.transpose"(%2123) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %2125 = "mhlo.dot"(%2120, %cst_443) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %2126 = chlo.broadcast_add %2125, %cst_444 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2127 = "mhlo.reshape"(%2126) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %2128 = "mhlo.transpose"(%2127) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %2129 = "mhlo.dot_general"(%2128, %2124) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %2130 = chlo.broadcast_multiply %2129, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %2131 = chlo.broadcast_add %2130, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %2132 = "mhlo.reduce"(%2131, %2) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %2133 = linalg.tensor_expand_shape %2132 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %2134 = chlo.broadcast_subtract %2131, %2133 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %2135 = "mhlo.exponential"(%2134) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %2136 = "mhlo.reduce"(%2135, %1) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %2137 = linalg.tensor_expand_shape %2136 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %2138 = chlo.broadcast_divide %2135, %2137 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %2139 = "mhlo.dot_general"(%2138, %2113) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %2140 = "mhlo.transpose"(%2139) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %2141 = "mhlo.reshape"(%2140) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %2142 = "mhlo.dot"(%2141, %cst_447) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %2143 = chlo.broadcast_add %2142, %cst_448 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2144 = "mhlo.reshape"(%2143) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2145 = "mhlo.dot"(%2109, %cst_434) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2146 = chlo.broadcast_add %2145, %cst_435 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2147 = "mhlo.reshape"(%2146) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2148 = chlo.broadcast_multiply %2147, %cst_436 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2149 = chlo.broadcast_add %2148, %cst_448 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2150 = chlo.broadcast_add %2144, %2149 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %2151 = chlo.broadcast_multiply %2150, %cst_449 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2152 = chlo.broadcast_add %2151, %cst_450 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2153 = "mhlo.reshape"(%2152) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2154 = "mhlo.dot"(%2153, %cst_432) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %2155 = chlo.broadcast_add %2154, %cst_433 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %2156 = "mhlo.reshape"(%2155) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %2157 = chlo.broadcast_maximum %2156, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %2158 = "mhlo.reshape"(%2157) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %2159 = "mhlo.dot"(%2158, %cst_428) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2160 = chlo.broadcast_add %2159, %cst_429 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2161 = "mhlo.reshape"(%2160) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2162 = chlo.broadcast_add %2161, %2152 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %2163 = chlo.broadcast_multiply %2162, %cst_430 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2164 = chlo.broadcast_add %2163, %cst_431 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2165 = "mhlo.reshape"(%2164) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2166 = "mhlo.dot"(%2165, %cst_426) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %2167 = chlo.broadcast_add %2166, %cst_427 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %2168 = "mhlo.reshape"(%2167) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %2169 = chlo.broadcast_maximum %2168, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %2170 = "mhlo.reshape"(%2169) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %2171 = "mhlo.dot"(%2170, %cst_422) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2172 = chlo.broadcast_add %2171, %cst_423 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2173 = "mhlo.reshape"(%2172) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2174 = chlo.broadcast_add %2173, %2164 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %2175 = chlo.broadcast_multiply %2174, %cst_424 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2176 = chlo.broadcast_add %2175, %cst_425 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2177 = "mhlo.reshape"(%2176) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2178 = "mhlo.dot"(%2177, %cst_420) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %2179 = chlo.broadcast_add %2178, %cst_421 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %2180 = "mhlo.reshape"(%2179) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %2181 = chlo.broadcast_maximum %2180, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %2182 = "mhlo.reshape"(%2181) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %2183 = "mhlo.dot"(%2182, %cst_416) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2184 = chlo.broadcast_add %2183, %cst_417 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2185 = "mhlo.reshape"(%2184) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2186 = chlo.broadcast_add %2185, %2176 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %2187 = chlo.broadcast_multiply %2186, %cst_418 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2188 = chlo.broadcast_add %2187, %cst_419 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2189 = "mhlo.reshape"(%2188) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2190 = "mhlo.dot"(%2189, %cst_414) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %2191 = chlo.broadcast_add %2190, %cst_415 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %2192 = "mhlo.reshape"(%2191) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %2193 = chlo.broadcast_maximum %2192, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %2194 = "mhlo.reshape"(%2193) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %2195 = "mhlo.dot"(%2194, %cst_406) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2196 = chlo.broadcast_add %2195, %cst_407 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2197 = "mhlo.reshape"(%2196) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2198 = chlo.broadcast_add %2197, %2188 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %2199 = chlo.broadcast_multiply %2198, %cst_412 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2200 = chlo.broadcast_add %2199, %cst_413 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2201 = "mhlo.reshape"(%2200) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2202 = "mhlo.dot"(%2201, %cst_408) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %2203 = chlo.broadcast_add %2202, %cst_409 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %2204 = "mhlo.reshape"(%2203) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %2205 = chlo.broadcast_add %2204, %2108 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %2206 = chlo.broadcast_multiply %2205, %cst_410 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %2207 = chlo.broadcast_add %2206, %cst_411 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %2208 = "mhlo.reshape"(%2207) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %2209 = "mhlo.dot"(%2208, %cst_396) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2210 = chlo.broadcast_add %2209, %cst_397 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2211 = "mhlo.reshape"(%2210) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %2212 = "mhlo.transpose"(%2211) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %2213 = "mhlo.dot"(%2208, %cst_392) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2214 = "mhlo.reshape"(%2213) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2215 = "mhlo.broadcast_in_dim"(%cst_393) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2216 = mhlo.add %2214, %2215 : tensor<1x384x128xf32> | |
| %2217 = chlo.broadcast_multiply %2216, %cst_394 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2218 = chlo.broadcast_add %2217, %cst_395 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2219 = "mhlo.reshape"(%2218) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2220 = "mhlo.dot"(%2219, %cst_400) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %2221 = chlo.broadcast_add %2220, %cst_401 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2222 = "mhlo.reshape"(%2221) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %2223 = "mhlo.transpose"(%2222) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %2224 = "mhlo.dot"(%2219, %cst_398) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %2225 = chlo.broadcast_add %2224, %cst_399 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2226 = "mhlo.reshape"(%2225) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %2227 = "mhlo.transpose"(%2226) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %2228 = "mhlo.dot_general"(%2227, %2223) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %2229 = chlo.broadcast_multiply %2228, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %2230 = chlo.broadcast_add %2229, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %2231 = "mhlo.reduce"(%2230, %2) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %2232 = linalg.tensor_expand_shape %2231 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %2233 = chlo.broadcast_subtract %2230, %2232 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %2234 = "mhlo.exponential"(%2233) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %2235 = "mhlo.reduce"(%2234, %1) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %2236 = linalg.tensor_expand_shape %2235 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %2237 = chlo.broadcast_divide %2234, %2236 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %2238 = "mhlo.dot_general"(%2237, %2212) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %2239 = "mhlo.transpose"(%2238) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %2240 = "mhlo.reshape"(%2239) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %2241 = "mhlo.dot"(%2240, %cst_402) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %2242 = chlo.broadcast_add %2241, %cst_403 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2243 = "mhlo.reshape"(%2242) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2244 = "mhlo.dot"(%2208, %cst_389) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2245 = chlo.broadcast_add %2244, %cst_390 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2246 = "mhlo.reshape"(%2245) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2247 = chlo.broadcast_multiply %2246, %cst_391 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2248 = chlo.broadcast_add %2247, %cst_403 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2249 = chlo.broadcast_add %2243, %2248 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %2250 = chlo.broadcast_multiply %2249, %cst_404 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2251 = chlo.broadcast_add %2250, %cst_405 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2252 = "mhlo.reshape"(%2251) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2253 = "mhlo.dot"(%2252, %cst_387) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %2254 = chlo.broadcast_add %2253, %cst_388 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %2255 = "mhlo.reshape"(%2254) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %2256 = chlo.broadcast_maximum %2255, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %2257 = "mhlo.reshape"(%2256) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %2258 = "mhlo.dot"(%2257, %cst_383) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2259 = chlo.broadcast_add %2258, %cst_384 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2260 = "mhlo.reshape"(%2259) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2261 = chlo.broadcast_add %2260, %2251 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %2262 = chlo.broadcast_multiply %2261, %cst_385 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2263 = chlo.broadcast_add %2262, %cst_386 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2264 = "mhlo.reshape"(%2263) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2265 = "mhlo.dot"(%2264, %cst_381) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %2266 = chlo.broadcast_add %2265, %cst_382 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %2267 = "mhlo.reshape"(%2266) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %2268 = chlo.broadcast_maximum %2267, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %2269 = "mhlo.reshape"(%2268) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %2270 = "mhlo.dot"(%2269, %cst_377) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2271 = chlo.broadcast_add %2270, %cst_378 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2272 = "mhlo.reshape"(%2271) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2273 = chlo.broadcast_add %2272, %2263 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %2274 = chlo.broadcast_multiply %2273, %cst_379 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2275 = chlo.broadcast_add %2274, %cst_380 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2276 = "mhlo.reshape"(%2275) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2277 = "mhlo.dot"(%2276, %cst_375) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %2278 = chlo.broadcast_add %2277, %cst_376 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %2279 = "mhlo.reshape"(%2278) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %2280 = chlo.broadcast_maximum %2279, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %2281 = "mhlo.reshape"(%2280) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %2282 = "mhlo.dot"(%2281, %cst_371) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2283 = chlo.broadcast_add %2282, %cst_372 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2284 = "mhlo.reshape"(%2283) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2285 = chlo.broadcast_add %2284, %2275 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %2286 = chlo.broadcast_multiply %2285, %cst_373 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2287 = chlo.broadcast_add %2286, %cst_374 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2288 = "mhlo.reshape"(%2287) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2289 = "mhlo.dot"(%2288, %cst_369) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %2290 = chlo.broadcast_add %2289, %cst_370 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %2291 = "mhlo.reshape"(%2290) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %2292 = chlo.broadcast_maximum %2291, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %2293 = "mhlo.reshape"(%2292) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %2294 = "mhlo.dot"(%2293, %cst_361) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2295 = chlo.broadcast_add %2294, %cst_362 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2296 = "mhlo.reshape"(%2295) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2297 = chlo.broadcast_add %2296, %2287 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %2298 = chlo.broadcast_multiply %2297, %cst_367 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2299 = chlo.broadcast_add %2298, %cst_368 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2300 = "mhlo.reshape"(%2299) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2301 = "mhlo.dot"(%2300, %cst_363) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %2302 = chlo.broadcast_add %2301, %cst_364 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %2303 = "mhlo.reshape"(%2302) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %2304 = chlo.broadcast_add %2303, %2207 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %2305 = chlo.broadcast_multiply %2304, %cst_365 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %2306 = chlo.broadcast_add %2305, %cst_366 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %2307 = "mhlo.reshape"(%2306) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %2308 = "mhlo.dot"(%2307, %cst_351) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2309 = chlo.broadcast_add %2308, %cst_352 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2310 = "mhlo.reshape"(%2309) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %2311 = "mhlo.transpose"(%2310) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %2312 = "mhlo.dot"(%2307, %cst_347) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2313 = "mhlo.reshape"(%2312) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2314 = "mhlo.broadcast_in_dim"(%cst_348) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2315 = mhlo.add %2313, %2314 : tensor<1x384x128xf32> | |
| %2316 = chlo.broadcast_multiply %2315, %cst_349 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2317 = chlo.broadcast_add %2316, %cst_350 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2318 = "mhlo.reshape"(%2317) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2319 = "mhlo.dot"(%2318, %cst_355) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %2320 = chlo.broadcast_add %2319, %cst_356 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2321 = "mhlo.reshape"(%2320) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %2322 = "mhlo.transpose"(%2321) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %2323 = "mhlo.dot"(%2318, %cst_353) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %2324 = chlo.broadcast_add %2323, %cst_354 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2325 = "mhlo.reshape"(%2324) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %2326 = "mhlo.transpose"(%2325) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %2327 = "mhlo.dot_general"(%2326, %2322) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %2328 = chlo.broadcast_multiply %2327, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %2329 = chlo.broadcast_add %2328, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %2330 = "mhlo.reduce"(%2329, %2) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %2331 = linalg.tensor_expand_shape %2330 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %2332 = chlo.broadcast_subtract %2329, %2331 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %2333 = "mhlo.exponential"(%2332) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %2334 = "mhlo.reduce"(%2333, %1) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %2335 = linalg.tensor_expand_shape %2334 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %2336 = chlo.broadcast_divide %2333, %2335 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %2337 = "mhlo.dot_general"(%2336, %2311) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %2338 = "mhlo.transpose"(%2337) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %2339 = "mhlo.reshape"(%2338) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %2340 = "mhlo.dot"(%2339, %cst_357) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %2341 = chlo.broadcast_add %2340, %cst_358 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2342 = "mhlo.reshape"(%2341) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2343 = "mhlo.dot"(%2307, %cst_344) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2344 = chlo.broadcast_add %2343, %cst_345 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2345 = "mhlo.reshape"(%2344) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2346 = chlo.broadcast_multiply %2345, %cst_346 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2347 = chlo.broadcast_add %2346, %cst_358 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2348 = chlo.broadcast_add %2342, %2347 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %2349 = chlo.broadcast_multiply %2348, %cst_359 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2350 = chlo.broadcast_add %2349, %cst_360 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2351 = "mhlo.reshape"(%2350) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2352 = "mhlo.dot"(%2351, %cst_342) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %2353 = chlo.broadcast_add %2352, %cst_343 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %2354 = "mhlo.reshape"(%2353) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %2355 = chlo.broadcast_maximum %2354, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %2356 = "mhlo.reshape"(%2355) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %2357 = "mhlo.dot"(%2356, %cst_338) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2358 = chlo.broadcast_add %2357, %cst_339 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2359 = "mhlo.reshape"(%2358) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2360 = chlo.broadcast_add %2359, %2350 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %2361 = chlo.broadcast_multiply %2360, %cst_340 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2362 = chlo.broadcast_add %2361, %cst_341 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2363 = "mhlo.reshape"(%2362) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2364 = "mhlo.dot"(%2363, %cst_336) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %2365 = chlo.broadcast_add %2364, %cst_337 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %2366 = "mhlo.reshape"(%2365) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %2367 = chlo.broadcast_maximum %2366, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %2368 = "mhlo.reshape"(%2367) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %2369 = "mhlo.dot"(%2368, %cst_332) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2370 = chlo.broadcast_add %2369, %cst_333 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2371 = "mhlo.reshape"(%2370) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2372 = chlo.broadcast_add %2371, %2362 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %2373 = chlo.broadcast_multiply %2372, %cst_334 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2374 = chlo.broadcast_add %2373, %cst_335 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2375 = "mhlo.reshape"(%2374) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2376 = "mhlo.dot"(%2375, %cst_330) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %2377 = chlo.broadcast_add %2376, %cst_331 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %2378 = "mhlo.reshape"(%2377) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %2379 = chlo.broadcast_maximum %2378, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %2380 = "mhlo.reshape"(%2379) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %2381 = "mhlo.dot"(%2380, %cst_326) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2382 = chlo.broadcast_add %2381, %cst_327 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2383 = "mhlo.reshape"(%2382) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2384 = chlo.broadcast_add %2383, %2374 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %2385 = chlo.broadcast_multiply %2384, %cst_328 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2386 = chlo.broadcast_add %2385, %cst_329 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2387 = "mhlo.reshape"(%2386) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2388 = "mhlo.dot"(%2387, %cst_324) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %2389 = chlo.broadcast_add %2388, %cst_325 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %2390 = "mhlo.reshape"(%2389) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %2391 = chlo.broadcast_maximum %2390, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %2392 = "mhlo.reshape"(%2391) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %2393 = "mhlo.dot"(%2392, %cst_316) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2394 = chlo.broadcast_add %2393, %cst_317 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2395 = "mhlo.reshape"(%2394) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2396 = chlo.broadcast_add %2395, %2386 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %2397 = chlo.broadcast_multiply %2396, %cst_322 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2398 = chlo.broadcast_add %2397, %cst_323 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2399 = "mhlo.reshape"(%2398) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2400 = "mhlo.dot"(%2399, %cst_318) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %2401 = chlo.broadcast_add %2400, %cst_319 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %2402 = "mhlo.reshape"(%2401) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %2403 = chlo.broadcast_add %2402, %2306 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %2404 = chlo.broadcast_multiply %2403, %cst_320 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %2405 = chlo.broadcast_add %2404, %cst_321 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %2406 = "mhlo.reshape"(%2405) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %2407 = "mhlo.transpose"(%cst) {permutation = dense<[1, 0]> : tensor<2xi64>} : (tensor<2x512xf32>) -> tensor<512x2xf32> | |
| %2408 = "mhlo.dot"(%2406, %2407) : (tensor<384x512xf32>, tensor<512x2xf32>) -> tensor<384x2xf32> | |
| %2409 = "mhlo.broadcast_in_dim"(%cst_0) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<2xf32>) -> tensor<384x2xf32> | |
| %2410 = mhlo.add %2408, %2409 : tensor<384x2xf32> | |
| %2411 = "mhlo.reshape"(%2410) : (tensor<384x2xf32>) -> tensor<1x384x2xf32> | |
| %2412 = "mhlo.transpose"(%2411) {permutation = dense<[2, 0, 1]> : tensor<3xi64>} : (tensor<1x384x2xf32>) -> tensor<2x1x384xf32> | |
| %2413 = "mhlo.slice"(%2412) {limit_indices = dense<[1, 1, 384]> : tensor<3xi64>, start_indices = dense<0> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} : (tensor<2x1x384xf32>) -> tensor<1x1x384xf32> | |
| %2414 = linalg.tensor_collapse_shape %2413 [[0], [1, 2]] : tensor<1x1x384xf32> into tensor<1x384xf32> | |
| %2415 = "mhlo.slice"(%2412) {limit_indices = dense<[2, 1, 384]> : tensor<3xi64>, start_indices = dense<[1, 0, 0]> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} : (tensor<2x1x384xf32>) -> tensor<1x1x384xf32> | |
| %2416 = linalg.tensor_collapse_shape %2415 [[0], [1, 2]] : tensor<1x1x384xf32> into tensor<1x384xf32> | |
| return %2416, %2414 : tensor<1x384xf32>, tensor<1x384xf32> | |
| } | |
| } | |
| // -----// IR Dump After Canonicalizer //----- // | |
| builtin.func private @serving_default__ireesm(%arg0: tensor<1x384xi32>, %arg1: tensor<1x384xi32>, %arg2: tensor<1x384xi32>) -> (tensor<1x384xf32>, tensor<1x384xf32>) attributes {tf.entry_function = {control_outputs = "", inputs = "segment_ids:0,input_mask:0,input_ids:0", outputs = "end_logits:0,start_logits:0"}} { | |
| %0 = mhlo.constant dense<1.000000e+00> : tensor<1x384x1xf32> | |
| %1 = mhlo.constant dense<1.000000e+04> : tensor<f32> | |
| %2 = mhlo.constant dense<0.176776692> : tensor<f32> | |
| %3 = mhlo.constant dense<-1.000000e+04> : tensor<f32> | |
| %4 = mhlo.constant dense<0xFF800000> : tensor<f32> | |
| %5 = mhlo.constant dense<0.000000e+00> : tensor<f32> | |
| %cst = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_0 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_2 = constant opaque<"_", "0xDEADBEEF"> : tensor<384x512xf32> | |
| %6 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<1x384x512xf32> | |
| %cst_3 = constant opaque<"_", "0xDEADBEEF"> : tensor<2x512xf32> | |
| %cst_4 = constant opaque<"_", "0xDEADBEEF"> : tensor<30522x128xf32> | |
| %cst_5 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_6 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_7 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_8 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_9 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_10 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_11 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_12 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_13 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_14 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_15 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_16 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_17 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_18 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_19 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_20 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_21 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_22 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_23 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_24 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_25 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_26 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_27 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_28 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_29 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_30 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_31 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_32 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_33 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_34 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_35 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_36 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_37 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_38 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_39 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_40 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_41 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_42 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_43 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_44 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_45 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_46 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_47 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_48 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_49 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_50 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_51 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_52 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_53 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_54 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_55 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_56 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_57 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_58 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_59 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_60 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_61 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_62 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_63 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_64 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_65 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_66 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_67 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_68 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_69 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_70 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_71 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_72 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_73 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_74 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_75 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_76 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_77 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_78 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_79 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_80 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_81 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_82 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_83 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_84 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_85 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_86 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_87 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_88 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_89 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_90 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_91 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_92 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_93 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_94 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_95 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_96 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_97 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_98 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_99 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_100 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_101 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_102 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_103 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_104 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_105 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_106 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_107 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_108 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_109 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_110 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_111 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_112 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_113 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_114 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_115 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_116 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_117 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_118 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_119 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_120 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_121 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_122 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_123 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_124 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_125 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_126 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_127 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_128 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_129 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_130 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_131 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_132 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_133 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_134 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_135 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_136 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_137 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_138 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_139 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_140 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_141 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_142 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_143 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_144 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_145 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_146 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_147 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_148 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_149 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_150 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_151 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_152 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_153 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_154 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_155 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_156 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_157 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_158 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_159 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_160 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_161 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_162 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_163 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_164 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_165 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_166 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_167 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_168 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_169 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_170 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_171 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_172 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_173 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_174 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_175 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_176 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_177 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_178 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_179 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_180 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_181 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_182 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_183 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_184 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_185 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_186 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_187 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_188 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_189 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_190 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_191 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_192 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_193 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_194 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_195 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_196 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_197 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_198 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_199 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_200 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_201 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_202 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_203 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_204 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_205 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_206 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_207 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_208 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_209 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_210 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_211 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_212 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_213 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_214 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_215 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_216 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_217 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_218 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_219 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_220 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_221 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_222 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_223 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_224 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_225 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_226 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_227 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_228 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_229 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_230 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_231 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_232 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_233 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_234 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_235 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_236 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_237 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_238 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_239 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_240 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_241 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_242 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_243 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_244 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_245 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_246 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_247 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_248 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_249 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_250 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_251 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_252 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_253 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_254 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_255 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_256 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_257 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_258 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_259 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_260 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_261 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_262 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_263 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_264 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_265 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_266 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_267 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_268 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_269 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_270 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_271 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_272 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_273 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_274 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_275 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_276 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_277 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_278 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_279 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_280 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_281 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_282 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_283 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_284 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_285 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_286 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_287 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_288 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_289 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_290 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_291 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_292 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_293 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_294 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_295 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_296 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_297 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_298 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_299 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_300 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_301 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_302 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_303 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_304 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_305 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_306 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_307 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_308 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_309 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_310 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_311 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_312 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_313 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_314 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_315 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_316 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_317 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_318 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_319 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_320 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_321 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_322 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_323 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_324 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_325 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_326 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_327 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_328 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_329 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_330 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_331 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_332 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_333 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_334 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_335 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_336 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_337 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_338 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_339 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_340 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_341 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_342 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_343 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_344 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_345 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_346 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_347 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_348 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_349 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_350 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_351 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_352 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_353 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_354 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_355 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_356 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_357 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_358 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_359 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_360 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_361 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_362 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_363 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_364 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_365 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_366 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_367 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_368 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_369 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_370 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_371 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_372 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_373 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_374 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_375 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_376 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_377 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_378 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_379 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_380 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_381 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_382 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_383 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_384 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_385 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_386 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_387 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_388 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_389 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_390 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_391 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_392 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_393 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_394 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_395 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_396 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_397 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_398 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_399 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_400 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_401 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_402 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_403 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_404 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_405 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_406 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_407 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_408 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_409 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_410 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_411 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_412 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_413 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_414 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_415 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_416 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_417 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_418 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_419 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_420 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_421 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_422 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_423 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_424 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_425 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_426 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_427 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_428 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_429 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_430 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_431 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_432 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_433 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_434 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_435 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_436 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_437 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_438 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_439 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_440 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_441 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_442 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_443 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_444 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_445 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_446 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_447 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_448 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_449 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_450 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_451 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_452 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_453 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_454 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_455 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_456 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_457 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_458 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_459 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_460 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_461 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_462 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_463 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_464 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_465 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_466 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_467 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_468 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_469 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_470 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_471 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_472 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_473 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_474 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_475 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_476 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_477 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_478 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_479 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_480 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_481 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_482 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_483 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_484 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_485 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_486 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_487 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_488 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_489 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_490 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_491 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_492 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_493 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_494 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_495 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_496 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_497 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_498 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_499 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_500 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_501 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_502 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_503 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_504 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_505 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_506 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_507 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_508 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_509 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_510 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_511 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_512 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_513 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_514 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_515 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_516 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_517 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_518 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_519 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_520 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_521 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_522 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_523 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_524 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_525 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_526 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_527 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_528 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_529 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_530 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_531 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_532 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_533 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_534 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_535 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_536 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_537 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_538 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_539 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_540 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_541 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_542 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_543 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_544 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_545 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_546 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_547 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_548 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_549 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_550 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_551 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_552 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_553 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_554 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_555 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_556 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_557 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_558 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_559 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_560 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_561 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_562 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_563 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_564 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_565 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_566 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_567 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_568 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_569 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_570 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_571 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_572 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_573 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_574 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_575 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_576 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_577 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_578 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_579 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_580 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_581 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_582 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_583 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_584 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_585 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_586 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_587 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_588 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_589 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_590 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_591 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_592 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_593 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_594 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_595 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_596 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_597 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_598 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_599 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_600 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_601 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_602 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_603 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_604 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_605 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_606 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_607 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_608 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_609 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_610 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_611 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_612 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_613 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_614 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_615 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_616 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_617 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_618 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_619 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_620 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_621 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_622 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_623 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_624 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_625 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_626 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_627 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_628 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_629 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_630 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_631 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_632 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_633 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_634 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_635 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_636 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_637 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_638 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_639 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_640 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_641 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_642 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_643 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_644 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_645 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_646 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_647 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_648 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_649 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_650 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_651 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_652 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_653 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_654 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_655 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_656 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_657 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_658 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_659 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_660 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_661 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_662 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_663 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_664 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_665 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_666 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_667 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_668 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_669 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_670 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_671 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_672 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_673 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_674 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_675 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_676 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_677 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_678 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_679 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_680 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_681 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_682 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_683 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_684 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_685 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_686 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_687 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_688 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_689 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_690 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_691 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_692 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_693 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_694 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_695 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_696 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_697 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_698 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_699 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_700 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_701 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_702 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_703 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_704 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_705 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_706 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_707 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_708 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_709 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_710 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_711 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_712 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_713 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_714 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_715 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_716 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_717 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_718 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_719 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_720 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_721 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_722 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_723 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_724 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_725 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_726 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_727 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_728 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_729 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_730 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_731 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_732 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_733 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_734 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_735 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_736 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_737 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_738 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_739 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_740 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_741 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_742 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_743 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_744 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_745 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_746 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_747 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_748 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_749 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_750 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_751 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_752 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_753 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_754 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_755 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_756 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_757 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_758 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_759 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_760 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_761 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_762 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_763 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_764 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_765 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_766 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_767 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_768 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_769 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_770 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_771 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_772 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_773 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_774 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_775 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_776 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_777 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_778 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_779 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_780 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_781 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_782 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_783 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_784 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_785 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_786 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_787 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_788 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_789 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_790 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_791 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_792 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_793 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_794 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_795 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_796 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_797 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_798 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_799 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_800 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_801 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_802 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_803 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_804 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_805 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_806 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_807 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_808 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_809 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_810 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_811 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_812 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_813 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_814 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_815 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_816 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_817 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_818 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_819 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_820 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_821 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_822 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_823 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_824 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_825 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_826 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_827 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_828 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_829 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_830 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_831 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_832 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_833 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_834 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_835 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_836 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_837 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_838 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_839 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_840 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_841 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_842 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_843 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_844 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_845 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_846 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_847 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_848 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_849 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_850 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_851 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_852 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_853 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_854 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_855 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_856 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_857 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_858 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_859 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_860 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_861 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_862 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_863 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_864 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_865 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_866 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_867 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_868 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_869 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_870 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_871 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_872 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_873 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_874 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_875 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_876 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_877 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_878 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_879 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_880 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_881 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_882 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_883 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_884 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_885 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_886 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_887 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_888 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_889 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_890 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_891 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_892 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_893 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_894 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_895 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_896 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_897 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_898 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_899 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_900 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_901 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_902 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_903 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_904 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_905 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_906 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_907 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_908 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_909 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_910 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_911 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_912 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_913 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_914 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_915 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_916 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_917 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_918 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_919 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_920 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_921 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_922 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_923 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_924 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_925 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_926 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_927 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_928 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_929 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_930 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_931 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_932 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_933 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_934 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_935 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_936 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_937 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_938 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_939 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_940 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_941 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_942 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_943 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_944 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_945 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_946 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_947 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_948 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_949 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_950 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_951 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_952 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_953 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_954 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_955 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_956 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_957 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_958 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_959 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_960 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_961 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_962 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_963 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_964 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_965 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_966 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_967 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_968 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_969 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_970 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_971 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_972 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_973 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_974 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_975 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_976 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_977 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_978 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_979 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_980 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_981 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_982 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_983 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_984 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_985 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_986 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_987 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_988 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_989 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_990 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_991 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_992 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_993 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_994 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_995 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_996 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_997 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_998 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_999 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1000 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_1001 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1002 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_1003 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1004 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1005 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1006 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1007 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1008 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1009 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1010 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1011 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1012 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1013 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_1014 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1015 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1016 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1017 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1018 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1019 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_1020 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1021 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1022 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1023 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1024 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1025 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_1026 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1027 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1028 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1029 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1030 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1031 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_1032 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1033 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1034 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1035 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1036 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1037 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_1038 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1039 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1040 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1041 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1042 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1043 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_1044 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1045 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_1046 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1047 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> | |
| %cst_1048 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1049 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1050 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1051 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1052 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1053 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1054 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1055 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1056 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1057 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1058 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_1059 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1060 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1061 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1062 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1063 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1064 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_1065 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1066 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1067 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1068 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1069 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1070 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_1071 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1072 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1073 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1074 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1075 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1076 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_1077 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1078 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1079 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1080 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1081 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32> | |
| %cst_1082 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> | |
| %cst_1083 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32> | |
| %cst_1084 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> | |
| %cst_1085 = constant dense<[0.0287729427, 0.0297581609]> : tensor<2xf32> | |
| %cst_1086 = constant opaque<"_", "0xDEADBEEF"> : tensor<2x512xf32> | |
| %7 = linalg.tensor_expand_shape %arg2 [[0], [1, 2]] : tensor<1x384xi32> into tensor<1x384x1xi32> | |
| %8 = "mhlo.torch_index_select"(%cst_4, %7) {batch_dims = 0 : i64, dim = 0 : i64} : (tensor<30522x128xf32>, tensor<1x384x1xi32>) -> tensor<1x384x1x128xf32> | |
| %9 = "mhlo.reshape"(%8) : (tensor<1x384x1x128xf32>) -> tensor<1x384x128xf32> | |
| %10 = "mhlo.slice"(%9) {limit_indices = dense<[1, 384, 128]> : tensor<3xi64>, start_indices = dense<[0, 1, 0]> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} : (tensor<1x384x128xf32>) -> tensor<1x383x128xf32> | |
| %11 = "mhlo.pad"(%10, %5) {edge_padding_high = dense<[0, 1, 0]> : tensor<3xi64>, edge_padding_low = dense<0> : tensor<3xi64>, interior_padding = dense<0> : tensor<3xi64>} : (tensor<1x383x128xf32>, tensor<f32>) -> tensor<1x384x128xf32> | |
| %12 = "mhlo.slice"(%9) {limit_indices = dense<[1, 383, 128]> : tensor<3xi64>, start_indices = dense<0> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} : (tensor<1x384x128xf32>) -> tensor<1x383x128xf32> | |
| %13 = "mhlo.pad"(%12, %5) {edge_padding_high = dense<0> : tensor<3xi64>, edge_padding_low = dense<[0, 1, 0]> : tensor<3xi64>, interior_padding = dense<0> : tensor<3xi64>} : (tensor<1x383x128xf32>, tensor<f32>) -> tensor<1x384x128xf32> | |
| %14 = "mhlo.concatenate"(%11, %9, %13) {dimension = 2 : i64} : (tensor<1x384x128xf32>, tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x384xf32> | |
| %15 = "mhlo.reshape"(%14) : (tensor<1x384x384xf32>) -> tensor<384x384xf32> | |
| %16 = "mhlo.dot"(%15, %cst_2) : (tensor<384x384xf32>, tensor<384x512xf32>) -> tensor<384x512xf32> | |
| %17 = chlo.broadcast_add %16, %cst_1 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %18 = "mhlo.reshape"(%17) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %19 = "mhlo.convert"(%arg1) : (tensor<1x384xi32>) -> tensor<1x384xf32> | |
| %20 = "mhlo.reshape"(%19) : (tensor<1x384xf32>) -> tensor<1x1x384xf32> | |
| %21 = chlo.broadcast_multiply %20, %0 : (tensor<1x1x384xf32>, tensor<1x384x1xf32>) -> tensor<1x384x384xf32> | |
| %22 = linalg.tensor_expand_shape %21 [[0], [1, 2], [3]] : tensor<1x384x384xf32> into tensor<1x1x384x384xf32> | |
| %23 = chlo.broadcast_multiply %22, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x1x384x384xf32>, tensor<f32>) -> tensor<1x1x384x384xf32> | |
| %24 = chlo.broadcast_add %23, %3 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x1x384x384xf32>, tensor<f32>) -> tensor<1x1x384x384xf32> | |
| %25 = "mhlo.torch_index_select"(%cst_3, %arg0) {batch_dims = 0 : i64, dim = 0 : i64} : (tensor<2x512xf32>, tensor<1x384xi32>) -> tensor<1x384x512xf32> | |
| %26 = chlo.broadcast_add %18, %25 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %27 = chlo.broadcast_add %26, %6 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %28 = chlo.broadcast_multiply %27, %cst_0 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %29 = chlo.broadcast_add %28, %cst {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %30 = "mhlo.reshape"(%29) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %31 = "mhlo.dot"(%30, %cst_14) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %32 = chlo.broadcast_add %31, %cst_13 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %33 = "mhlo.reshape"(%32) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %34 = "mhlo.transpose"(%33) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %35 = "mhlo.dot"(%30, %cst_18) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %36 = "mhlo.reshape"(%35) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %37 = "mhlo.broadcast_in_dim"(%cst_17) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %38 = mhlo.add %36, %37 : tensor<1x384x128xf32> | |
| %39 = chlo.broadcast_multiply %38, %cst_16 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %40 = chlo.broadcast_add %39, %cst_15 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %41 = "mhlo.reshape"(%40) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %42 = "mhlo.dot"(%41, %cst_10) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %43 = chlo.broadcast_add %42, %cst_9 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %44 = "mhlo.reshape"(%43) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %45 = "mhlo.transpose"(%44) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %46 = "mhlo.dot"(%41, %cst_12) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %47 = chlo.broadcast_add %46, %cst_11 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %48 = "mhlo.reshape"(%47) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %49 = "mhlo.transpose"(%48) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %50 = "mhlo.dot_general"(%49, %45) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %51 = chlo.broadcast_multiply %50, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %52 = chlo.broadcast_add %51, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %53 = "mhlo.reduce"(%52, %4) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %54 = linalg.tensor_expand_shape %53 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %55 = chlo.broadcast_subtract %52, %54 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %56 = "mhlo.exponential"(%55) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %57 = "mhlo.reduce"(%56, %5) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %58 = linalg.tensor_expand_shape %57 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %59 = chlo.broadcast_divide %56, %58 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %60 = "mhlo.dot_general"(%59, %34) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %61 = "mhlo.transpose"(%60) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %62 = "mhlo.reshape"(%61) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %63 = "mhlo.dot"(%62, %cst_8) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %64 = chlo.broadcast_add %63, %cst_7 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %65 = "mhlo.reshape"(%64) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %66 = "mhlo.dot"(%30, %cst_21) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %67 = chlo.broadcast_add %66, %cst_20 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %68 = "mhlo.reshape"(%67) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %69 = chlo.broadcast_multiply %68, %cst_19 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %70 = chlo.broadcast_add %69, %cst_7 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %71 = chlo.broadcast_add %65, %70 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %72 = chlo.broadcast_multiply %71, %cst_6 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %73 = chlo.broadcast_add %72, %cst_5 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %74 = "mhlo.reshape"(%73) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %75 = "mhlo.dot"(%74, %cst_23) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %76 = chlo.broadcast_add %75, %cst_22 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %77 = "mhlo.reshape"(%76) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %78 = chlo.broadcast_maximum %77, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %79 = "mhlo.reshape"(%78) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %80 = "mhlo.dot"(%79, %cst_27) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %81 = chlo.broadcast_add %80, %cst_26 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %82 = "mhlo.reshape"(%81) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %83 = chlo.broadcast_add %82, %73 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %84 = chlo.broadcast_multiply %83, %cst_25 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %85 = chlo.broadcast_add %84, %cst_24 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %86 = "mhlo.reshape"(%85) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %87 = "mhlo.dot"(%86, %cst_29) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %88 = chlo.broadcast_add %87, %cst_28 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %89 = "mhlo.reshape"(%88) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %90 = chlo.broadcast_maximum %89, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %91 = "mhlo.reshape"(%90) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %92 = "mhlo.dot"(%91, %cst_33) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %93 = chlo.broadcast_add %92, %cst_32 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %94 = "mhlo.reshape"(%93) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %95 = chlo.broadcast_add %94, %85 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %96 = chlo.broadcast_multiply %95, %cst_31 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %97 = chlo.broadcast_add %96, %cst_30 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %98 = "mhlo.reshape"(%97) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %99 = "mhlo.dot"(%98, %cst_35) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %100 = chlo.broadcast_add %99, %cst_34 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %101 = "mhlo.reshape"(%100) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %102 = chlo.broadcast_maximum %101, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %103 = "mhlo.reshape"(%102) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %104 = "mhlo.dot"(%103, %cst_39) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %105 = chlo.broadcast_add %104, %cst_38 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %106 = "mhlo.reshape"(%105) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %107 = chlo.broadcast_add %106, %97 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %108 = chlo.broadcast_multiply %107, %cst_37 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %109 = chlo.broadcast_add %108, %cst_36 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %110 = "mhlo.reshape"(%109) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %111 = "mhlo.dot"(%110, %cst_41) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %112 = chlo.broadcast_add %111, %cst_40 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %113 = "mhlo.reshape"(%112) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %114 = chlo.broadcast_maximum %113, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %115 = "mhlo.reshape"(%114) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %116 = "mhlo.dot"(%115, %cst_49) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %117 = chlo.broadcast_add %116, %cst_48 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %118 = "mhlo.reshape"(%117) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %119 = chlo.broadcast_add %118, %109 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %120 = chlo.broadcast_multiply %119, %cst_43 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %121 = chlo.broadcast_add %120, %cst_42 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %122 = "mhlo.reshape"(%121) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %123 = "mhlo.dot"(%122, %cst_47) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %124 = chlo.broadcast_add %123, %cst_46 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %125 = "mhlo.reshape"(%124) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %126 = chlo.broadcast_add %125, %29 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %127 = chlo.broadcast_multiply %126, %cst_45 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %128 = chlo.broadcast_add %127, %cst_44 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %129 = "mhlo.reshape"(%128) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %130 = "mhlo.dot"(%129, %cst_59) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %131 = chlo.broadcast_add %130, %cst_58 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %132 = "mhlo.reshape"(%131) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %133 = "mhlo.transpose"(%132) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %134 = "mhlo.dot"(%129, %cst_63) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %135 = "mhlo.reshape"(%134) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %136 = "mhlo.broadcast_in_dim"(%cst_62) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %137 = mhlo.add %135, %136 : tensor<1x384x128xf32> | |
| %138 = chlo.broadcast_multiply %137, %cst_61 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %139 = chlo.broadcast_add %138, %cst_60 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %140 = "mhlo.reshape"(%139) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %141 = "mhlo.dot"(%140, %cst_55) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %142 = chlo.broadcast_add %141, %cst_54 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %143 = "mhlo.reshape"(%142) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %144 = "mhlo.transpose"(%143) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %145 = "mhlo.dot"(%140, %cst_57) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %146 = chlo.broadcast_add %145, %cst_56 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %147 = "mhlo.reshape"(%146) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %148 = "mhlo.transpose"(%147) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %149 = "mhlo.dot_general"(%148, %144) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %150 = chlo.broadcast_multiply %149, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %151 = chlo.broadcast_add %150, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %152 = "mhlo.reduce"(%151, %4) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %153 = linalg.tensor_expand_shape %152 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %154 = chlo.broadcast_subtract %151, %153 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %155 = "mhlo.exponential"(%154) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %156 = "mhlo.reduce"(%155, %5) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %157 = linalg.tensor_expand_shape %156 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %158 = chlo.broadcast_divide %155, %157 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %159 = "mhlo.dot_general"(%158, %133) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %160 = "mhlo.transpose"(%159) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %161 = "mhlo.reshape"(%160) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %162 = "mhlo.dot"(%161, %cst_53) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %163 = chlo.broadcast_add %162, %cst_52 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %164 = "mhlo.reshape"(%163) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %165 = "mhlo.dot"(%129, %cst_66) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %166 = chlo.broadcast_add %165, %cst_65 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %167 = "mhlo.reshape"(%166) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %168 = chlo.broadcast_multiply %167, %cst_64 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %169 = chlo.broadcast_add %168, %cst_52 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %170 = chlo.broadcast_add %164, %169 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %171 = chlo.broadcast_multiply %170, %cst_51 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %172 = chlo.broadcast_add %171, %cst_50 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %173 = "mhlo.reshape"(%172) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %174 = "mhlo.dot"(%173, %cst_68) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %175 = chlo.broadcast_add %174, %cst_67 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %176 = "mhlo.reshape"(%175) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %177 = chlo.broadcast_maximum %176, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %178 = "mhlo.reshape"(%177) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %179 = "mhlo.dot"(%178, %cst_72) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %180 = chlo.broadcast_add %179, %cst_71 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %181 = "mhlo.reshape"(%180) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %182 = chlo.broadcast_add %181, %172 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %183 = chlo.broadcast_multiply %182, %cst_70 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %184 = chlo.broadcast_add %183, %cst_69 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %185 = "mhlo.reshape"(%184) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %186 = "mhlo.dot"(%185, %cst_74) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %187 = chlo.broadcast_add %186, %cst_73 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %188 = "mhlo.reshape"(%187) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %189 = chlo.broadcast_maximum %188, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %190 = "mhlo.reshape"(%189) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %191 = "mhlo.dot"(%190, %cst_78) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %192 = chlo.broadcast_add %191, %cst_77 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %193 = "mhlo.reshape"(%192) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %194 = chlo.broadcast_add %193, %184 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %195 = chlo.broadcast_multiply %194, %cst_76 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %196 = chlo.broadcast_add %195, %cst_75 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %197 = "mhlo.reshape"(%196) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %198 = "mhlo.dot"(%197, %cst_80) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %199 = chlo.broadcast_add %198, %cst_79 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %200 = "mhlo.reshape"(%199) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %201 = chlo.broadcast_maximum %200, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %202 = "mhlo.reshape"(%201) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %203 = "mhlo.dot"(%202, %cst_84) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %204 = chlo.broadcast_add %203, %cst_83 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %205 = "mhlo.reshape"(%204) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %206 = chlo.broadcast_add %205, %196 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %207 = chlo.broadcast_multiply %206, %cst_82 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %208 = chlo.broadcast_add %207, %cst_81 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %209 = "mhlo.reshape"(%208) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %210 = "mhlo.dot"(%209, %cst_86) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %211 = chlo.broadcast_add %210, %cst_85 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %212 = "mhlo.reshape"(%211) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %213 = chlo.broadcast_maximum %212, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %214 = "mhlo.reshape"(%213) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %215 = "mhlo.dot"(%214, %cst_94) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %216 = chlo.broadcast_add %215, %cst_93 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %217 = "mhlo.reshape"(%216) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %218 = chlo.broadcast_add %217, %208 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %219 = chlo.broadcast_multiply %218, %cst_88 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %220 = chlo.broadcast_add %219, %cst_87 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %221 = "mhlo.reshape"(%220) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %222 = "mhlo.dot"(%221, %cst_92) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %223 = chlo.broadcast_add %222, %cst_91 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %224 = "mhlo.reshape"(%223) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %225 = chlo.broadcast_add %224, %128 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %226 = chlo.broadcast_multiply %225, %cst_90 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %227 = chlo.broadcast_add %226, %cst_89 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %228 = "mhlo.reshape"(%227) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %229 = "mhlo.dot"(%228, %cst_554) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %230 = chlo.broadcast_add %229, %cst_553 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %231 = "mhlo.reshape"(%230) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %232 = "mhlo.transpose"(%231) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %233 = "mhlo.dot"(%228, %cst_558) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %234 = "mhlo.reshape"(%233) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %235 = "mhlo.broadcast_in_dim"(%cst_557) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %236 = mhlo.add %234, %235 : tensor<1x384x128xf32> | |
| %237 = chlo.broadcast_multiply %236, %cst_556 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %238 = chlo.broadcast_add %237, %cst_555 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %239 = "mhlo.reshape"(%238) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %240 = "mhlo.dot"(%239, %cst_550) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %241 = chlo.broadcast_add %240, %cst_549 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %242 = "mhlo.reshape"(%241) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %243 = "mhlo.transpose"(%242) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %244 = "mhlo.dot"(%239, %cst_552) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %245 = chlo.broadcast_add %244, %cst_551 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %246 = "mhlo.reshape"(%245) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %247 = "mhlo.transpose"(%246) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %248 = "mhlo.dot_general"(%247, %243) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %249 = chlo.broadcast_multiply %248, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %250 = chlo.broadcast_add %249, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %251 = "mhlo.reduce"(%250, %4) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %252 = linalg.tensor_expand_shape %251 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %253 = chlo.broadcast_subtract %250, %252 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %254 = "mhlo.exponential"(%253) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %255 = "mhlo.reduce"(%254, %5) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %256 = linalg.tensor_expand_shape %255 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %257 = chlo.broadcast_divide %254, %256 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %258 = "mhlo.dot_general"(%257, %232) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %259 = "mhlo.transpose"(%258) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %260 = "mhlo.reshape"(%259) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %261 = "mhlo.dot"(%260, %cst_548) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %262 = chlo.broadcast_add %261, %cst_547 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %263 = "mhlo.reshape"(%262) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %264 = "mhlo.dot"(%228, %cst_561) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %265 = chlo.broadcast_add %264, %cst_560 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %266 = "mhlo.reshape"(%265) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %267 = chlo.broadcast_multiply %266, %cst_559 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %268 = chlo.broadcast_add %267, %cst_547 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %269 = chlo.broadcast_add %263, %268 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %270 = chlo.broadcast_multiply %269, %cst_546 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %271 = chlo.broadcast_add %270, %cst_545 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %272 = "mhlo.reshape"(%271) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %273 = "mhlo.dot"(%272, %cst_563) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %274 = chlo.broadcast_add %273, %cst_562 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %275 = "mhlo.reshape"(%274) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %276 = chlo.broadcast_maximum %275, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %277 = "mhlo.reshape"(%276) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %278 = "mhlo.dot"(%277, %cst_567) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %279 = chlo.broadcast_add %278, %cst_566 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %280 = "mhlo.reshape"(%279) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %281 = chlo.broadcast_add %280, %271 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %282 = chlo.broadcast_multiply %281, %cst_565 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %283 = chlo.broadcast_add %282, %cst_564 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %284 = "mhlo.reshape"(%283) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %285 = "mhlo.dot"(%284, %cst_569) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %286 = chlo.broadcast_add %285, %cst_568 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %287 = "mhlo.reshape"(%286) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %288 = chlo.broadcast_maximum %287, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %289 = "mhlo.reshape"(%288) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %290 = "mhlo.dot"(%289, %cst_573) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %291 = chlo.broadcast_add %290, %cst_572 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %292 = "mhlo.reshape"(%291) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %293 = chlo.broadcast_add %292, %283 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %294 = chlo.broadcast_multiply %293, %cst_571 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %295 = chlo.broadcast_add %294, %cst_570 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %296 = "mhlo.reshape"(%295) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %297 = "mhlo.dot"(%296, %cst_575) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %298 = chlo.broadcast_add %297, %cst_574 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %299 = "mhlo.reshape"(%298) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %300 = chlo.broadcast_maximum %299, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %301 = "mhlo.reshape"(%300) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %302 = "mhlo.dot"(%301, %cst_579) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %303 = chlo.broadcast_add %302, %cst_578 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %304 = "mhlo.reshape"(%303) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %305 = chlo.broadcast_add %304, %295 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %306 = chlo.broadcast_multiply %305, %cst_577 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %307 = chlo.broadcast_add %306, %cst_576 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %308 = "mhlo.reshape"(%307) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %309 = "mhlo.dot"(%308, %cst_581) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %310 = chlo.broadcast_add %309, %cst_580 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %311 = "mhlo.reshape"(%310) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %312 = chlo.broadcast_maximum %311, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %313 = "mhlo.reshape"(%312) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %314 = "mhlo.dot"(%313, %cst_589) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %315 = chlo.broadcast_add %314, %cst_588 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %316 = "mhlo.reshape"(%315) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %317 = chlo.broadcast_add %316, %307 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %318 = chlo.broadcast_multiply %317, %cst_583 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %319 = chlo.broadcast_add %318, %cst_582 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %320 = "mhlo.reshape"(%319) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %321 = "mhlo.dot"(%320, %cst_587) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %322 = chlo.broadcast_add %321, %cst_586 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %323 = "mhlo.reshape"(%322) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %324 = chlo.broadcast_add %323, %227 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %325 = chlo.broadcast_multiply %324, %cst_585 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %326 = chlo.broadcast_add %325, %cst_584 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %327 = "mhlo.reshape"(%326) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %328 = "mhlo.dot"(%327, %cst_779) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %329 = chlo.broadcast_add %328, %cst_778 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %330 = "mhlo.reshape"(%329) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %331 = "mhlo.transpose"(%330) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %332 = "mhlo.dot"(%327, %cst_783) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %333 = "mhlo.reshape"(%332) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %334 = "mhlo.broadcast_in_dim"(%cst_782) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %335 = mhlo.add %333, %334 : tensor<1x384x128xf32> | |
| %336 = chlo.broadcast_multiply %335, %cst_781 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %337 = chlo.broadcast_add %336, %cst_780 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %338 = "mhlo.reshape"(%337) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %339 = "mhlo.dot"(%338, %cst_775) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %340 = chlo.broadcast_add %339, %cst_774 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %341 = "mhlo.reshape"(%340) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %342 = "mhlo.transpose"(%341) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %343 = "mhlo.dot"(%338, %cst_777) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %344 = chlo.broadcast_add %343, %cst_776 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %345 = "mhlo.reshape"(%344) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %346 = "mhlo.transpose"(%345) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %347 = "mhlo.dot_general"(%346, %342) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %348 = chlo.broadcast_multiply %347, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %349 = chlo.broadcast_add %348, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %350 = "mhlo.reduce"(%349, %4) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %351 = linalg.tensor_expand_shape %350 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %352 = chlo.broadcast_subtract %349, %351 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %353 = "mhlo.exponential"(%352) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %354 = "mhlo.reduce"(%353, %5) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %355 = linalg.tensor_expand_shape %354 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %356 = chlo.broadcast_divide %353, %355 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %357 = "mhlo.dot_general"(%356, %331) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %358 = "mhlo.transpose"(%357) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %359 = "mhlo.reshape"(%358) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %360 = "mhlo.dot"(%359, %cst_773) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %361 = chlo.broadcast_add %360, %cst_772 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %362 = "mhlo.reshape"(%361) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %363 = "mhlo.dot"(%327, %cst_786) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %364 = chlo.broadcast_add %363, %cst_785 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %365 = "mhlo.reshape"(%364) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %366 = chlo.broadcast_multiply %365, %cst_784 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %367 = chlo.broadcast_add %366, %cst_772 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %368 = chlo.broadcast_add %362, %367 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %369 = chlo.broadcast_multiply %368, %cst_771 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %370 = chlo.broadcast_add %369, %cst_770 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %371 = "mhlo.reshape"(%370) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %372 = "mhlo.dot"(%371, %cst_788) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %373 = chlo.broadcast_add %372, %cst_787 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %374 = "mhlo.reshape"(%373) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %375 = chlo.broadcast_maximum %374, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %376 = "mhlo.reshape"(%375) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %377 = "mhlo.dot"(%376, %cst_792) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %378 = chlo.broadcast_add %377, %cst_791 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %379 = "mhlo.reshape"(%378) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %380 = chlo.broadcast_add %379, %370 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %381 = chlo.broadcast_multiply %380, %cst_790 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %382 = chlo.broadcast_add %381, %cst_789 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %383 = "mhlo.reshape"(%382) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %384 = "mhlo.dot"(%383, %cst_794) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %385 = chlo.broadcast_add %384, %cst_793 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %386 = "mhlo.reshape"(%385) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %387 = chlo.broadcast_maximum %386, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %388 = "mhlo.reshape"(%387) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %389 = "mhlo.dot"(%388, %cst_798) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %390 = chlo.broadcast_add %389, %cst_797 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %391 = "mhlo.reshape"(%390) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %392 = chlo.broadcast_add %391, %382 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %393 = chlo.broadcast_multiply %392, %cst_796 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %394 = chlo.broadcast_add %393, %cst_795 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %395 = "mhlo.reshape"(%394) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %396 = "mhlo.dot"(%395, %cst_800) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %397 = chlo.broadcast_add %396, %cst_799 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %398 = "mhlo.reshape"(%397) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %399 = chlo.broadcast_maximum %398, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %400 = "mhlo.reshape"(%399) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %401 = "mhlo.dot"(%400, %cst_804) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %402 = chlo.broadcast_add %401, %cst_803 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %403 = "mhlo.reshape"(%402) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %404 = chlo.broadcast_add %403, %394 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %405 = chlo.broadcast_multiply %404, %cst_802 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %406 = chlo.broadcast_add %405, %cst_801 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %407 = "mhlo.reshape"(%406) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %408 = "mhlo.dot"(%407, %cst_806) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %409 = chlo.broadcast_add %408, %cst_805 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %410 = "mhlo.reshape"(%409) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %411 = chlo.broadcast_maximum %410, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %412 = "mhlo.reshape"(%411) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %413 = "mhlo.dot"(%412, %cst_814) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %414 = chlo.broadcast_add %413, %cst_813 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %415 = "mhlo.reshape"(%414) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %416 = chlo.broadcast_add %415, %406 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %417 = chlo.broadcast_multiply %416, %cst_808 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %418 = chlo.broadcast_add %417, %cst_807 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %419 = "mhlo.reshape"(%418) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %420 = "mhlo.dot"(%419, %cst_812) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %421 = chlo.broadcast_add %420, %cst_811 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %422 = "mhlo.reshape"(%421) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %423 = chlo.broadcast_add %422, %326 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %424 = chlo.broadcast_multiply %423, %cst_810 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %425 = chlo.broadcast_add %424, %cst_809 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %426 = "mhlo.reshape"(%425) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %427 = "mhlo.dot"(%426, %cst_824) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %428 = chlo.broadcast_add %427, %cst_823 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %429 = "mhlo.reshape"(%428) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %430 = "mhlo.transpose"(%429) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %431 = "mhlo.dot"(%426, %cst_828) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %432 = "mhlo.reshape"(%431) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %433 = "mhlo.broadcast_in_dim"(%cst_827) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %434 = mhlo.add %432, %433 : tensor<1x384x128xf32> | |
| %435 = chlo.broadcast_multiply %434, %cst_826 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %436 = chlo.broadcast_add %435, %cst_825 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %437 = "mhlo.reshape"(%436) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %438 = "mhlo.dot"(%437, %cst_820) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %439 = chlo.broadcast_add %438, %cst_819 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %440 = "mhlo.reshape"(%439) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %441 = "mhlo.transpose"(%440) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %442 = "mhlo.dot"(%437, %cst_822) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %443 = chlo.broadcast_add %442, %cst_821 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %444 = "mhlo.reshape"(%443) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %445 = "mhlo.transpose"(%444) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %446 = "mhlo.dot_general"(%445, %441) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %447 = chlo.broadcast_multiply %446, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %448 = chlo.broadcast_add %447, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %449 = "mhlo.reduce"(%448, %4) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %450 = linalg.tensor_expand_shape %449 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %451 = chlo.broadcast_subtract %448, %450 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %452 = "mhlo.exponential"(%451) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %453 = "mhlo.reduce"(%452, %5) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %454 = linalg.tensor_expand_shape %453 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %455 = chlo.broadcast_divide %452, %454 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %456 = "mhlo.dot_general"(%455, %430) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %457 = "mhlo.transpose"(%456) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %458 = "mhlo.reshape"(%457) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %459 = "mhlo.dot"(%458, %cst_818) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %460 = chlo.broadcast_add %459, %cst_817 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %461 = "mhlo.reshape"(%460) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %462 = "mhlo.dot"(%426, %cst_831) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %463 = chlo.broadcast_add %462, %cst_830 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %464 = "mhlo.reshape"(%463) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %465 = chlo.broadcast_multiply %464, %cst_829 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %466 = chlo.broadcast_add %465, %cst_817 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %467 = chlo.broadcast_add %461, %466 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %468 = chlo.broadcast_multiply %467, %cst_816 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %469 = chlo.broadcast_add %468, %cst_815 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %470 = "mhlo.reshape"(%469) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %471 = "mhlo.dot"(%470, %cst_833) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %472 = chlo.broadcast_add %471, %cst_832 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %473 = "mhlo.reshape"(%472) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %474 = chlo.broadcast_maximum %473, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %475 = "mhlo.reshape"(%474) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %476 = "mhlo.dot"(%475, %cst_837) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %477 = chlo.broadcast_add %476, %cst_836 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %478 = "mhlo.reshape"(%477) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %479 = chlo.broadcast_add %478, %469 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %480 = chlo.broadcast_multiply %479, %cst_835 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %481 = chlo.broadcast_add %480, %cst_834 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %482 = "mhlo.reshape"(%481) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %483 = "mhlo.dot"(%482, %cst_839) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %484 = chlo.broadcast_add %483, %cst_838 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %485 = "mhlo.reshape"(%484) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %486 = chlo.broadcast_maximum %485, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %487 = "mhlo.reshape"(%486) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %488 = "mhlo.dot"(%487, %cst_843) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %489 = chlo.broadcast_add %488, %cst_842 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %490 = "mhlo.reshape"(%489) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %491 = chlo.broadcast_add %490, %481 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %492 = chlo.broadcast_multiply %491, %cst_841 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %493 = chlo.broadcast_add %492, %cst_840 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %494 = "mhlo.reshape"(%493) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %495 = "mhlo.dot"(%494, %cst_845) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %496 = chlo.broadcast_add %495, %cst_844 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %497 = "mhlo.reshape"(%496) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %498 = chlo.broadcast_maximum %497, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %499 = "mhlo.reshape"(%498) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %500 = "mhlo.dot"(%499, %cst_849) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %501 = chlo.broadcast_add %500, %cst_848 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %502 = "mhlo.reshape"(%501) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %503 = chlo.broadcast_add %502, %493 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %504 = chlo.broadcast_multiply %503, %cst_847 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %505 = chlo.broadcast_add %504, %cst_846 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %506 = "mhlo.reshape"(%505) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %507 = "mhlo.dot"(%506, %cst_851) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %508 = chlo.broadcast_add %507, %cst_850 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %509 = "mhlo.reshape"(%508) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %510 = chlo.broadcast_maximum %509, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %511 = "mhlo.reshape"(%510) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %512 = "mhlo.dot"(%511, %cst_859) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %513 = chlo.broadcast_add %512, %cst_858 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %514 = "mhlo.reshape"(%513) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %515 = chlo.broadcast_add %514, %505 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %516 = chlo.broadcast_multiply %515, %cst_853 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %517 = chlo.broadcast_add %516, %cst_852 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %518 = "mhlo.reshape"(%517) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %519 = "mhlo.dot"(%518, %cst_857) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %520 = chlo.broadcast_add %519, %cst_856 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %521 = "mhlo.reshape"(%520) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %522 = chlo.broadcast_add %521, %425 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %523 = chlo.broadcast_multiply %522, %cst_855 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %524 = chlo.broadcast_add %523, %cst_854 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %525 = "mhlo.reshape"(%524) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %526 = "mhlo.dot"(%525, %cst_869) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %527 = chlo.broadcast_add %526, %cst_868 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %528 = "mhlo.reshape"(%527) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %529 = "mhlo.transpose"(%528) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %530 = "mhlo.dot"(%525, %cst_873) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %531 = "mhlo.reshape"(%530) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %532 = "mhlo.broadcast_in_dim"(%cst_872) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %533 = mhlo.add %531, %532 : tensor<1x384x128xf32> | |
| %534 = chlo.broadcast_multiply %533, %cst_871 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %535 = chlo.broadcast_add %534, %cst_870 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %536 = "mhlo.reshape"(%535) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %537 = "mhlo.dot"(%536, %cst_865) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %538 = chlo.broadcast_add %537, %cst_864 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %539 = "mhlo.reshape"(%538) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %540 = "mhlo.transpose"(%539) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %541 = "mhlo.dot"(%536, %cst_867) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %542 = chlo.broadcast_add %541, %cst_866 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %543 = "mhlo.reshape"(%542) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %544 = "mhlo.transpose"(%543) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %545 = "mhlo.dot_general"(%544, %540) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %546 = chlo.broadcast_multiply %545, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %547 = chlo.broadcast_add %546, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %548 = "mhlo.reduce"(%547, %4) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %549 = linalg.tensor_expand_shape %548 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %550 = chlo.broadcast_subtract %547, %549 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %551 = "mhlo.exponential"(%550) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %552 = "mhlo.reduce"(%551, %5) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %553 = linalg.tensor_expand_shape %552 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %554 = chlo.broadcast_divide %551, %553 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %555 = "mhlo.dot_general"(%554, %529) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %556 = "mhlo.transpose"(%555) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %557 = "mhlo.reshape"(%556) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %558 = "mhlo.dot"(%557, %cst_863) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %559 = chlo.broadcast_add %558, %cst_862 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %560 = "mhlo.reshape"(%559) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %561 = "mhlo.dot"(%525, %cst_876) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %562 = chlo.broadcast_add %561, %cst_875 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %563 = "mhlo.reshape"(%562) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %564 = chlo.broadcast_multiply %563, %cst_874 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %565 = chlo.broadcast_add %564, %cst_862 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %566 = chlo.broadcast_add %560, %565 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %567 = chlo.broadcast_multiply %566, %cst_861 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %568 = chlo.broadcast_add %567, %cst_860 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %569 = "mhlo.reshape"(%568) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %570 = "mhlo.dot"(%569, %cst_878) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %571 = chlo.broadcast_add %570, %cst_877 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %572 = "mhlo.reshape"(%571) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %573 = chlo.broadcast_maximum %572, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %574 = "mhlo.reshape"(%573) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %575 = "mhlo.dot"(%574, %cst_882) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %576 = chlo.broadcast_add %575, %cst_881 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %577 = "mhlo.reshape"(%576) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %578 = chlo.broadcast_add %577, %568 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %579 = chlo.broadcast_multiply %578, %cst_880 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %580 = chlo.broadcast_add %579, %cst_879 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %581 = "mhlo.reshape"(%580) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %582 = "mhlo.dot"(%581, %cst_884) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %583 = chlo.broadcast_add %582, %cst_883 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %584 = "mhlo.reshape"(%583) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %585 = chlo.broadcast_maximum %584, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %586 = "mhlo.reshape"(%585) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %587 = "mhlo.dot"(%586, %cst_888) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %588 = chlo.broadcast_add %587, %cst_887 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %589 = "mhlo.reshape"(%588) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %590 = chlo.broadcast_add %589, %580 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %591 = chlo.broadcast_multiply %590, %cst_886 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %592 = chlo.broadcast_add %591, %cst_885 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %593 = "mhlo.reshape"(%592) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %594 = "mhlo.dot"(%593, %cst_890) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %595 = chlo.broadcast_add %594, %cst_889 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %596 = "mhlo.reshape"(%595) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %597 = chlo.broadcast_maximum %596, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %598 = "mhlo.reshape"(%597) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %599 = "mhlo.dot"(%598, %cst_894) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %600 = chlo.broadcast_add %599, %cst_893 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %601 = "mhlo.reshape"(%600) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %602 = chlo.broadcast_add %601, %592 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %603 = chlo.broadcast_multiply %602, %cst_892 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %604 = chlo.broadcast_add %603, %cst_891 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %605 = "mhlo.reshape"(%604) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %606 = "mhlo.dot"(%605, %cst_896) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %607 = chlo.broadcast_add %606, %cst_895 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %608 = "mhlo.reshape"(%607) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %609 = chlo.broadcast_maximum %608, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %610 = "mhlo.reshape"(%609) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %611 = "mhlo.dot"(%610, %cst_904) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %612 = chlo.broadcast_add %611, %cst_903 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %613 = "mhlo.reshape"(%612) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %614 = chlo.broadcast_add %613, %604 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %615 = chlo.broadcast_multiply %614, %cst_898 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %616 = chlo.broadcast_add %615, %cst_897 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %617 = "mhlo.reshape"(%616) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %618 = "mhlo.dot"(%617, %cst_902) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %619 = chlo.broadcast_add %618, %cst_901 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %620 = "mhlo.reshape"(%619) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %621 = chlo.broadcast_add %620, %524 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %622 = chlo.broadcast_multiply %621, %cst_900 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %623 = chlo.broadcast_add %622, %cst_899 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %624 = "mhlo.reshape"(%623) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %625 = "mhlo.dot"(%624, %cst_914) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %626 = chlo.broadcast_add %625, %cst_913 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %627 = "mhlo.reshape"(%626) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %628 = "mhlo.transpose"(%627) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %629 = "mhlo.dot"(%624, %cst_918) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %630 = "mhlo.reshape"(%629) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %631 = "mhlo.broadcast_in_dim"(%cst_917) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %632 = mhlo.add %630, %631 : tensor<1x384x128xf32> | |
| %633 = chlo.broadcast_multiply %632, %cst_916 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %634 = chlo.broadcast_add %633, %cst_915 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %635 = "mhlo.reshape"(%634) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %636 = "mhlo.dot"(%635, %cst_910) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %637 = chlo.broadcast_add %636, %cst_909 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %638 = "mhlo.reshape"(%637) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %639 = "mhlo.transpose"(%638) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %640 = "mhlo.dot"(%635, %cst_912) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %641 = chlo.broadcast_add %640, %cst_911 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %642 = "mhlo.reshape"(%641) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %643 = "mhlo.transpose"(%642) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %644 = "mhlo.dot_general"(%643, %639) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %645 = chlo.broadcast_multiply %644, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %646 = chlo.broadcast_add %645, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %647 = "mhlo.reduce"(%646, %4) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %648 = linalg.tensor_expand_shape %647 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %649 = chlo.broadcast_subtract %646, %648 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %650 = "mhlo.exponential"(%649) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %651 = "mhlo.reduce"(%650, %5) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %652 = linalg.tensor_expand_shape %651 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %653 = chlo.broadcast_divide %650, %652 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %654 = "mhlo.dot_general"(%653, %628) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %655 = "mhlo.transpose"(%654) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %656 = "mhlo.reshape"(%655) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %657 = "mhlo.dot"(%656, %cst_908) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %658 = chlo.broadcast_add %657, %cst_907 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %659 = "mhlo.reshape"(%658) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %660 = "mhlo.dot"(%624, %cst_921) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %661 = chlo.broadcast_add %660, %cst_920 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %662 = "mhlo.reshape"(%661) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %663 = chlo.broadcast_multiply %662, %cst_919 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %664 = chlo.broadcast_add %663, %cst_907 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %665 = chlo.broadcast_add %659, %664 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %666 = chlo.broadcast_multiply %665, %cst_906 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %667 = chlo.broadcast_add %666, %cst_905 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %668 = "mhlo.reshape"(%667) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %669 = "mhlo.dot"(%668, %cst_923) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %670 = chlo.broadcast_add %669, %cst_922 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %671 = "mhlo.reshape"(%670) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %672 = chlo.broadcast_maximum %671, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %673 = "mhlo.reshape"(%672) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %674 = "mhlo.dot"(%673, %cst_927) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %675 = chlo.broadcast_add %674, %cst_926 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %676 = "mhlo.reshape"(%675) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %677 = chlo.broadcast_add %676, %667 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %678 = chlo.broadcast_multiply %677, %cst_925 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %679 = chlo.broadcast_add %678, %cst_924 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %680 = "mhlo.reshape"(%679) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %681 = "mhlo.dot"(%680, %cst_929) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %682 = chlo.broadcast_add %681, %cst_928 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %683 = "mhlo.reshape"(%682) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %684 = chlo.broadcast_maximum %683, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %685 = "mhlo.reshape"(%684) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %686 = "mhlo.dot"(%685, %cst_933) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %687 = chlo.broadcast_add %686, %cst_932 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %688 = "mhlo.reshape"(%687) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %689 = chlo.broadcast_add %688, %679 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %690 = chlo.broadcast_multiply %689, %cst_931 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %691 = chlo.broadcast_add %690, %cst_930 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %692 = "mhlo.reshape"(%691) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %693 = "mhlo.dot"(%692, %cst_935) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %694 = chlo.broadcast_add %693, %cst_934 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %695 = "mhlo.reshape"(%694) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %696 = chlo.broadcast_maximum %695, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %697 = "mhlo.reshape"(%696) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %698 = "mhlo.dot"(%697, %cst_939) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %699 = chlo.broadcast_add %698, %cst_938 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %700 = "mhlo.reshape"(%699) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %701 = chlo.broadcast_add %700, %691 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %702 = chlo.broadcast_multiply %701, %cst_937 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %703 = chlo.broadcast_add %702, %cst_936 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %704 = "mhlo.reshape"(%703) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %705 = "mhlo.dot"(%704, %cst_941) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %706 = chlo.broadcast_add %705, %cst_940 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %707 = "mhlo.reshape"(%706) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %708 = chlo.broadcast_maximum %707, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %709 = "mhlo.reshape"(%708) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %710 = "mhlo.dot"(%709, %cst_949) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %711 = chlo.broadcast_add %710, %cst_948 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %712 = "mhlo.reshape"(%711) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %713 = chlo.broadcast_add %712, %703 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %714 = chlo.broadcast_multiply %713, %cst_943 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %715 = chlo.broadcast_add %714, %cst_942 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %716 = "mhlo.reshape"(%715) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %717 = "mhlo.dot"(%716, %cst_947) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %718 = chlo.broadcast_add %717, %cst_946 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %719 = "mhlo.reshape"(%718) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %720 = chlo.broadcast_add %719, %623 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %721 = chlo.broadcast_multiply %720, %cst_945 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %722 = chlo.broadcast_add %721, %cst_944 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %723 = "mhlo.reshape"(%722) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %724 = "mhlo.dot"(%723, %cst_959) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %725 = chlo.broadcast_add %724, %cst_958 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %726 = "mhlo.reshape"(%725) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %727 = "mhlo.transpose"(%726) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %728 = "mhlo.dot"(%723, %cst_963) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %729 = "mhlo.reshape"(%728) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %730 = "mhlo.broadcast_in_dim"(%cst_962) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %731 = mhlo.add %729, %730 : tensor<1x384x128xf32> | |
| %732 = chlo.broadcast_multiply %731, %cst_961 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %733 = chlo.broadcast_add %732, %cst_960 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %734 = "mhlo.reshape"(%733) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %735 = "mhlo.dot"(%734, %cst_955) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %736 = chlo.broadcast_add %735, %cst_954 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %737 = "mhlo.reshape"(%736) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %738 = "mhlo.transpose"(%737) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %739 = "mhlo.dot"(%734, %cst_957) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %740 = chlo.broadcast_add %739, %cst_956 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %741 = "mhlo.reshape"(%740) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %742 = "mhlo.transpose"(%741) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %743 = "mhlo.dot_general"(%742, %738) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %744 = chlo.broadcast_multiply %743, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %745 = chlo.broadcast_add %744, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %746 = "mhlo.reduce"(%745, %4) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %747 = linalg.tensor_expand_shape %746 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %748 = chlo.broadcast_subtract %745, %747 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %749 = "mhlo.exponential"(%748) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %750 = "mhlo.reduce"(%749, %5) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %751 = linalg.tensor_expand_shape %750 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %752 = chlo.broadcast_divide %749, %751 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %753 = "mhlo.dot_general"(%752, %727) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %754 = "mhlo.transpose"(%753) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %755 = "mhlo.reshape"(%754) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %756 = "mhlo.dot"(%755, %cst_953) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %757 = chlo.broadcast_add %756, %cst_952 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %758 = "mhlo.reshape"(%757) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %759 = "mhlo.dot"(%723, %cst_966) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %760 = chlo.broadcast_add %759, %cst_965 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %761 = "mhlo.reshape"(%760) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %762 = chlo.broadcast_multiply %761, %cst_964 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %763 = chlo.broadcast_add %762, %cst_952 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %764 = chlo.broadcast_add %758, %763 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %765 = chlo.broadcast_multiply %764, %cst_951 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %766 = chlo.broadcast_add %765, %cst_950 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %767 = "mhlo.reshape"(%766) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %768 = "mhlo.dot"(%767, %cst_968) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %769 = chlo.broadcast_add %768, %cst_967 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %770 = "mhlo.reshape"(%769) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %771 = chlo.broadcast_maximum %770, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %772 = "mhlo.reshape"(%771) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %773 = "mhlo.dot"(%772, %cst_972) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %774 = chlo.broadcast_add %773, %cst_971 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %775 = "mhlo.reshape"(%774) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %776 = chlo.broadcast_add %775, %766 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %777 = chlo.broadcast_multiply %776, %cst_970 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %778 = chlo.broadcast_add %777, %cst_969 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %779 = "mhlo.reshape"(%778) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %780 = "mhlo.dot"(%779, %cst_974) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %781 = chlo.broadcast_add %780, %cst_973 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %782 = "mhlo.reshape"(%781) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %783 = chlo.broadcast_maximum %782, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %784 = "mhlo.reshape"(%783) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %785 = "mhlo.dot"(%784, %cst_978) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %786 = chlo.broadcast_add %785, %cst_977 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %787 = "mhlo.reshape"(%786) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %788 = chlo.broadcast_add %787, %778 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %789 = chlo.broadcast_multiply %788, %cst_976 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %790 = chlo.broadcast_add %789, %cst_975 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %791 = "mhlo.reshape"(%790) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %792 = "mhlo.dot"(%791, %cst_980) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %793 = chlo.broadcast_add %792, %cst_979 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %794 = "mhlo.reshape"(%793) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %795 = chlo.broadcast_maximum %794, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %796 = "mhlo.reshape"(%795) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %797 = "mhlo.dot"(%796, %cst_984) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %798 = chlo.broadcast_add %797, %cst_983 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %799 = "mhlo.reshape"(%798) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %800 = chlo.broadcast_add %799, %790 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %801 = chlo.broadcast_multiply %800, %cst_982 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %802 = chlo.broadcast_add %801, %cst_981 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %803 = "mhlo.reshape"(%802) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %804 = "mhlo.dot"(%803, %cst_986) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %805 = chlo.broadcast_add %804, %cst_985 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %806 = "mhlo.reshape"(%805) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %807 = chlo.broadcast_maximum %806, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %808 = "mhlo.reshape"(%807) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %809 = "mhlo.dot"(%808, %cst_994) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %810 = chlo.broadcast_add %809, %cst_993 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %811 = "mhlo.reshape"(%810) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %812 = chlo.broadcast_add %811, %802 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %813 = chlo.broadcast_multiply %812, %cst_988 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %814 = chlo.broadcast_add %813, %cst_987 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %815 = "mhlo.reshape"(%814) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %816 = "mhlo.dot"(%815, %cst_992) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %817 = chlo.broadcast_add %816, %cst_991 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %818 = "mhlo.reshape"(%817) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %819 = chlo.broadcast_add %818, %722 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %820 = chlo.broadcast_multiply %819, %cst_990 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %821 = chlo.broadcast_add %820, %cst_989 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %822 = "mhlo.reshape"(%821) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %823 = "mhlo.dot"(%822, %cst_1004) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %824 = chlo.broadcast_add %823, %cst_1003 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %825 = "mhlo.reshape"(%824) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %826 = "mhlo.transpose"(%825) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %827 = "mhlo.dot"(%822, %cst_1008) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %828 = "mhlo.reshape"(%827) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %829 = "mhlo.broadcast_in_dim"(%cst_1007) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %830 = mhlo.add %828, %829 : tensor<1x384x128xf32> | |
| %831 = chlo.broadcast_multiply %830, %cst_1006 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %832 = chlo.broadcast_add %831, %cst_1005 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %833 = "mhlo.reshape"(%832) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %834 = "mhlo.dot"(%833, %cst_1000) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %835 = chlo.broadcast_add %834, %cst_999 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %836 = "mhlo.reshape"(%835) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %837 = "mhlo.transpose"(%836) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %838 = "mhlo.dot"(%833, %cst_1002) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %839 = chlo.broadcast_add %838, %cst_1001 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %840 = "mhlo.reshape"(%839) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %841 = "mhlo.transpose"(%840) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %842 = "mhlo.dot_general"(%841, %837) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %843 = chlo.broadcast_multiply %842, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %844 = chlo.broadcast_add %843, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %845 = "mhlo.reduce"(%844, %4) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %846 = linalg.tensor_expand_shape %845 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %847 = chlo.broadcast_subtract %844, %846 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %848 = "mhlo.exponential"(%847) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %849 = "mhlo.reduce"(%848, %5) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %850 = linalg.tensor_expand_shape %849 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %851 = chlo.broadcast_divide %848, %850 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %852 = "mhlo.dot_general"(%851, %826) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %853 = "mhlo.transpose"(%852) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %854 = "mhlo.reshape"(%853) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %855 = "mhlo.dot"(%854, %cst_998) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %856 = chlo.broadcast_add %855, %cst_997 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %857 = "mhlo.reshape"(%856) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %858 = "mhlo.dot"(%822, %cst_1011) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %859 = chlo.broadcast_add %858, %cst_1010 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %860 = "mhlo.reshape"(%859) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %861 = chlo.broadcast_multiply %860, %cst_1009 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %862 = chlo.broadcast_add %861, %cst_997 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %863 = chlo.broadcast_add %857, %862 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %864 = chlo.broadcast_multiply %863, %cst_996 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %865 = chlo.broadcast_add %864, %cst_995 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %866 = "mhlo.reshape"(%865) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %867 = "mhlo.dot"(%866, %cst_1013) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %868 = chlo.broadcast_add %867, %cst_1012 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %869 = "mhlo.reshape"(%868) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %870 = chlo.broadcast_maximum %869, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %871 = "mhlo.reshape"(%870) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %872 = "mhlo.dot"(%871, %cst_1017) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %873 = chlo.broadcast_add %872, %cst_1016 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %874 = "mhlo.reshape"(%873) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %875 = chlo.broadcast_add %874, %865 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %876 = chlo.broadcast_multiply %875, %cst_1015 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %877 = chlo.broadcast_add %876, %cst_1014 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %878 = "mhlo.reshape"(%877) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %879 = "mhlo.dot"(%878, %cst_1019) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %880 = chlo.broadcast_add %879, %cst_1018 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %881 = "mhlo.reshape"(%880) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %882 = chlo.broadcast_maximum %881, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %883 = "mhlo.reshape"(%882) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %884 = "mhlo.dot"(%883, %cst_1023) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %885 = chlo.broadcast_add %884, %cst_1022 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %886 = "mhlo.reshape"(%885) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %887 = chlo.broadcast_add %886, %877 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %888 = chlo.broadcast_multiply %887, %cst_1021 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %889 = chlo.broadcast_add %888, %cst_1020 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %890 = "mhlo.reshape"(%889) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %891 = "mhlo.dot"(%890, %cst_1025) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %892 = chlo.broadcast_add %891, %cst_1024 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %893 = "mhlo.reshape"(%892) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %894 = chlo.broadcast_maximum %893, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %895 = "mhlo.reshape"(%894) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %896 = "mhlo.dot"(%895, %cst_1029) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %897 = chlo.broadcast_add %896, %cst_1028 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %898 = "mhlo.reshape"(%897) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %899 = chlo.broadcast_add %898, %889 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %900 = chlo.broadcast_multiply %899, %cst_1027 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %901 = chlo.broadcast_add %900, %cst_1026 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %902 = "mhlo.reshape"(%901) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %903 = "mhlo.dot"(%902, %cst_1031) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %904 = chlo.broadcast_add %903, %cst_1030 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %905 = "mhlo.reshape"(%904) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %906 = chlo.broadcast_maximum %905, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %907 = "mhlo.reshape"(%906) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %908 = "mhlo.dot"(%907, %cst_1039) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %909 = chlo.broadcast_add %908, %cst_1038 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %910 = "mhlo.reshape"(%909) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %911 = chlo.broadcast_add %910, %901 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %912 = chlo.broadcast_multiply %911, %cst_1033 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %913 = chlo.broadcast_add %912, %cst_1032 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %914 = "mhlo.reshape"(%913) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %915 = "mhlo.dot"(%914, %cst_1037) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %916 = chlo.broadcast_add %915, %cst_1036 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %917 = "mhlo.reshape"(%916) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %918 = chlo.broadcast_add %917, %821 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %919 = chlo.broadcast_multiply %918, %cst_1035 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %920 = chlo.broadcast_add %919, %cst_1034 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %921 = "mhlo.reshape"(%920) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %922 = "mhlo.dot"(%921, %cst_1049) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %923 = chlo.broadcast_add %922, %cst_1048 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %924 = "mhlo.reshape"(%923) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %925 = "mhlo.transpose"(%924) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %926 = "mhlo.dot"(%921, %cst_1053) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %927 = "mhlo.reshape"(%926) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %928 = "mhlo.broadcast_in_dim"(%cst_1052) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %929 = mhlo.add %927, %928 : tensor<1x384x128xf32> | |
| %930 = chlo.broadcast_multiply %929, %cst_1051 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %931 = chlo.broadcast_add %930, %cst_1050 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %932 = "mhlo.reshape"(%931) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %933 = "mhlo.dot"(%932, %cst_1045) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %934 = chlo.broadcast_add %933, %cst_1044 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %935 = "mhlo.reshape"(%934) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %936 = "mhlo.transpose"(%935) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %937 = "mhlo.dot"(%932, %cst_1047) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %938 = chlo.broadcast_add %937, %cst_1046 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %939 = "mhlo.reshape"(%938) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %940 = "mhlo.transpose"(%939) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %941 = "mhlo.dot_general"(%940, %936) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %942 = chlo.broadcast_multiply %941, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %943 = chlo.broadcast_add %942, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %944 = "mhlo.reduce"(%943, %4) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %945 = linalg.tensor_expand_shape %944 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %946 = chlo.broadcast_subtract %943, %945 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %947 = "mhlo.exponential"(%946) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %948 = "mhlo.reduce"(%947, %5) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %949 = linalg.tensor_expand_shape %948 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %950 = chlo.broadcast_divide %947, %949 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %951 = "mhlo.dot_general"(%950, %925) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %952 = "mhlo.transpose"(%951) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %953 = "mhlo.reshape"(%952) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %954 = "mhlo.dot"(%953, %cst_1043) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %955 = chlo.broadcast_add %954, %cst_1042 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %956 = "mhlo.reshape"(%955) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %957 = "mhlo.dot"(%921, %cst_1056) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %958 = chlo.broadcast_add %957, %cst_1055 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %959 = "mhlo.reshape"(%958) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %960 = chlo.broadcast_multiply %959, %cst_1054 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %961 = chlo.broadcast_add %960, %cst_1042 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %962 = chlo.broadcast_add %956, %961 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %963 = chlo.broadcast_multiply %962, %cst_1041 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %964 = chlo.broadcast_add %963, %cst_1040 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %965 = "mhlo.reshape"(%964) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %966 = "mhlo.dot"(%965, %cst_1058) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %967 = chlo.broadcast_add %966, %cst_1057 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %968 = "mhlo.reshape"(%967) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %969 = chlo.broadcast_maximum %968, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %970 = "mhlo.reshape"(%969) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %971 = "mhlo.dot"(%970, %cst_1062) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %972 = chlo.broadcast_add %971, %cst_1061 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %973 = "mhlo.reshape"(%972) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %974 = chlo.broadcast_add %973, %964 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %975 = chlo.broadcast_multiply %974, %cst_1060 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %976 = chlo.broadcast_add %975, %cst_1059 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %977 = "mhlo.reshape"(%976) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %978 = "mhlo.dot"(%977, %cst_1064) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %979 = chlo.broadcast_add %978, %cst_1063 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %980 = "mhlo.reshape"(%979) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %981 = chlo.broadcast_maximum %980, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %982 = "mhlo.reshape"(%981) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %983 = "mhlo.dot"(%982, %cst_1068) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %984 = chlo.broadcast_add %983, %cst_1067 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %985 = "mhlo.reshape"(%984) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %986 = chlo.broadcast_add %985, %976 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %987 = chlo.broadcast_multiply %986, %cst_1066 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %988 = chlo.broadcast_add %987, %cst_1065 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %989 = "mhlo.reshape"(%988) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %990 = "mhlo.dot"(%989, %cst_1070) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %991 = chlo.broadcast_add %990, %cst_1069 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %992 = "mhlo.reshape"(%991) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %993 = chlo.broadcast_maximum %992, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %994 = "mhlo.reshape"(%993) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %995 = "mhlo.dot"(%994, %cst_1074) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %996 = chlo.broadcast_add %995, %cst_1073 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %997 = "mhlo.reshape"(%996) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %998 = chlo.broadcast_add %997, %988 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %999 = chlo.broadcast_multiply %998, %cst_1072 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1000 = chlo.broadcast_add %999, %cst_1071 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1001 = "mhlo.reshape"(%1000) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1002 = "mhlo.dot"(%1001, %cst_1076) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1003 = chlo.broadcast_add %1002, %cst_1075 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1004 = "mhlo.reshape"(%1003) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1005 = chlo.broadcast_maximum %1004, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1006 = "mhlo.reshape"(%1005) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1007 = "mhlo.dot"(%1006, %cst_1084) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1008 = chlo.broadcast_add %1007, %cst_1083 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1009 = "mhlo.reshape"(%1008) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1010 = chlo.broadcast_add %1009, %1000 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1011 = chlo.broadcast_multiply %1010, %cst_1078 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1012 = chlo.broadcast_add %1011, %cst_1077 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1013 = "mhlo.reshape"(%1012) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1014 = "mhlo.dot"(%1013, %cst_1082) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1015 = chlo.broadcast_add %1014, %cst_1081 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1016 = "mhlo.reshape"(%1015) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1017 = chlo.broadcast_add %1016, %920 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %1018 = chlo.broadcast_multiply %1017, %cst_1080 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1019 = chlo.broadcast_add %1018, %cst_1079 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1020 = "mhlo.reshape"(%1019) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1021 = "mhlo.dot"(%1020, %cst_104) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1022 = chlo.broadcast_add %1021, %cst_103 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1023 = "mhlo.reshape"(%1022) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1024 = "mhlo.transpose"(%1023) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1025 = "mhlo.dot"(%1020, %cst_108) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1026 = "mhlo.reshape"(%1025) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1027 = "mhlo.broadcast_in_dim"(%cst_107) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1028 = mhlo.add %1026, %1027 : tensor<1x384x128xf32> | |
| %1029 = chlo.broadcast_multiply %1028, %cst_106 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1030 = chlo.broadcast_add %1029, %cst_105 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1031 = "mhlo.reshape"(%1030) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1032 = "mhlo.dot"(%1031, %cst_100) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1033 = chlo.broadcast_add %1032, %cst_99 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1034 = "mhlo.reshape"(%1033) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1035 = "mhlo.transpose"(%1034) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1036 = "mhlo.dot"(%1031, %cst_102) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1037 = chlo.broadcast_add %1036, %cst_101 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1038 = "mhlo.reshape"(%1037) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1039 = "mhlo.transpose"(%1038) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1040 = "mhlo.dot_general"(%1039, %1035) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %1041 = chlo.broadcast_multiply %1040, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %1042 = chlo.broadcast_add %1041, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1043 = "mhlo.reduce"(%1042, %4) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1044 = linalg.tensor_expand_shape %1043 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1045 = chlo.broadcast_subtract %1042, %1044 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1046 = "mhlo.exponential"(%1045) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1047 = "mhlo.reduce"(%1046, %5) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1048 = linalg.tensor_expand_shape %1047 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1049 = chlo.broadcast_divide %1046, %1048 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1050 = "mhlo.dot_general"(%1049, %1024) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1051 = "mhlo.transpose"(%1050) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %1052 = "mhlo.reshape"(%1051) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %1053 = "mhlo.dot"(%1052, %cst_98) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1054 = chlo.broadcast_add %1053, %cst_97 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1055 = "mhlo.reshape"(%1054) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1056 = "mhlo.dot"(%1020, %cst_111) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1057 = chlo.broadcast_add %1056, %cst_110 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1058 = "mhlo.reshape"(%1057) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1059 = chlo.broadcast_multiply %1058, %cst_109 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1060 = chlo.broadcast_add %1059, %cst_97 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1061 = chlo.broadcast_add %1055, %1060 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1062 = chlo.broadcast_multiply %1061, %cst_96 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1063 = chlo.broadcast_add %1062, %cst_95 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1064 = "mhlo.reshape"(%1063) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1065 = "mhlo.dot"(%1064, %cst_113) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1066 = chlo.broadcast_add %1065, %cst_112 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1067 = "mhlo.reshape"(%1066) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1068 = chlo.broadcast_maximum %1067, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1069 = "mhlo.reshape"(%1068) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1070 = "mhlo.dot"(%1069, %cst_117) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1071 = chlo.broadcast_add %1070, %cst_116 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1072 = "mhlo.reshape"(%1071) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1073 = chlo.broadcast_add %1072, %1063 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1074 = chlo.broadcast_multiply %1073, %cst_115 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1075 = chlo.broadcast_add %1074, %cst_114 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1076 = "mhlo.reshape"(%1075) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1077 = "mhlo.dot"(%1076, %cst_119) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1078 = chlo.broadcast_add %1077, %cst_118 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1079 = "mhlo.reshape"(%1078) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1080 = chlo.broadcast_maximum %1079, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1081 = "mhlo.reshape"(%1080) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1082 = "mhlo.dot"(%1081, %cst_123) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1083 = chlo.broadcast_add %1082, %cst_122 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1084 = "mhlo.reshape"(%1083) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1085 = chlo.broadcast_add %1084, %1075 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1086 = chlo.broadcast_multiply %1085, %cst_121 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1087 = chlo.broadcast_add %1086, %cst_120 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1088 = "mhlo.reshape"(%1087) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1089 = "mhlo.dot"(%1088, %cst_125) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1090 = chlo.broadcast_add %1089, %cst_124 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1091 = "mhlo.reshape"(%1090) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1092 = chlo.broadcast_maximum %1091, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1093 = "mhlo.reshape"(%1092) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1094 = "mhlo.dot"(%1093, %cst_129) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1095 = chlo.broadcast_add %1094, %cst_128 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1096 = "mhlo.reshape"(%1095) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1097 = chlo.broadcast_add %1096, %1087 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1098 = chlo.broadcast_multiply %1097, %cst_127 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1099 = chlo.broadcast_add %1098, %cst_126 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1100 = "mhlo.reshape"(%1099) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1101 = "mhlo.dot"(%1100, %cst_131) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1102 = chlo.broadcast_add %1101, %cst_130 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1103 = "mhlo.reshape"(%1102) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1104 = chlo.broadcast_maximum %1103, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1105 = "mhlo.reshape"(%1104) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1106 = "mhlo.dot"(%1105, %cst_139) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1107 = chlo.broadcast_add %1106, %cst_138 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1108 = "mhlo.reshape"(%1107) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1109 = chlo.broadcast_add %1108, %1099 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1110 = chlo.broadcast_multiply %1109, %cst_133 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1111 = chlo.broadcast_add %1110, %cst_132 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1112 = "mhlo.reshape"(%1111) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1113 = "mhlo.dot"(%1112, %cst_137) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1114 = chlo.broadcast_add %1113, %cst_136 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1115 = "mhlo.reshape"(%1114) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1116 = chlo.broadcast_add %1115, %1019 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %1117 = chlo.broadcast_multiply %1116, %cst_135 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1118 = chlo.broadcast_add %1117, %cst_134 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1119 = "mhlo.reshape"(%1118) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1120 = "mhlo.dot"(%1119, %cst_149) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1121 = chlo.broadcast_add %1120, %cst_148 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1122 = "mhlo.reshape"(%1121) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1123 = "mhlo.transpose"(%1122) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1124 = "mhlo.dot"(%1119, %cst_153) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1125 = "mhlo.reshape"(%1124) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1126 = "mhlo.broadcast_in_dim"(%cst_152) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1127 = mhlo.add %1125, %1126 : tensor<1x384x128xf32> | |
| %1128 = chlo.broadcast_multiply %1127, %cst_151 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1129 = chlo.broadcast_add %1128, %cst_150 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1130 = "mhlo.reshape"(%1129) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1131 = "mhlo.dot"(%1130, %cst_145) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1132 = chlo.broadcast_add %1131, %cst_144 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1133 = "mhlo.reshape"(%1132) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1134 = "mhlo.transpose"(%1133) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1135 = "mhlo.dot"(%1130, %cst_147) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1136 = chlo.broadcast_add %1135, %cst_146 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1137 = "mhlo.reshape"(%1136) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1138 = "mhlo.transpose"(%1137) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1139 = "mhlo.dot_general"(%1138, %1134) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %1140 = chlo.broadcast_multiply %1139, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %1141 = chlo.broadcast_add %1140, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1142 = "mhlo.reduce"(%1141, %4) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1143 = linalg.tensor_expand_shape %1142 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1144 = chlo.broadcast_subtract %1141, %1143 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1145 = "mhlo.exponential"(%1144) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1146 = "mhlo.reduce"(%1145, %5) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1147 = linalg.tensor_expand_shape %1146 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1148 = chlo.broadcast_divide %1145, %1147 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1149 = "mhlo.dot_general"(%1148, %1123) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1150 = "mhlo.transpose"(%1149) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %1151 = "mhlo.reshape"(%1150) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %1152 = "mhlo.dot"(%1151, %cst_143) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1153 = chlo.broadcast_add %1152, %cst_142 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1154 = "mhlo.reshape"(%1153) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1155 = "mhlo.dot"(%1119, %cst_156) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1156 = chlo.broadcast_add %1155, %cst_155 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1157 = "mhlo.reshape"(%1156) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1158 = chlo.broadcast_multiply %1157, %cst_154 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1159 = chlo.broadcast_add %1158, %cst_142 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1160 = chlo.broadcast_add %1154, %1159 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1161 = chlo.broadcast_multiply %1160, %cst_141 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1162 = chlo.broadcast_add %1161, %cst_140 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1163 = "mhlo.reshape"(%1162) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1164 = "mhlo.dot"(%1163, %cst_158) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1165 = chlo.broadcast_add %1164, %cst_157 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1166 = "mhlo.reshape"(%1165) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1167 = chlo.broadcast_maximum %1166, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1168 = "mhlo.reshape"(%1167) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1169 = "mhlo.dot"(%1168, %cst_162) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1170 = chlo.broadcast_add %1169, %cst_161 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1171 = "mhlo.reshape"(%1170) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1172 = chlo.broadcast_add %1171, %1162 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1173 = chlo.broadcast_multiply %1172, %cst_160 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1174 = chlo.broadcast_add %1173, %cst_159 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1175 = "mhlo.reshape"(%1174) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1176 = "mhlo.dot"(%1175, %cst_164) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1177 = chlo.broadcast_add %1176, %cst_163 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1178 = "mhlo.reshape"(%1177) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1179 = chlo.broadcast_maximum %1178, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1180 = "mhlo.reshape"(%1179) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1181 = "mhlo.dot"(%1180, %cst_168) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1182 = chlo.broadcast_add %1181, %cst_167 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1183 = "mhlo.reshape"(%1182) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1184 = chlo.broadcast_add %1183, %1174 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1185 = chlo.broadcast_multiply %1184, %cst_166 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1186 = chlo.broadcast_add %1185, %cst_165 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1187 = "mhlo.reshape"(%1186) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1188 = "mhlo.dot"(%1187, %cst_170) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1189 = chlo.broadcast_add %1188, %cst_169 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1190 = "mhlo.reshape"(%1189) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1191 = chlo.broadcast_maximum %1190, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1192 = "mhlo.reshape"(%1191) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1193 = "mhlo.dot"(%1192, %cst_174) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1194 = chlo.broadcast_add %1193, %cst_173 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1195 = "mhlo.reshape"(%1194) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1196 = chlo.broadcast_add %1195, %1186 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1197 = chlo.broadcast_multiply %1196, %cst_172 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1198 = chlo.broadcast_add %1197, %cst_171 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1199 = "mhlo.reshape"(%1198) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1200 = "mhlo.dot"(%1199, %cst_176) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1201 = chlo.broadcast_add %1200, %cst_175 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1202 = "mhlo.reshape"(%1201) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1203 = chlo.broadcast_maximum %1202, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1204 = "mhlo.reshape"(%1203) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1205 = "mhlo.dot"(%1204, %cst_184) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1206 = chlo.broadcast_add %1205, %cst_183 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1207 = "mhlo.reshape"(%1206) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1208 = chlo.broadcast_add %1207, %1198 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1209 = chlo.broadcast_multiply %1208, %cst_178 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1210 = chlo.broadcast_add %1209, %cst_177 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1211 = "mhlo.reshape"(%1210) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1212 = "mhlo.dot"(%1211, %cst_182) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1213 = chlo.broadcast_add %1212, %cst_181 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1214 = "mhlo.reshape"(%1213) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1215 = chlo.broadcast_add %1214, %1118 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %1216 = chlo.broadcast_multiply %1215, %cst_180 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1217 = chlo.broadcast_add %1216, %cst_179 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1218 = "mhlo.reshape"(%1217) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1219 = "mhlo.dot"(%1218, %cst_194) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1220 = chlo.broadcast_add %1219, %cst_193 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1221 = "mhlo.reshape"(%1220) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1222 = "mhlo.transpose"(%1221) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1223 = "mhlo.dot"(%1218, %cst_198) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1224 = "mhlo.reshape"(%1223) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1225 = "mhlo.broadcast_in_dim"(%cst_197) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1226 = mhlo.add %1224, %1225 : tensor<1x384x128xf32> | |
| %1227 = chlo.broadcast_multiply %1226, %cst_196 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1228 = chlo.broadcast_add %1227, %cst_195 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1229 = "mhlo.reshape"(%1228) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1230 = "mhlo.dot"(%1229, %cst_190) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1231 = chlo.broadcast_add %1230, %cst_189 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1232 = "mhlo.reshape"(%1231) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1233 = "mhlo.transpose"(%1232) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1234 = "mhlo.dot"(%1229, %cst_192) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1235 = chlo.broadcast_add %1234, %cst_191 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1236 = "mhlo.reshape"(%1235) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1237 = "mhlo.transpose"(%1236) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1238 = "mhlo.dot_general"(%1237, %1233) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %1239 = chlo.broadcast_multiply %1238, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %1240 = chlo.broadcast_add %1239, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1241 = "mhlo.reduce"(%1240, %4) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1242 = linalg.tensor_expand_shape %1241 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1243 = chlo.broadcast_subtract %1240, %1242 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1244 = "mhlo.exponential"(%1243) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1245 = "mhlo.reduce"(%1244, %5) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1246 = linalg.tensor_expand_shape %1245 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1247 = chlo.broadcast_divide %1244, %1246 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1248 = "mhlo.dot_general"(%1247, %1222) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1249 = "mhlo.transpose"(%1248) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %1250 = "mhlo.reshape"(%1249) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %1251 = "mhlo.dot"(%1250, %cst_188) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1252 = chlo.broadcast_add %1251, %cst_187 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1253 = "mhlo.reshape"(%1252) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1254 = "mhlo.dot"(%1218, %cst_201) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1255 = chlo.broadcast_add %1254, %cst_200 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1256 = "mhlo.reshape"(%1255) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1257 = chlo.broadcast_multiply %1256, %cst_199 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1258 = chlo.broadcast_add %1257, %cst_187 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1259 = chlo.broadcast_add %1253, %1258 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1260 = chlo.broadcast_multiply %1259, %cst_186 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1261 = chlo.broadcast_add %1260, %cst_185 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1262 = "mhlo.reshape"(%1261) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1263 = "mhlo.dot"(%1262, %cst_203) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1264 = chlo.broadcast_add %1263, %cst_202 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1265 = "mhlo.reshape"(%1264) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1266 = chlo.broadcast_maximum %1265, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1267 = "mhlo.reshape"(%1266) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1268 = "mhlo.dot"(%1267, %cst_207) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1269 = chlo.broadcast_add %1268, %cst_206 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1270 = "mhlo.reshape"(%1269) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1271 = chlo.broadcast_add %1270, %1261 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1272 = chlo.broadcast_multiply %1271, %cst_205 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1273 = chlo.broadcast_add %1272, %cst_204 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1274 = "mhlo.reshape"(%1273) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1275 = "mhlo.dot"(%1274, %cst_209) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1276 = chlo.broadcast_add %1275, %cst_208 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1277 = "mhlo.reshape"(%1276) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1278 = chlo.broadcast_maximum %1277, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1279 = "mhlo.reshape"(%1278) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1280 = "mhlo.dot"(%1279, %cst_213) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1281 = chlo.broadcast_add %1280, %cst_212 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1282 = "mhlo.reshape"(%1281) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1283 = chlo.broadcast_add %1282, %1273 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1284 = chlo.broadcast_multiply %1283, %cst_211 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1285 = chlo.broadcast_add %1284, %cst_210 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1286 = "mhlo.reshape"(%1285) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1287 = "mhlo.dot"(%1286, %cst_215) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1288 = chlo.broadcast_add %1287, %cst_214 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1289 = "mhlo.reshape"(%1288) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1290 = chlo.broadcast_maximum %1289, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1291 = "mhlo.reshape"(%1290) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1292 = "mhlo.dot"(%1291, %cst_219) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1293 = chlo.broadcast_add %1292, %cst_218 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1294 = "mhlo.reshape"(%1293) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1295 = chlo.broadcast_add %1294, %1285 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1296 = chlo.broadcast_multiply %1295, %cst_217 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1297 = chlo.broadcast_add %1296, %cst_216 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1298 = "mhlo.reshape"(%1297) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1299 = "mhlo.dot"(%1298, %cst_221) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1300 = chlo.broadcast_add %1299, %cst_220 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1301 = "mhlo.reshape"(%1300) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1302 = chlo.broadcast_maximum %1301, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1303 = "mhlo.reshape"(%1302) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1304 = "mhlo.dot"(%1303, %cst_229) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1305 = chlo.broadcast_add %1304, %cst_228 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1306 = "mhlo.reshape"(%1305) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1307 = chlo.broadcast_add %1306, %1297 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1308 = chlo.broadcast_multiply %1307, %cst_223 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1309 = chlo.broadcast_add %1308, %cst_222 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1310 = "mhlo.reshape"(%1309) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1311 = "mhlo.dot"(%1310, %cst_227) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1312 = chlo.broadcast_add %1311, %cst_226 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1313 = "mhlo.reshape"(%1312) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1314 = chlo.broadcast_add %1313, %1217 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %1315 = chlo.broadcast_multiply %1314, %cst_225 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1316 = chlo.broadcast_add %1315, %cst_224 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1317 = "mhlo.reshape"(%1316) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1318 = "mhlo.dot"(%1317, %cst_239) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1319 = chlo.broadcast_add %1318, %cst_238 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1320 = "mhlo.reshape"(%1319) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1321 = "mhlo.transpose"(%1320) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1322 = "mhlo.dot"(%1317, %cst_243) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1323 = "mhlo.reshape"(%1322) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1324 = "mhlo.broadcast_in_dim"(%cst_242) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1325 = mhlo.add %1323, %1324 : tensor<1x384x128xf32> | |
| %1326 = chlo.broadcast_multiply %1325, %cst_241 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1327 = chlo.broadcast_add %1326, %cst_240 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1328 = "mhlo.reshape"(%1327) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1329 = "mhlo.dot"(%1328, %cst_235) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1330 = chlo.broadcast_add %1329, %cst_234 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1331 = "mhlo.reshape"(%1330) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1332 = "mhlo.transpose"(%1331) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1333 = "mhlo.dot"(%1328, %cst_237) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1334 = chlo.broadcast_add %1333, %cst_236 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1335 = "mhlo.reshape"(%1334) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1336 = "mhlo.transpose"(%1335) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1337 = "mhlo.dot_general"(%1336, %1332) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %1338 = chlo.broadcast_multiply %1337, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %1339 = chlo.broadcast_add %1338, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1340 = "mhlo.reduce"(%1339, %4) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1341 = linalg.tensor_expand_shape %1340 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1342 = chlo.broadcast_subtract %1339, %1341 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1343 = "mhlo.exponential"(%1342) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1344 = "mhlo.reduce"(%1343, %5) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1345 = linalg.tensor_expand_shape %1344 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1346 = chlo.broadcast_divide %1343, %1345 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1347 = "mhlo.dot_general"(%1346, %1321) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1348 = "mhlo.transpose"(%1347) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %1349 = "mhlo.reshape"(%1348) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %1350 = "mhlo.dot"(%1349, %cst_233) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1351 = chlo.broadcast_add %1350, %cst_232 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1352 = "mhlo.reshape"(%1351) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1353 = "mhlo.dot"(%1317, %cst_246) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1354 = chlo.broadcast_add %1353, %cst_245 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1355 = "mhlo.reshape"(%1354) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1356 = chlo.broadcast_multiply %1355, %cst_244 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1357 = chlo.broadcast_add %1356, %cst_232 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1358 = chlo.broadcast_add %1352, %1357 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1359 = chlo.broadcast_multiply %1358, %cst_231 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1360 = chlo.broadcast_add %1359, %cst_230 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1361 = "mhlo.reshape"(%1360) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1362 = "mhlo.dot"(%1361, %cst_248) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1363 = chlo.broadcast_add %1362, %cst_247 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1364 = "mhlo.reshape"(%1363) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1365 = chlo.broadcast_maximum %1364, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1366 = "mhlo.reshape"(%1365) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1367 = "mhlo.dot"(%1366, %cst_252) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1368 = chlo.broadcast_add %1367, %cst_251 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1369 = "mhlo.reshape"(%1368) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1370 = chlo.broadcast_add %1369, %1360 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1371 = chlo.broadcast_multiply %1370, %cst_250 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1372 = chlo.broadcast_add %1371, %cst_249 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1373 = "mhlo.reshape"(%1372) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1374 = "mhlo.dot"(%1373, %cst_254) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1375 = chlo.broadcast_add %1374, %cst_253 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1376 = "mhlo.reshape"(%1375) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1377 = chlo.broadcast_maximum %1376, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1378 = "mhlo.reshape"(%1377) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1379 = "mhlo.dot"(%1378, %cst_258) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1380 = chlo.broadcast_add %1379, %cst_257 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1381 = "mhlo.reshape"(%1380) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1382 = chlo.broadcast_add %1381, %1372 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1383 = chlo.broadcast_multiply %1382, %cst_256 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1384 = chlo.broadcast_add %1383, %cst_255 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1385 = "mhlo.reshape"(%1384) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1386 = "mhlo.dot"(%1385, %cst_260) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1387 = chlo.broadcast_add %1386, %cst_259 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1388 = "mhlo.reshape"(%1387) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1389 = chlo.broadcast_maximum %1388, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1390 = "mhlo.reshape"(%1389) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1391 = "mhlo.dot"(%1390, %cst_264) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1392 = chlo.broadcast_add %1391, %cst_263 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1393 = "mhlo.reshape"(%1392) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1394 = chlo.broadcast_add %1393, %1384 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1395 = chlo.broadcast_multiply %1394, %cst_262 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1396 = chlo.broadcast_add %1395, %cst_261 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1397 = "mhlo.reshape"(%1396) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1398 = "mhlo.dot"(%1397, %cst_266) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1399 = chlo.broadcast_add %1398, %cst_265 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1400 = "mhlo.reshape"(%1399) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1401 = chlo.broadcast_maximum %1400, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1402 = "mhlo.reshape"(%1401) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1403 = "mhlo.dot"(%1402, %cst_274) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1404 = chlo.broadcast_add %1403, %cst_273 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1405 = "mhlo.reshape"(%1404) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1406 = chlo.broadcast_add %1405, %1396 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1407 = chlo.broadcast_multiply %1406, %cst_268 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1408 = chlo.broadcast_add %1407, %cst_267 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1409 = "mhlo.reshape"(%1408) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1410 = "mhlo.dot"(%1409, %cst_272) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1411 = chlo.broadcast_add %1410, %cst_271 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1412 = "mhlo.reshape"(%1411) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1413 = chlo.broadcast_add %1412, %1316 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %1414 = chlo.broadcast_multiply %1413, %cst_270 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1415 = chlo.broadcast_add %1414, %cst_269 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1416 = "mhlo.reshape"(%1415) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1417 = "mhlo.dot"(%1416, %cst_284) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1418 = chlo.broadcast_add %1417, %cst_283 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1419 = "mhlo.reshape"(%1418) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1420 = "mhlo.transpose"(%1419) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1421 = "mhlo.dot"(%1416, %cst_288) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1422 = "mhlo.reshape"(%1421) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1423 = "mhlo.broadcast_in_dim"(%cst_287) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1424 = mhlo.add %1422, %1423 : tensor<1x384x128xf32> | |
| %1425 = chlo.broadcast_multiply %1424, %cst_286 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1426 = chlo.broadcast_add %1425, %cst_285 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1427 = "mhlo.reshape"(%1426) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1428 = "mhlo.dot"(%1427, %cst_280) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1429 = chlo.broadcast_add %1428, %cst_279 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1430 = "mhlo.reshape"(%1429) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1431 = "mhlo.transpose"(%1430) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1432 = "mhlo.dot"(%1427, %cst_282) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1433 = chlo.broadcast_add %1432, %cst_281 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1434 = "mhlo.reshape"(%1433) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1435 = "mhlo.transpose"(%1434) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1436 = "mhlo.dot_general"(%1435, %1431) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %1437 = chlo.broadcast_multiply %1436, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %1438 = chlo.broadcast_add %1437, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1439 = "mhlo.reduce"(%1438, %4) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1440 = linalg.tensor_expand_shape %1439 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1441 = chlo.broadcast_subtract %1438, %1440 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1442 = "mhlo.exponential"(%1441) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1443 = "mhlo.reduce"(%1442, %5) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1444 = linalg.tensor_expand_shape %1443 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1445 = chlo.broadcast_divide %1442, %1444 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1446 = "mhlo.dot_general"(%1445, %1420) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1447 = "mhlo.transpose"(%1446) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %1448 = "mhlo.reshape"(%1447) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %1449 = "mhlo.dot"(%1448, %cst_278) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1450 = chlo.broadcast_add %1449, %cst_277 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1451 = "mhlo.reshape"(%1450) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1452 = "mhlo.dot"(%1416, %cst_291) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1453 = chlo.broadcast_add %1452, %cst_290 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1454 = "mhlo.reshape"(%1453) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1455 = chlo.broadcast_multiply %1454, %cst_289 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1456 = chlo.broadcast_add %1455, %cst_277 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1457 = chlo.broadcast_add %1451, %1456 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1458 = chlo.broadcast_multiply %1457, %cst_276 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1459 = chlo.broadcast_add %1458, %cst_275 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1460 = "mhlo.reshape"(%1459) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1461 = "mhlo.dot"(%1460, %cst_293) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1462 = chlo.broadcast_add %1461, %cst_292 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1463 = "mhlo.reshape"(%1462) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1464 = chlo.broadcast_maximum %1463, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1465 = "mhlo.reshape"(%1464) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1466 = "mhlo.dot"(%1465, %cst_297) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1467 = chlo.broadcast_add %1466, %cst_296 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1468 = "mhlo.reshape"(%1467) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1469 = chlo.broadcast_add %1468, %1459 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1470 = chlo.broadcast_multiply %1469, %cst_295 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1471 = chlo.broadcast_add %1470, %cst_294 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1472 = "mhlo.reshape"(%1471) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1473 = "mhlo.dot"(%1472, %cst_299) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1474 = chlo.broadcast_add %1473, %cst_298 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1475 = "mhlo.reshape"(%1474) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1476 = chlo.broadcast_maximum %1475, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1477 = "mhlo.reshape"(%1476) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1478 = "mhlo.dot"(%1477, %cst_303) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1479 = chlo.broadcast_add %1478, %cst_302 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1480 = "mhlo.reshape"(%1479) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1481 = chlo.broadcast_add %1480, %1471 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1482 = chlo.broadcast_multiply %1481, %cst_301 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1483 = chlo.broadcast_add %1482, %cst_300 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1484 = "mhlo.reshape"(%1483) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1485 = "mhlo.dot"(%1484, %cst_305) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1486 = chlo.broadcast_add %1485, %cst_304 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1487 = "mhlo.reshape"(%1486) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1488 = chlo.broadcast_maximum %1487, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1489 = "mhlo.reshape"(%1488) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1490 = "mhlo.dot"(%1489, %cst_309) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1491 = chlo.broadcast_add %1490, %cst_308 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1492 = "mhlo.reshape"(%1491) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1493 = chlo.broadcast_add %1492, %1483 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1494 = chlo.broadcast_multiply %1493, %cst_307 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1495 = chlo.broadcast_add %1494, %cst_306 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1496 = "mhlo.reshape"(%1495) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1497 = "mhlo.dot"(%1496, %cst_311) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1498 = chlo.broadcast_add %1497, %cst_310 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1499 = "mhlo.reshape"(%1498) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1500 = chlo.broadcast_maximum %1499, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1501 = "mhlo.reshape"(%1500) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1502 = "mhlo.dot"(%1501, %cst_319) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1503 = chlo.broadcast_add %1502, %cst_318 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1504 = "mhlo.reshape"(%1503) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1505 = chlo.broadcast_add %1504, %1495 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1506 = chlo.broadcast_multiply %1505, %cst_313 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1507 = chlo.broadcast_add %1506, %cst_312 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1508 = "mhlo.reshape"(%1507) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1509 = "mhlo.dot"(%1508, %cst_317) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1510 = chlo.broadcast_add %1509, %cst_316 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1511 = "mhlo.reshape"(%1510) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1512 = chlo.broadcast_add %1511, %1415 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %1513 = chlo.broadcast_multiply %1512, %cst_315 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1514 = chlo.broadcast_add %1513, %cst_314 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1515 = "mhlo.reshape"(%1514) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1516 = "mhlo.dot"(%1515, %cst_329) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1517 = chlo.broadcast_add %1516, %cst_328 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1518 = "mhlo.reshape"(%1517) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1519 = "mhlo.transpose"(%1518) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1520 = "mhlo.dot"(%1515, %cst_333) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1521 = "mhlo.reshape"(%1520) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1522 = "mhlo.broadcast_in_dim"(%cst_332) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1523 = mhlo.add %1521, %1522 : tensor<1x384x128xf32> | |
| %1524 = chlo.broadcast_multiply %1523, %cst_331 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1525 = chlo.broadcast_add %1524, %cst_330 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1526 = "mhlo.reshape"(%1525) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1527 = "mhlo.dot"(%1526, %cst_325) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1528 = chlo.broadcast_add %1527, %cst_324 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1529 = "mhlo.reshape"(%1528) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1530 = "mhlo.transpose"(%1529) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1531 = "mhlo.dot"(%1526, %cst_327) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1532 = chlo.broadcast_add %1531, %cst_326 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1533 = "mhlo.reshape"(%1532) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1534 = "mhlo.transpose"(%1533) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1535 = "mhlo.dot_general"(%1534, %1530) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %1536 = chlo.broadcast_multiply %1535, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %1537 = chlo.broadcast_add %1536, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1538 = "mhlo.reduce"(%1537, %4) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1539 = linalg.tensor_expand_shape %1538 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1540 = chlo.broadcast_subtract %1537, %1539 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1541 = "mhlo.exponential"(%1540) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1542 = "mhlo.reduce"(%1541, %5) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1543 = linalg.tensor_expand_shape %1542 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1544 = chlo.broadcast_divide %1541, %1543 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1545 = "mhlo.dot_general"(%1544, %1519) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1546 = "mhlo.transpose"(%1545) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %1547 = "mhlo.reshape"(%1546) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %1548 = "mhlo.dot"(%1547, %cst_323) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1549 = chlo.broadcast_add %1548, %cst_322 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1550 = "mhlo.reshape"(%1549) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1551 = "mhlo.dot"(%1515, %cst_336) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1552 = chlo.broadcast_add %1551, %cst_335 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1553 = "mhlo.reshape"(%1552) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1554 = chlo.broadcast_multiply %1553, %cst_334 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1555 = chlo.broadcast_add %1554, %cst_322 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1556 = chlo.broadcast_add %1550, %1555 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1557 = chlo.broadcast_multiply %1556, %cst_321 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1558 = chlo.broadcast_add %1557, %cst_320 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1559 = "mhlo.reshape"(%1558) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1560 = "mhlo.dot"(%1559, %cst_338) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1561 = chlo.broadcast_add %1560, %cst_337 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1562 = "mhlo.reshape"(%1561) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1563 = chlo.broadcast_maximum %1562, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1564 = "mhlo.reshape"(%1563) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1565 = "mhlo.dot"(%1564, %cst_342) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1566 = chlo.broadcast_add %1565, %cst_341 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1567 = "mhlo.reshape"(%1566) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1568 = chlo.broadcast_add %1567, %1558 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1569 = chlo.broadcast_multiply %1568, %cst_340 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1570 = chlo.broadcast_add %1569, %cst_339 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1571 = "mhlo.reshape"(%1570) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1572 = "mhlo.dot"(%1571, %cst_344) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1573 = chlo.broadcast_add %1572, %cst_343 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1574 = "mhlo.reshape"(%1573) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1575 = chlo.broadcast_maximum %1574, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1576 = "mhlo.reshape"(%1575) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1577 = "mhlo.dot"(%1576, %cst_348) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1578 = chlo.broadcast_add %1577, %cst_347 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1579 = "mhlo.reshape"(%1578) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1580 = chlo.broadcast_add %1579, %1570 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1581 = chlo.broadcast_multiply %1580, %cst_346 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1582 = chlo.broadcast_add %1581, %cst_345 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1583 = "mhlo.reshape"(%1582) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1584 = "mhlo.dot"(%1583, %cst_350) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1585 = chlo.broadcast_add %1584, %cst_349 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1586 = "mhlo.reshape"(%1585) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1587 = chlo.broadcast_maximum %1586, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1588 = "mhlo.reshape"(%1587) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1589 = "mhlo.dot"(%1588, %cst_354) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1590 = chlo.broadcast_add %1589, %cst_353 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1591 = "mhlo.reshape"(%1590) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1592 = chlo.broadcast_add %1591, %1582 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1593 = chlo.broadcast_multiply %1592, %cst_352 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1594 = chlo.broadcast_add %1593, %cst_351 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1595 = "mhlo.reshape"(%1594) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1596 = "mhlo.dot"(%1595, %cst_356) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1597 = chlo.broadcast_add %1596, %cst_355 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1598 = "mhlo.reshape"(%1597) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1599 = chlo.broadcast_maximum %1598, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1600 = "mhlo.reshape"(%1599) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1601 = "mhlo.dot"(%1600, %cst_364) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1602 = chlo.broadcast_add %1601, %cst_363 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1603 = "mhlo.reshape"(%1602) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1604 = chlo.broadcast_add %1603, %1594 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1605 = chlo.broadcast_multiply %1604, %cst_358 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1606 = chlo.broadcast_add %1605, %cst_357 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1607 = "mhlo.reshape"(%1606) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1608 = "mhlo.dot"(%1607, %cst_362) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1609 = chlo.broadcast_add %1608, %cst_361 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1610 = "mhlo.reshape"(%1609) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1611 = chlo.broadcast_add %1610, %1514 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %1612 = chlo.broadcast_multiply %1611, %cst_360 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1613 = chlo.broadcast_add %1612, %cst_359 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1614 = "mhlo.reshape"(%1613) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1615 = "mhlo.dot"(%1614, %cst_374) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1616 = chlo.broadcast_add %1615, %cst_373 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1617 = "mhlo.reshape"(%1616) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1618 = "mhlo.transpose"(%1617) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1619 = "mhlo.dot"(%1614, %cst_378) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1620 = "mhlo.reshape"(%1619) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1621 = "mhlo.broadcast_in_dim"(%cst_377) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1622 = mhlo.add %1620, %1621 : tensor<1x384x128xf32> | |
| %1623 = chlo.broadcast_multiply %1622, %cst_376 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1624 = chlo.broadcast_add %1623, %cst_375 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1625 = "mhlo.reshape"(%1624) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1626 = "mhlo.dot"(%1625, %cst_370) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1627 = chlo.broadcast_add %1626, %cst_369 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1628 = "mhlo.reshape"(%1627) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1629 = "mhlo.transpose"(%1628) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1630 = "mhlo.dot"(%1625, %cst_372) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1631 = chlo.broadcast_add %1630, %cst_371 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1632 = "mhlo.reshape"(%1631) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1633 = "mhlo.transpose"(%1632) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1634 = "mhlo.dot_general"(%1633, %1629) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %1635 = chlo.broadcast_multiply %1634, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %1636 = chlo.broadcast_add %1635, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1637 = "mhlo.reduce"(%1636, %4) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1638 = linalg.tensor_expand_shape %1637 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1639 = chlo.broadcast_subtract %1636, %1638 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1640 = "mhlo.exponential"(%1639) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1641 = "mhlo.reduce"(%1640, %5) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1642 = linalg.tensor_expand_shape %1641 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1643 = chlo.broadcast_divide %1640, %1642 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1644 = "mhlo.dot_general"(%1643, %1618) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1645 = "mhlo.transpose"(%1644) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %1646 = "mhlo.reshape"(%1645) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %1647 = "mhlo.dot"(%1646, %cst_368) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1648 = chlo.broadcast_add %1647, %cst_367 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1649 = "mhlo.reshape"(%1648) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1650 = "mhlo.dot"(%1614, %cst_381) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1651 = chlo.broadcast_add %1650, %cst_380 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1652 = "mhlo.reshape"(%1651) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1653 = chlo.broadcast_multiply %1652, %cst_379 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1654 = chlo.broadcast_add %1653, %cst_367 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1655 = chlo.broadcast_add %1649, %1654 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1656 = chlo.broadcast_multiply %1655, %cst_366 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1657 = chlo.broadcast_add %1656, %cst_365 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1658 = "mhlo.reshape"(%1657) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1659 = "mhlo.dot"(%1658, %cst_383) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1660 = chlo.broadcast_add %1659, %cst_382 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1661 = "mhlo.reshape"(%1660) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1662 = chlo.broadcast_maximum %1661, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1663 = "mhlo.reshape"(%1662) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1664 = "mhlo.dot"(%1663, %cst_387) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1665 = chlo.broadcast_add %1664, %cst_386 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1666 = "mhlo.reshape"(%1665) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1667 = chlo.broadcast_add %1666, %1657 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1668 = chlo.broadcast_multiply %1667, %cst_385 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1669 = chlo.broadcast_add %1668, %cst_384 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1670 = "mhlo.reshape"(%1669) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1671 = "mhlo.dot"(%1670, %cst_389) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1672 = chlo.broadcast_add %1671, %cst_388 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1673 = "mhlo.reshape"(%1672) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1674 = chlo.broadcast_maximum %1673, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1675 = "mhlo.reshape"(%1674) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1676 = "mhlo.dot"(%1675, %cst_393) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1677 = chlo.broadcast_add %1676, %cst_392 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1678 = "mhlo.reshape"(%1677) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1679 = chlo.broadcast_add %1678, %1669 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1680 = chlo.broadcast_multiply %1679, %cst_391 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1681 = chlo.broadcast_add %1680, %cst_390 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1682 = "mhlo.reshape"(%1681) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1683 = "mhlo.dot"(%1682, %cst_395) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1684 = chlo.broadcast_add %1683, %cst_394 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1685 = "mhlo.reshape"(%1684) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1686 = chlo.broadcast_maximum %1685, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1687 = "mhlo.reshape"(%1686) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1688 = "mhlo.dot"(%1687, %cst_399) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1689 = chlo.broadcast_add %1688, %cst_398 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1690 = "mhlo.reshape"(%1689) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1691 = chlo.broadcast_add %1690, %1681 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1692 = chlo.broadcast_multiply %1691, %cst_397 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1693 = chlo.broadcast_add %1692, %cst_396 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1694 = "mhlo.reshape"(%1693) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1695 = "mhlo.dot"(%1694, %cst_401) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1696 = chlo.broadcast_add %1695, %cst_400 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1697 = "mhlo.reshape"(%1696) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1698 = chlo.broadcast_maximum %1697, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1699 = "mhlo.reshape"(%1698) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1700 = "mhlo.dot"(%1699, %cst_409) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1701 = chlo.broadcast_add %1700, %cst_408 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1702 = "mhlo.reshape"(%1701) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1703 = chlo.broadcast_add %1702, %1693 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1704 = chlo.broadcast_multiply %1703, %cst_403 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1705 = chlo.broadcast_add %1704, %cst_402 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1706 = "mhlo.reshape"(%1705) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1707 = "mhlo.dot"(%1706, %cst_407) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1708 = chlo.broadcast_add %1707, %cst_406 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1709 = "mhlo.reshape"(%1708) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1710 = chlo.broadcast_add %1709, %1613 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %1711 = chlo.broadcast_multiply %1710, %cst_405 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1712 = chlo.broadcast_add %1711, %cst_404 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1713 = "mhlo.reshape"(%1712) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1714 = "mhlo.dot"(%1713, %cst_419) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1715 = chlo.broadcast_add %1714, %cst_418 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1716 = "mhlo.reshape"(%1715) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1717 = "mhlo.transpose"(%1716) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1718 = "mhlo.dot"(%1713, %cst_423) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1719 = "mhlo.reshape"(%1718) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1720 = "mhlo.broadcast_in_dim"(%cst_422) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1721 = mhlo.add %1719, %1720 : tensor<1x384x128xf32> | |
| %1722 = chlo.broadcast_multiply %1721, %cst_421 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1723 = chlo.broadcast_add %1722, %cst_420 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1724 = "mhlo.reshape"(%1723) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1725 = "mhlo.dot"(%1724, %cst_415) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1726 = chlo.broadcast_add %1725, %cst_414 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1727 = "mhlo.reshape"(%1726) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1728 = "mhlo.transpose"(%1727) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1729 = "mhlo.dot"(%1724, %cst_417) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1730 = chlo.broadcast_add %1729, %cst_416 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1731 = "mhlo.reshape"(%1730) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1732 = "mhlo.transpose"(%1731) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1733 = "mhlo.dot_general"(%1732, %1728) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %1734 = chlo.broadcast_multiply %1733, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %1735 = chlo.broadcast_add %1734, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1736 = "mhlo.reduce"(%1735, %4) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1737 = linalg.tensor_expand_shape %1736 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1738 = chlo.broadcast_subtract %1735, %1737 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1739 = "mhlo.exponential"(%1738) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1740 = "mhlo.reduce"(%1739, %5) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1741 = linalg.tensor_expand_shape %1740 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1742 = chlo.broadcast_divide %1739, %1741 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1743 = "mhlo.dot_general"(%1742, %1717) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1744 = "mhlo.transpose"(%1743) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %1745 = "mhlo.reshape"(%1744) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %1746 = "mhlo.dot"(%1745, %cst_413) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1747 = chlo.broadcast_add %1746, %cst_412 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1748 = "mhlo.reshape"(%1747) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1749 = "mhlo.dot"(%1713, %cst_426) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1750 = chlo.broadcast_add %1749, %cst_425 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1751 = "mhlo.reshape"(%1750) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1752 = chlo.broadcast_multiply %1751, %cst_424 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1753 = chlo.broadcast_add %1752, %cst_412 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1754 = chlo.broadcast_add %1748, %1753 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1755 = chlo.broadcast_multiply %1754, %cst_411 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1756 = chlo.broadcast_add %1755, %cst_410 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1757 = "mhlo.reshape"(%1756) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1758 = "mhlo.dot"(%1757, %cst_428) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1759 = chlo.broadcast_add %1758, %cst_427 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1760 = "mhlo.reshape"(%1759) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1761 = chlo.broadcast_maximum %1760, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1762 = "mhlo.reshape"(%1761) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1763 = "mhlo.dot"(%1762, %cst_432) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1764 = chlo.broadcast_add %1763, %cst_431 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1765 = "mhlo.reshape"(%1764) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1766 = chlo.broadcast_add %1765, %1756 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1767 = chlo.broadcast_multiply %1766, %cst_430 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1768 = chlo.broadcast_add %1767, %cst_429 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1769 = "mhlo.reshape"(%1768) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1770 = "mhlo.dot"(%1769, %cst_434) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1771 = chlo.broadcast_add %1770, %cst_433 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1772 = "mhlo.reshape"(%1771) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1773 = chlo.broadcast_maximum %1772, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1774 = "mhlo.reshape"(%1773) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1775 = "mhlo.dot"(%1774, %cst_438) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1776 = chlo.broadcast_add %1775, %cst_437 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1777 = "mhlo.reshape"(%1776) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1778 = chlo.broadcast_add %1777, %1768 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1779 = chlo.broadcast_multiply %1778, %cst_436 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1780 = chlo.broadcast_add %1779, %cst_435 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1781 = "mhlo.reshape"(%1780) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1782 = "mhlo.dot"(%1781, %cst_440) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1783 = chlo.broadcast_add %1782, %cst_439 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1784 = "mhlo.reshape"(%1783) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1785 = chlo.broadcast_maximum %1784, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1786 = "mhlo.reshape"(%1785) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1787 = "mhlo.dot"(%1786, %cst_444) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1788 = chlo.broadcast_add %1787, %cst_443 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1789 = "mhlo.reshape"(%1788) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1790 = chlo.broadcast_add %1789, %1780 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1791 = chlo.broadcast_multiply %1790, %cst_442 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1792 = chlo.broadcast_add %1791, %cst_441 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1793 = "mhlo.reshape"(%1792) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1794 = "mhlo.dot"(%1793, %cst_446) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1795 = chlo.broadcast_add %1794, %cst_445 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1796 = "mhlo.reshape"(%1795) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1797 = chlo.broadcast_maximum %1796, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1798 = "mhlo.reshape"(%1797) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1799 = "mhlo.dot"(%1798, %cst_454) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1800 = chlo.broadcast_add %1799, %cst_453 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1801 = "mhlo.reshape"(%1800) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1802 = chlo.broadcast_add %1801, %1792 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1803 = chlo.broadcast_multiply %1802, %cst_448 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1804 = chlo.broadcast_add %1803, %cst_447 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1805 = "mhlo.reshape"(%1804) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1806 = "mhlo.dot"(%1805, %cst_452) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1807 = chlo.broadcast_add %1806, %cst_451 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1808 = "mhlo.reshape"(%1807) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1809 = chlo.broadcast_add %1808, %1712 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %1810 = chlo.broadcast_multiply %1809, %cst_450 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1811 = chlo.broadcast_add %1810, %cst_449 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1812 = "mhlo.reshape"(%1811) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1813 = "mhlo.dot"(%1812, %cst_464) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1814 = chlo.broadcast_add %1813, %cst_463 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1815 = "mhlo.reshape"(%1814) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1816 = "mhlo.transpose"(%1815) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1817 = "mhlo.dot"(%1812, %cst_468) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1818 = "mhlo.reshape"(%1817) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1819 = "mhlo.broadcast_in_dim"(%cst_467) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1820 = mhlo.add %1818, %1819 : tensor<1x384x128xf32> | |
| %1821 = chlo.broadcast_multiply %1820, %cst_466 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1822 = chlo.broadcast_add %1821, %cst_465 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1823 = "mhlo.reshape"(%1822) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1824 = "mhlo.dot"(%1823, %cst_460) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1825 = chlo.broadcast_add %1824, %cst_459 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1826 = "mhlo.reshape"(%1825) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1827 = "mhlo.transpose"(%1826) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1828 = "mhlo.dot"(%1823, %cst_462) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1829 = chlo.broadcast_add %1828, %cst_461 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1830 = "mhlo.reshape"(%1829) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1831 = "mhlo.transpose"(%1830) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1832 = "mhlo.dot_general"(%1831, %1827) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %1833 = chlo.broadcast_multiply %1832, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %1834 = chlo.broadcast_add %1833, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1835 = "mhlo.reduce"(%1834, %4) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1836 = linalg.tensor_expand_shape %1835 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1837 = chlo.broadcast_subtract %1834, %1836 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1838 = "mhlo.exponential"(%1837) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1839 = "mhlo.reduce"(%1838, %5) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1840 = linalg.tensor_expand_shape %1839 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1841 = chlo.broadcast_divide %1838, %1840 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1842 = "mhlo.dot_general"(%1841, %1816) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1843 = "mhlo.transpose"(%1842) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %1844 = "mhlo.reshape"(%1843) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %1845 = "mhlo.dot"(%1844, %cst_458) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1846 = chlo.broadcast_add %1845, %cst_457 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1847 = "mhlo.reshape"(%1846) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1848 = "mhlo.dot"(%1812, %cst_471) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1849 = chlo.broadcast_add %1848, %cst_470 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1850 = "mhlo.reshape"(%1849) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1851 = chlo.broadcast_multiply %1850, %cst_469 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1852 = chlo.broadcast_add %1851, %cst_457 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1853 = chlo.broadcast_add %1847, %1852 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1854 = chlo.broadcast_multiply %1853, %cst_456 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1855 = chlo.broadcast_add %1854, %cst_455 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1856 = "mhlo.reshape"(%1855) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1857 = "mhlo.dot"(%1856, %cst_473) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1858 = chlo.broadcast_add %1857, %cst_472 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1859 = "mhlo.reshape"(%1858) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1860 = chlo.broadcast_maximum %1859, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1861 = "mhlo.reshape"(%1860) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1862 = "mhlo.dot"(%1861, %cst_477) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1863 = chlo.broadcast_add %1862, %cst_476 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1864 = "mhlo.reshape"(%1863) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1865 = chlo.broadcast_add %1864, %1855 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1866 = chlo.broadcast_multiply %1865, %cst_475 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1867 = chlo.broadcast_add %1866, %cst_474 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1868 = "mhlo.reshape"(%1867) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1869 = "mhlo.dot"(%1868, %cst_479) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1870 = chlo.broadcast_add %1869, %cst_478 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1871 = "mhlo.reshape"(%1870) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1872 = chlo.broadcast_maximum %1871, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1873 = "mhlo.reshape"(%1872) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1874 = "mhlo.dot"(%1873, %cst_483) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1875 = chlo.broadcast_add %1874, %cst_482 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1876 = "mhlo.reshape"(%1875) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1877 = chlo.broadcast_add %1876, %1867 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1878 = chlo.broadcast_multiply %1877, %cst_481 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1879 = chlo.broadcast_add %1878, %cst_480 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1880 = "mhlo.reshape"(%1879) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1881 = "mhlo.dot"(%1880, %cst_485) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1882 = chlo.broadcast_add %1881, %cst_484 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1883 = "mhlo.reshape"(%1882) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1884 = chlo.broadcast_maximum %1883, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1885 = "mhlo.reshape"(%1884) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1886 = "mhlo.dot"(%1885, %cst_489) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1887 = chlo.broadcast_add %1886, %cst_488 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1888 = "mhlo.reshape"(%1887) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1889 = chlo.broadcast_add %1888, %1879 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1890 = chlo.broadcast_multiply %1889, %cst_487 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1891 = chlo.broadcast_add %1890, %cst_486 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1892 = "mhlo.reshape"(%1891) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1893 = "mhlo.dot"(%1892, %cst_491) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1894 = chlo.broadcast_add %1893, %cst_490 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1895 = "mhlo.reshape"(%1894) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1896 = chlo.broadcast_maximum %1895, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1897 = "mhlo.reshape"(%1896) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1898 = "mhlo.dot"(%1897, %cst_499) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1899 = chlo.broadcast_add %1898, %cst_498 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1900 = "mhlo.reshape"(%1899) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1901 = chlo.broadcast_add %1900, %1891 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1902 = chlo.broadcast_multiply %1901, %cst_493 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1903 = chlo.broadcast_add %1902, %cst_492 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1904 = "mhlo.reshape"(%1903) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1905 = "mhlo.dot"(%1904, %cst_497) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1906 = chlo.broadcast_add %1905, %cst_496 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1907 = "mhlo.reshape"(%1906) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1908 = chlo.broadcast_add %1907, %1811 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %1909 = chlo.broadcast_multiply %1908, %cst_495 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1910 = chlo.broadcast_add %1909, %cst_494 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %1911 = "mhlo.reshape"(%1910) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1912 = "mhlo.dot"(%1911, %cst_509) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1913 = chlo.broadcast_add %1912, %cst_508 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1914 = "mhlo.reshape"(%1913) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1915 = "mhlo.transpose"(%1914) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1916 = "mhlo.dot"(%1911, %cst_513) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1917 = "mhlo.reshape"(%1916) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1918 = "mhlo.broadcast_in_dim"(%cst_512) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1919 = mhlo.add %1917, %1918 : tensor<1x384x128xf32> | |
| %1920 = chlo.broadcast_multiply %1919, %cst_511 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1921 = chlo.broadcast_add %1920, %cst_510 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1922 = "mhlo.reshape"(%1921) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1923 = "mhlo.dot"(%1922, %cst_505) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1924 = chlo.broadcast_add %1923, %cst_504 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1925 = "mhlo.reshape"(%1924) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1926 = "mhlo.transpose"(%1925) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1927 = "mhlo.dot"(%1922, %cst_507) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1928 = chlo.broadcast_add %1927, %cst_506 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1929 = "mhlo.reshape"(%1928) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %1930 = "mhlo.transpose"(%1929) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1931 = "mhlo.dot_general"(%1930, %1926) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32> | |
| %1932 = chlo.broadcast_multiply %1931, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32> | |
| %1933 = chlo.broadcast_add %1932, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1934 = "mhlo.reduce"(%1933, %4) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.maximum %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1935 = linalg.tensor_expand_shape %1934 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1936 = chlo.broadcast_subtract %1933, %1935 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1937 = "mhlo.exponential"(%1936) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32> | |
| %1938 = "mhlo.reduce"(%1937, %5) ( { | |
| ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors | |
| %2417 = mhlo.add %arg3, %arg4 : tensor<f32> | |
| "mhlo.return"(%2417) : (tensor<f32>) -> () | |
| }) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32> | |
| %1939 = linalg.tensor_expand_shape %1938 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32> | |
| %1940 = chlo.broadcast_divide %1937, %1939 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32> | |
| %1941 = "mhlo.dot_general"(%1940, %1915) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32> | |
| %1942 = "mhlo.transpose"(%1941) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32> | |
| %1943 = "mhlo.reshape"(%1942) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32> | |
| %1944 = "mhlo.dot"(%1943, %cst_503) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %1945 = chlo.broadcast_add %1944, %cst_502 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1946 = "mhlo.reshape"(%1945) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1947 = "mhlo.dot"(%1911, %cst_516) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1948 = chlo.broadcast_add %1947, %cst_515 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1949 = "mhlo.reshape"(%1948) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1950 = chlo.broadcast_multiply %1949, %cst_514 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1951 = chlo.broadcast_add %1950, %cst_502 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1952 = chlo.broadcast_add %1946, %1951 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1953 = chlo.broadcast_multiply %1952, %cst_501 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1954 = chlo.broadcast_add %1953, %cst_500 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1955 = "mhlo.reshape"(%1954) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1956 = "mhlo.dot"(%1955, %cst_518) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1957 = chlo.broadcast_add %1956, %cst_517 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1958 = "mhlo.reshape"(%1957) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1959 = chlo.broadcast_maximum %1958, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1960 = "mhlo.reshape"(%1959) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1961 = "mhlo.dot"(%1960, %cst_522) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1962 = chlo.broadcast_add %1961, %cst_521 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1963 = "mhlo.reshape"(%1962) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1964 = chlo.broadcast_add %1963, %1954 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1965 = chlo.broadcast_multiply %1964, %cst_520 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1966 = chlo.broadcast_add %1965, %cst_519 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1967 = "mhlo.reshape"(%1966) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1968 = "mhlo.dot"(%1967, %cst_524) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1969 = chlo.broadcast_add %1968, %cst_523 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1970 = "mhlo.reshape"(%1969) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1971 = chlo.broadcast_maximum %1970, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1972 = "mhlo.reshape"(%1971) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1973 = "mhlo.dot"(%1972, %cst_528) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1974 = chlo.broadcast_add %1973, %cst_527 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1975 = "mhlo.reshape"(%1974) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1976 = chlo.broadcast_add %1975, %1966 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1977 = chlo.broadcast_multiply %1976, %cst_526 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1978 = chlo.broadcast_add %1977, %cst_525 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1979 = "mhlo.reshape"(%1978) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1980 = "mhlo.dot"(%1979, %cst_530) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1981 = chlo.broadcast_add %1980, %cst_529 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1982 = "mhlo.reshape"(%1981) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1983 = chlo.broadcast_maximum %1982, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1984 = "mhlo.reshape"(%1983) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1985 = "mhlo.dot"(%1984, %cst_534) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1986 = chlo.broadcast_add %1985, %cst_533 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1987 = "mhlo.reshape"(%1986) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %1988 = chlo.broadcast_add %1987, %1978 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %1989 = chlo.broadcast_multiply %1988, %cst_532 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1990 = chlo.broadcast_add %1989, %cst_531 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %1991 = "mhlo.reshape"(%1990) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %1992 = "mhlo.dot"(%1991, %cst_536) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %1993 = chlo.broadcast_add %1992, %cst_535 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %1994 = "mhlo.reshape"(%1993) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %1995 = chlo.broadcast_maximum %1994, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32> | |
| %1996 = "mhlo.reshape"(%1995) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %1997 = "mhlo.dot"(%1996, %cst_544) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %1998 = chlo.broadcast_add %1997, %cst_543 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %1999 = "mhlo.reshape"(%1998) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2000 = chlo.broadcast_add %1999, %1990 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32> | |
| %2001 = chlo.broadcast_multiply %2000, %cst_538 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2002 = chlo.broadcast_add %2001, %cst_537 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2003 = "mhlo.reshape"(%2002) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2004 = "mhlo.dot"(%2003, %cst_542) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32> | |
| %2005 = chlo.broadcast_add %2004, %cst_541 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32> | |
| %2006 = "mhlo.reshape"(%2005) : (tensor<384x512xf32>) -> tensor<1x384x512xf32> | |
| %2007 = chlo.broadcast_add %2006, %1910 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32> | |
| %2008 = chlo.broadcast_multiply %2007, %cst_540 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %2009 = chlo.broadcast_add %2008, %cst_539 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32> | |
| %2010 = "mhlo.reshape"(%2009) : (tensor<1x384x512xf32>) -> tensor<384x512xf32> | |
| %2011 = "mhlo.dot"(%2010, %cst_599) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2012 = chlo.broadcast_add %2011, %cst_598 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2013 = "mhlo.reshape"(%2012) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %2014 = "mhlo.transpose"(%2013) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %2015 = "mhlo.dot"(%2010, %cst_603) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32> | |
| %2016 = "mhlo.reshape"(%2015) : (tensor<384x128xf32>) -> tensor<1x384x128xf32> | |
| %2017 = "mhlo.broadcast_in_dim"(%cst_602) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2018 = mhlo.add %2016, %2017 : tensor<1x384x128xf32> | |
| %2019 = chlo.broadcast_multiply %2018, %cst_601 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2020 = chlo.broadcast_add %2019, %cst_600 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32> | |
| %2021 = "mhlo.reshape"(%2020) : (tensor<1x384x128xf32>) -> tensor<384x128xf32> | |
| %2022 = "mhlo.dot"(%2021, %cst_595) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %2023 = chlo.broadcast_add %2022, %cst_594 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2024 = "mhlo.reshape"(%2023) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %2025 = "mhlo.transpose"(%2024) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %2026 = "mhlo.dot"(%2021, %cst_597) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32> | |
| %2027 = chlo.broadcast_add %2026, %cst_596 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32> | |
| %2028 = "mhlo.reshape"(%2027) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32> | |
| %2029 = "mhlo.transpose"(%2028) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32> | |
| %2030 = "mhlo.dot_general"(%2029, %2025) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contract |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment