Skip to content

Instantly share code, notes, and snippets.

@antiagainst
Last active August 9, 2021 21:46
Show Gist options
  • Save antiagainst/ebadbfa490d38636f2eea4b1164c7726 to your computer and use it in GitHub Desktop.
Save antiagainst/ebadbfa490d38636f2eea4b1164c7726 to your computer and use it in GitHub Desktop.
This file has been truncated, but you can view the full file.
// -----// IR Dump After mlir::iree_compiler::IREE::ABI::WrapEntryPointsPass //----- //
builtin.module {
// Model parameters under bert/embeddings, declared as private flow.variable
// symbols. Each initializer here is the placeholder attribute
// opaque<"_", "0xDEADBEEF">, not real weight data.
// NOTE(review): this looks like MLIR's elided-large-elements printing; confirm
// against the flags used to produce this dump before treating values as lost.
flow.variable @"__iree_flow_bert/embeddings/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/embeddings/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
// embedding_transformation: kernel is 384x512 with a 512-wide bias.
flow.variable @"__iree_flow_bert/embeddings/embedding_transformation/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/embeddings/embedding_transformation/kernel" opaque<"_", "0xDEADBEEF"> : tensor<384x512xf32> attributes {sym_visibility = "private"}
// Embedding tables: position (512x512), token type (2x512), word (30522x128).
flow.variable @"__iree_flow_bert/embeddings/position_embeddings" opaque<"_", "0xDEADBEEF"> : tensor<512x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/embeddings/token_type_embeddings" opaque<"_", "0xDEADBEEF"> : tensor<2x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/embeddings/word_embeddings" opaque<"_", "0xDEADBEEF"> : tensor<30522x128xf32> attributes {sym_visibility = "private"}
// Parameters for bert/encoder/layer_0 (46 private variables, all f32, all with
// the opaque<"_", "0xDEADBEEF"> placeholder initializer).
//
// attention/*: output projection plus self-attention key/query/value.
// Key and query kernels are 128x128; the value kernel is 512x128.
flow.variable @"__iree_flow_bert/encoder/layer_0/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// bottleneck/{attention,input}: 512x128 dense kernels with 128-wide bias and
// FakeLayerNorm beta/gamma.
flow.variable @"__iree_flow_bert/encoder/layer_0/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// ffn_layer_0..2: each has an intermediate dense (128x512 kernel, 512 bias)
// and an output dense (512x128 kernel, 128 bias) with FakeLayerNorm beta/gamma.
flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// Layer-level intermediate/output: same 128x512 -> 512x128 kernel shapes as the
// ffn_layer_* groups, plus output/bottleneck (128x512 kernel, 512-wide
// bias and FakeLayerNorm beta/gamma).
flow.variable @"__iree_flow_bert/encoder/layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// Parameters for bert/encoder/layer_1 (46 private f32 variables with the
// opaque<"_", "0xDEADBEEF"> placeholder initializer).
//
// attention/*: key and query kernels are 128x128; the value kernel is 512x128.
flow.variable @"__iree_flow_bert/encoder/layer_1/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// bottleneck/{attention,input}: 512x128 dense kernels, 128-wide bias/norm.
flow.variable @"__iree_flow_bert/encoder/layer_1/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// ffn_layer_0..2: intermediate dense (128x512 kernel, 512 bias) followed by
// output dense (512x128 kernel, 128 bias) with FakeLayerNorm beta/gamma.
flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// Layer-level intermediate/output, including output/bottleneck (128x512 kernel,
// 512-wide bias and FakeLayerNorm beta/gamma).
flow.variable @"__iree_flow_bert/encoder/layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// Parameters for bert/encoder/layer_10 (46 private f32 variables with the
// opaque<"_", "0xDEADBEEF"> placeholder initializer).
// Note the symbols appear in string order, so layer_10 directly follows layer_1
// rather than layer_9; do not read declaration order as numeric layer order.
//
// attention/*: key and query kernels are 128x128; the value kernel is 512x128.
flow.variable @"__iree_flow_bert/encoder/layer_10/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// bottleneck/{attention,input}: 512x128 dense kernels, 128-wide bias/norm.
flow.variable @"__iree_flow_bert/encoder/layer_10/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// ffn_layer_0..2: intermediate dense (128x512 kernel, 512 bias) followed by
// output dense (512x128 kernel, 128 bias) with FakeLayerNorm beta/gamma.
flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// Layer-level intermediate/output, including output/bottleneck (128x512 kernel,
// 512-wide bias and FakeLayerNorm beta/gamma).
flow.variable @"__iree_flow_bert/encoder/layer_10/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_10/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// Parameters for bert/encoder/layer_11 (46 private f32 variables with the
// opaque<"_", "0xDEADBEEF"> placeholder initializer).
//
// attention/*: key and query kernels are 128x128; the value kernel is 512x128.
flow.variable @"__iree_flow_bert/encoder/layer_11/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// bottleneck/{attention,input}: 512x128 dense kernels, 128-wide bias/norm.
flow.variable @"__iree_flow_bert/encoder/layer_11/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// ffn_layer_0..2: intermediate dense (128x512 kernel, 512 bias) followed by
// output dense (512x128 kernel, 128 bias) with FakeLayerNorm beta/gamma.
flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// Layer-level intermediate/output, including output/bottleneck (128x512 kernel,
// 512-wide bias and FakeLayerNorm beta/gamma).
flow.variable @"__iree_flow_bert/encoder/layer_11/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_11/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_12/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_13/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_14/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_15/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// encoder/layer_16: 46 private flow.variable symbols, all f32 tensors.
// NOTE(review): every initializer prints as opaque<"_", "0xDEADBEEF">; this
// looks like an elided-elements placeholder rather than real data - confirm.
// attention/output: FakeLayerNorm beta/gamma (128), dense bias (128), dense kernel (128x128).
flow.variable @"__iree_flow_bert/encoder/layer_16/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
// attention/self: key/query/value bias (128); key and query kernels are 128x128,
// the value kernel is 512x128.
flow.variable @"__iree_flow_bert/encoder/layer_16/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// bottleneck/attention: FakeLayerNorm beta/gamma (128), dense bias (128), dense kernel (512x128).
flow.variable @"__iree_flow_bert/encoder/layer_16/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// bottleneck/input: FakeLayerNorm beta/gamma (128), dense bias (128), dense kernel (512x128).
flow.variable @"__iree_flow_bert/encoder/layer_16/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// ffn_layer_0: intermediate/dense bias (512) + kernel (128x512); output FakeLayerNorm
// beta/gamma (128); output/dense bias (128) + kernel (512x128).
flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// ffn_layer_1: same six symbols and shapes as ffn_layer_0.
flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// ffn_layer_2: same six symbols and shapes as ffn_layer_0.
flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// intermediate/dense bias (512) and kernel (128x512).
flow.variable @"__iree_flow_bert/encoder/layer_16/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
// output/FakeLayerNorm beta and gamma (128 each).
flow.variable @"__iree_flow_bert/encoder/layer_16/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
// output/bottleneck: FakeLayerNorm beta/gamma (512), dense bias (512), dense kernel (128x512).
flow.variable @"__iree_flow_bert/encoder/layer_16/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
// output/dense bias (128) and kernel (512x128).
flow.variable @"__iree_flow_bert/encoder/layer_16/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_16/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// encoder/layer_17: 46 private flow.variable symbols, all f32 tensors.
// NOTE(review): every initializer prints as opaque<"_", "0xDEADBEEF">; this
// looks like an elided-elements placeholder rather than real data - confirm.
// attention/output: FakeLayerNorm beta/gamma (128), dense bias (128), dense kernel (128x128).
flow.variable @"__iree_flow_bert/encoder/layer_17/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
// attention/self: key/query/value bias (128); key and query kernels are 128x128,
// the value kernel is 512x128.
flow.variable @"__iree_flow_bert/encoder/layer_17/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// bottleneck/attention: FakeLayerNorm beta/gamma (128), dense bias (128), dense kernel (512x128).
flow.variable @"__iree_flow_bert/encoder/layer_17/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// bottleneck/input: FakeLayerNorm beta/gamma (128), dense bias (128), dense kernel (512x128).
flow.variable @"__iree_flow_bert/encoder/layer_17/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// ffn_layer_0: intermediate/dense bias (512) + kernel (128x512); output FakeLayerNorm
// beta/gamma (128); output/dense bias (128) + kernel (512x128).
flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// ffn_layer_1: same six symbols and shapes as ffn_layer_0.
flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// ffn_layer_2: same six symbols and shapes as ffn_layer_0.
flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// intermediate/dense bias (512) and kernel (128x512).
flow.variable @"__iree_flow_bert/encoder/layer_17/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
// output/FakeLayerNorm beta and gamma (128 each).
flow.variable @"__iree_flow_bert/encoder/layer_17/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
// output/bottleneck: FakeLayerNorm beta/gamma (512), dense bias (512), dense kernel (128x512).
flow.variable @"__iree_flow_bert/encoder/layer_17/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
// output/dense bias (128) and kernel (512x128).
flow.variable @"__iree_flow_bert/encoder/layer_17/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_17/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// encoder/layer_18: 46 private flow.variable symbols, all f32 tensors.
// NOTE(review): every initializer prints as opaque<"_", "0xDEADBEEF">; this
// looks like an elided-elements placeholder rather than real data - confirm.
// attention/output: FakeLayerNorm beta/gamma (128), dense bias (128), dense kernel (128x128).
flow.variable @"__iree_flow_bert/encoder/layer_18/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
// attention/self: key/query/value bias (128); key and query kernels are 128x128,
// the value kernel is 512x128.
flow.variable @"__iree_flow_bert/encoder/layer_18/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// bottleneck/attention: FakeLayerNorm beta/gamma (128), dense bias (128), dense kernel (512x128).
flow.variable @"__iree_flow_bert/encoder/layer_18/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// bottleneck/input: FakeLayerNorm beta/gamma (128), dense bias (128), dense kernel (512x128).
flow.variable @"__iree_flow_bert/encoder/layer_18/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// ffn_layer_0: intermediate/dense bias (512) + kernel (128x512); output FakeLayerNorm
// beta/gamma (128); output/dense bias (128) + kernel (512x128).
flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// ffn_layer_1: same six symbols and shapes as ffn_layer_0.
flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// ffn_layer_2: same six symbols and shapes as ffn_layer_0.
flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// intermediate/dense bias (512) and kernel (128x512).
flow.variable @"__iree_flow_bert/encoder/layer_18/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
// output/FakeLayerNorm beta and gamma (128 each).
flow.variable @"__iree_flow_bert/encoder/layer_18/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
// output/bottleneck: FakeLayerNorm beta/gamma (512), dense bias (512), dense kernel (128x512).
flow.variable @"__iree_flow_bert/encoder/layer_18/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
// output/dense bias (128) and kernel (512x128).
flow.variable @"__iree_flow_bert/encoder/layer_18/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_18/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// encoder/layer_19: 46 private flow.variable symbols, all f32 tensors.
// NOTE(review): every initializer prints as opaque<"_", "0xDEADBEEF">; this
// looks like an elided-elements placeholder rather than real data - confirm.
// attention/output: FakeLayerNorm beta/gamma (128), dense bias (128), dense kernel (128x128).
flow.variable @"__iree_flow_bert/encoder/layer_19/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
// attention/self: key/query/value bias (128); key and query kernels are 128x128,
// the value kernel is 512x128.
flow.variable @"__iree_flow_bert/encoder/layer_19/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// bottleneck/attention: FakeLayerNorm beta/gamma (128), dense bias (128), dense kernel (512x128).
flow.variable @"__iree_flow_bert/encoder/layer_19/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// bottleneck/input: FakeLayerNorm beta/gamma (128), dense bias (128), dense kernel (512x128).
flow.variable @"__iree_flow_bert/encoder/layer_19/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// ffn_layer_0: intermediate/dense bias (512) + kernel (128x512); output FakeLayerNorm
// beta/gamma (128); output/dense bias (128) + kernel (512x128).
flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// ffn_layer_1: same six symbols and shapes as ffn_layer_0.
flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// ffn_layer_2: same six symbols and shapes as ffn_layer_0.
flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// intermediate/dense bias (512) and kernel (128x512).
flow.variable @"__iree_flow_bert/encoder/layer_19/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
// output/FakeLayerNorm beta and gamma (128 each).
flow.variable @"__iree_flow_bert/encoder/layer_19/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
// output/bottleneck: FakeLayerNorm beta/gamma (512), dense bias (512), dense kernel (128x512).
flow.variable @"__iree_flow_bert/encoder/layer_19/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
// output/dense bias (128) and kernel (512x128).
flow.variable @"__iree_flow_bert/encoder/layer_19/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_19/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// encoder/layer_2 (first declarations of this layer's group): private
// flow.variable symbols, all f32 tensors.
// NOTE(review): every initializer prints as opaque<"_", "0xDEADBEEF">; this
// looks like an elided-elements placeholder rather than real data - confirm.
// attention/output: FakeLayerNorm beta/gamma (128), dense bias (128), dense kernel (128x128).
flow.variable @"__iree_flow_bert/encoder/layer_2/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
// attention/self: key/query/value bias (128); key and query kernels are 128x128,
// the value kernel is 512x128.
flow.variable @"__iree_flow_bert/encoder/layer_2/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// bottleneck/attention: FakeLayerNorm beta/gamma (128), dense bias (128), dense kernel (512x128).
flow.variable @"__iree_flow_bert/encoder/layer_2/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// bottleneck/input: FakeLayerNorm beta/gamma (128), dense bias (128), dense kernel (512x128).
flow.variable @"__iree_flow_bert/encoder/layer_2/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// ffn_layer_0: intermediate/dense bias (512) + kernel (128x512); output FakeLayerNorm
// beta/gamma (128); output/dense bias (128).
flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_20/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_21/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_22/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_23/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_3/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_4/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_5/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
// Private flow.variable declarations for encoder layer_6, from
// bottleneck/attention/dense/kernel through output/dense/kernel.
// Every initializer in this run is opaque<"_", "0xDEADBEEF">.
// NOTE(review): presumably the printer's stand-in for elided weight data rather
// than real values — confirm before relying on the contents.
// bottleneck/{attention,input}/*: 128-element vectors and 512x128 kernels.
flow.variable @"__iree_flow_bert/encoder/layer_6/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// ffn_layer_{0,1,2}/*: intermediate/dense is 512 / 128x512; output/* is 128 / 512x128.
flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// Layer-level intermediate/* and output/*: output/bottleneck/* is 512 / 128x512;
// output/dense/kernel is 512x128.
flow.variable @"__iree_flow_bert/encoder/layer_6/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_6/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// Private flow.variable declarations for encoder layer_7 (46 symbols, all f32 tensors).
// Every initializer in this run is opaque<"_", "0xDEADBEEF">.
// NOTE(review): presumably the printer's stand-in for elided weight data rather
// than real values — confirm before relying on the contents.
// attention/*: 128-element vectors and 128x128 kernels; self/value/kernel is 512x128.
flow.variable @"__iree_flow_bert/encoder/layer_7/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// bottleneck/{attention,input}/*: 128-element vectors and 512x128 kernels.
flow.variable @"__iree_flow_bert/encoder/layer_7/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// ffn_layer_{0,1,2}/*: intermediate/dense is 512 / 128x512; output/* is 128 / 512x128.
flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// Layer-level intermediate/* and output/*: output/bottleneck/* is 512 / 128x512;
// output/dense/kernel is 512x128.
flow.variable @"__iree_flow_bert/encoder/layer_7/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_7/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// Private flow.variable declarations for encoder layer_8 (46 symbols, all f32 tensors).
// Every initializer in this run is opaque<"_", "0xDEADBEEF">.
// NOTE(review): presumably the printer's stand-in for elided weight data rather
// than real values — confirm before relying on the contents.
// attention/*: 128-element vectors and 128x128 kernels; self/value/kernel is 512x128.
flow.variable @"__iree_flow_bert/encoder/layer_8/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// bottleneck/{attention,input}/*: 128-element vectors and 512x128 kernels.
flow.variable @"__iree_flow_bert/encoder/layer_8/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// ffn_layer_{0,1,2}/*: intermediate/dense is 512 / 128x512; output/* is 128 / 512x128.
flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// Layer-level intermediate/* and output/*: output/bottleneck/* is 512 / 128x512;
// output/dense/kernel is 512x128.
flow.variable @"__iree_flow_bert/encoder/layer_8/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_8/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// Private flow.variable declarations for encoder layer_9 (46 symbols, all f32 tensors).
// Every initializer in this run is opaque<"_", "0xDEADBEEF">.
// NOTE(review): presumably the printer's stand-in for elided weight data rather
// than real values — confirm before relying on the contents.
// attention/*: 128-element vectors and 128x128 kernels; self/value/kernel is 512x128.
flow.variable @"__iree_flow_bert/encoder/layer_9/attention/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/attention/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/attention/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/attention/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/attention/self/key/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/attention/self/key/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/attention/self/query/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/attention/self/query/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/attention/self/value/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/attention/self/value/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// bottleneck/{attention,input}/*: 128-element vectors and 512x128 kernels.
flow.variable @"__iree_flow_bert/encoder/layer_9/bottleneck/attention/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/bottleneck/attention/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/bottleneck/attention/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/bottleneck/attention/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/bottleneck/input/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/bottleneck/input/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/bottleneck/input/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/bottleneck/input/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// ffn_layer_{0,1,2}/*: intermediate/dense is 512 / 128x512; output/* is 128 / 512x128.
flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_0/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_0/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_0/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_0/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_0/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_0/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_1/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_1/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_1/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_1/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_1/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_1/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_2/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_2/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_2/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_2/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_2/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/ffn_layer_2/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// Layer-level intermediate/* and output/*: output/bottleneck/* is 512 / 128x512;
// output/dense/kernel is 512x128.
flow.variable @"__iree_flow_bert/encoder/layer_9/intermediate/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/intermediate/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/output/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/output/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/output/bottleneck/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/output/bottleneck/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/output/bottleneck/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/output/bottleneck/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/output/dense/bias" opaque<"_", "0xDEADBEEF"> : tensor<128xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/encoder/layer_9/output/dense/kernel" opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32> attributes {sym_visibility = "private"}
// Private flow.variable declarations under cls/squad.
// output_bias is the only variable in this pair with literal contents: a dense
// two-element f32 initializer.
flow.variable @"__iree_flow_cls/squad/output_bias" dense<[0.0287729427, 0.0297581609]> : tensor<2xf32> attributes {sym_visibility = "private"}
// output_weights is 2x512 f32 with an opaque<"_", "0xDEADBEEF"> initializer.
// NOTE(review): presumably a stand-in for elided data — confirm.
flow.variable @"__iree_flow_cls/squad/output_weights" opaque<"_", "0xDEADBEEF"> : tensor<2x512xf32> attributes {sym_visibility = "private"}
// Public entry point, marked with the iree.abi.stub attribute. It forwards its
// three !hal.buffer_view arguments unchanged to the private @_serving_default
// and returns that call's two results in the same order.
// The iree.reflection/iree.abi string lists the arguments as segment_ids,
// input_mask, input_ids and the results as end_logits, start_logits.
builtin.func @serving_default(
    %arg0: !hal.buffer_view,
    %arg1: !hal.buffer_view,
    %arg2: !hal.buffer_view
) -> (!hal.buffer_view, !hal.buffer_view)
    attributes {iree.abi.stub, iree.reflection = {iree.abi = "{\22a\22:[[\22named\22,\22segment_ids\22,[\22ndarray\22,\22i32\22,2,1,384]],[\22named\22,\22input_mask\22,[\22ndarray\22,\22i32\22,2,1,384]],[\22named\22,\22input_ids\22,[\22ndarray\22,\22i32\22,2,1,384]]],\22r\22:[[\22sdict\22,[\22end_logits\22,[\22ndarray\22,\22f32\22,2,1,384]],[\22start_logits\22,[\22ndarray\22,\22f32\22,2,1,384]]]],\22v\22:1}"}} {
  // Delegate to the private implementation; both results come from one call.
  %views:2 = call @_serving_default(%arg0, %arg1, %arg2)
      : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view)
      -> (!hal.buffer_view, !hal.buffer_view)
  return %views#0, %views#1 : !hal.buffer_view, !hal.buffer_view
}
// Private wrapper between the buffer-view ABI and the tensor-typed model
// function. Each !hal.buffer_view argument is cast to tensor<1x384xi32>,
// @serving_default__ireesm is called, and both tensor<1x384xf32> results are
// cast back to !hal.buffer_view in the order they were produced.
builtin.func private @_serving_default(
    %arg0: !hal.buffer_view,
    %arg1: !hal.buffer_view,
    %arg2: !hal.buffer_view
) -> (!hal.buffer_view, !hal.buffer_view)
    attributes {iree.abi = "{\22a\22:[[\22named\22,\22segment_ids\22,[\22ndarray\22,\22i32\22,2,1,384]],[\22named\22,\22input_mask\22,[\22ndarray\22,\22i32\22,2,1,384]],[\22named\22,\22input_ids\22,[\22ndarray\22,\22i32\22,2,1,384]]],\22r\22:[[\22sdict\22,[\22end_logits\22,[\22ndarray\22,\22f32\22,2,1,384]],[\22start_logits\22,[\22ndarray\22,\22f32\22,2,1,384]]]],\22v\22:1}"} {
  // Inputs: buffer view -> tensor, one cast per argument, in argument order.
  %in0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x384xi32>
  %in1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<1x384xi32>
  %in2 = hal.tensor.cast %arg2 : !hal.buffer_view -> tensor<1x384xi32>
  // Tensor-typed model function; returns two f32 tensors.
  %outs:2 = call @serving_default__ireesm(%in0, %in1, %in2)
      : (tensor<1x384xi32>, tensor<1x384xi32>, tensor<1x384xi32>)
      -> (tensor<1x384xf32>, tensor<1x384xf32>)
  // Outputs: tensor -> buffer view, preserving result order.
  %out0_view = hal.tensor.cast %outs#0 : tensor<1x384xf32> -> !hal.buffer_view
  %out1_view = hal.tensor.cast %outs#1 : tensor<1x384xf32> -> !hal.buffer_view
  return %out0_view, %out1_view : !hal.buffer_view, !hal.buffer_view
}
builtin.func private @serving_default__ireesm(%arg0: tensor<1x384xi32>, %arg1: tensor<1x384xi32>, %arg2: tensor<1x384xi32>) -> (tensor<1x384xf32>, tensor<1x384xf32>) attributes {tf.entry_function = {control_outputs = "", inputs = "segment_ids:0,input_mask:0,input_ids:0", outputs = "end_logits:0,start_logits:0"}} {
%cst = constant opaque<"_", "0xDEADBEEF"> : tensor<2x512xf32>
%cst_0 = constant dense<[0.0287729427, 0.0297581609]> : tensor<2xf32>
%cst_1 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_2 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_3 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_4 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_5 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_6 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_7 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_8 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_9 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_10 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_11 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_12 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_13 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_14 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_15 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_16 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_17 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_18 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_19 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_20 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_21 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_22 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_23 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_24 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_25 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_26 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_27 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_28 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_29 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_30 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_31 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_32 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_33 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_34 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_35 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_36 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_37 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_38 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_39 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_40 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_41 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_42 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_43 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_44 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_45 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_46 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_47 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_48 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_49 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_50 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_51 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_52 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_53 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_54 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_55 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_56 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_57 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_58 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_59 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_60 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_61 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_62 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_63 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_64 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_65 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_66 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_67 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_68 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_69 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_70 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_71 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_72 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_73 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_74 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_75 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_76 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_77 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_78 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_79 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_80 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_81 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_82 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_83 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_84 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_85 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_86 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_87 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_88 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_89 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_90 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_91 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_92 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_93 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_94 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_95 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_96 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_97 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_98 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_99 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_100 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_101 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_102 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_103 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_104 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_105 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_106 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_107 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_108 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_109 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_110 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_111 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_112 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_113 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_114 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_115 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_116 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_117 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_118 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_119 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_120 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_121 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_122 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_123 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_124 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_125 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_126 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_127 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_128 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_129 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_130 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_131 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_132 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_133 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_134 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_135 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_136 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_137 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_138 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_139 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_140 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_141 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_142 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_143 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_144 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_145 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_146 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_147 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_148 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_149 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_150 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_151 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_152 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_153 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_154 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_155 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_156 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_157 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_158 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_159 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_160 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_161 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_162 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_163 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_164 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_165 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_166 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_167 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_168 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_169 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_170 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_171 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_172 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_173 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_174 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_175 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_176 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_177 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_178 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_179 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_180 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_181 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_182 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_183 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_184 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_185 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_186 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_187 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_188 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_189 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_190 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_191 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_192 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_193 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_194 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_195 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_196 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_197 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_198 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_199 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_200 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_201 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_202 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_203 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_204 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_205 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_206 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_207 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_208 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_209 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_210 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_211 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_212 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_213 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_214 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_215 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_216 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_217 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_218 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_219 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_220 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_221 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_222 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_223 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_224 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_225 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_226 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_227 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_228 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_229 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_230 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_231 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_232 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_233 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_234 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_235 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_236 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_237 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_238 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_239 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_240 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_241 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_242 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_243 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_244 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_245 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_246 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_247 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_248 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_249 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_250 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_251 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_252 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_253 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_254 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_255 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_256 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_257 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_258 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_259 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_260 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_261 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_262 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_263 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_264 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_265 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_266 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_267 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_268 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_269 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_270 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_271 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_272 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_273 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_274 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_275 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_276 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_277 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_278 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_279 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_280 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_281 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_282 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_283 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_284 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_285 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_286 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_287 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_288 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_289 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_290 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_291 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_292 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_293 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_294 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_295 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_296 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_297 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_298 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_299 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_300 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_301 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_302 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_303 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_304 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_305 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_306 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_307 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_308 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_309 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_310 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_311 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_312 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_313 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_314 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_315 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_316 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_317 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_318 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_319 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_320 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_321 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_322 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_323 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_324 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_325 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_326 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_327 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_328 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_329 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_330 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_331 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_332 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_333 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_334 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_335 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_336 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_337 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_338 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_339 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_340 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_341 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_342 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_343 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_344 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_345 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_346 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_347 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_348 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_349 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_350 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_351 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_352 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_353 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_354 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_355 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_356 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_357 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_358 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_359 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_360 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_361 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_362 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_363 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_364 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_365 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_366 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_367 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_368 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_369 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_370 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_371 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_372 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_373 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_374 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_375 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_376 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_377 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_378 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_379 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_380 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_381 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_382 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_383 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_384 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_385 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_386 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_387 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_388 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_389 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_390 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_391 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_392 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_393 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_394 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_395 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_396 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_397 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_398 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_399 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_400 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_401 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_402 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_403 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_404 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_405 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_406 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_407 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_408 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_409 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_410 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_411 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_412 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_413 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_414 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_415 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_416 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_417 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_418 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_419 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_420 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_421 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_422 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_423 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_424 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_425 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_426 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_427 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_428 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_429 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_430 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_431 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_432 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_433 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_434 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_435 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_436 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_437 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_438 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_439 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_440 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_441 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_442 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_443 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_444 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_445 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_446 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_447 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_448 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_449 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_450 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_451 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_452 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_453 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_454 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_455 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_456 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_457 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_458 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_459 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_460 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_461 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_462 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_463 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_464 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_465 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_466 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_467 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_468 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_469 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_470 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_471 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_472 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_473 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_474 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_475 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_476 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_477 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_478 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_479 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_480 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_481 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_482 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_483 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_484 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_485 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_486 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_487 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_488 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_489 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_490 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_491 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_492 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_493 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_494 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_495 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_496 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_497 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_498 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_499 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_500 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_501 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_502 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_503 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_504 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_505 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_506 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_507 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_508 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_509 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_510 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_511 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_512 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_513 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_514 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_515 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_516 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_517 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_518 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_519 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_520 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_521 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_522 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_523 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_524 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_525 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_526 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_527 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_528 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_529 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_530 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_531 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_532 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_533 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_534 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_535 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_536 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_537 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_538 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_539 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_540 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_541 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_542 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_543 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_544 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_545 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_546 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_547 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_548 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_549 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_550 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_551 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_552 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_553 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_554 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_555 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_556 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_557 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_558 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_559 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_560 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_561 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_562 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_563 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_564 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_565 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_566 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_567 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_568 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_569 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_570 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_571 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_572 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_573 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_574 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_575 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_576 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_577 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_578 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_579 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_580 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_581 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_582 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_583 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_584 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_585 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_586 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_587 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_588 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_589 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_590 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_591 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_592 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_593 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_594 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_595 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_596 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_597 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_598 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_599 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_600 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_601 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_602 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_603 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_604 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_605 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_606 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_607 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_608 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_609 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_610 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_611 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_612 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_613 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_614 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_615 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_616 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_617 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_618 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_619 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_620 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_621 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_622 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_623 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_624 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_625 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_626 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_627 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_628 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_629 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_630 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_631 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_632 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_633 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_634 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_635 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_636 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_637 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_638 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_639 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_640 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_641 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_642 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_643 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_644 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_645 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_646 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_647 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_648 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_649 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_650 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_651 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_652 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_653 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_654 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_655 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_656 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_657 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_658 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_659 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_660 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_661 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_662 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_663 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_664 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_665 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_666 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_667 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_668 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_669 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_670 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_671 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_672 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_673 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_674 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_675 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_676 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_677 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_678 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_679 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_680 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_681 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_682 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_683 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_684 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_685 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_686 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_687 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_688 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_689 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_690 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_691 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_692 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_693 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_694 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_695 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_696 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_697 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_698 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_699 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_700 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_701 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_702 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_703 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_704 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_705 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_706 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_707 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_708 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_709 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_710 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_711 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_712 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_713 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_714 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_715 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_716 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_717 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_718 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_719 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_720 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_721 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_722 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_723 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_724 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_725 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_726 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_727 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_728 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_729 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_730 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_731 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_732 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_733 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_734 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_735 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_736 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_737 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_738 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_739 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_740 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_741 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_742 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_743 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_744 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_745 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_746 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_747 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_748 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_749 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_750 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_751 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_752 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_753 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_754 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_755 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_756 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_757 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_758 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_759 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_760 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_761 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_762 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_763 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_764 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_765 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_766 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_767 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_768 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_769 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_770 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_771 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_772 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_773 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_774 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_775 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_776 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_777 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_778 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_779 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_780 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_781 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_782 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_783 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_784 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_785 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_786 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_787 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_788 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_789 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_790 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_791 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_792 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_793 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_794 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_795 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_796 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_797 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_798 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_799 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_800 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_801 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_802 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_803 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_804 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_805 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_806 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_807 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_808 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_809 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_810 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_811 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_812 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_813 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_814 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_815 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_816 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_817 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_818 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_819 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_820 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_821 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_822 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_823 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_824 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_825 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_826 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_827 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_828 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_829 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_830 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_831 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_832 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_833 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_834 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_835 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_836 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_837 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_838 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_839 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_840 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_841 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_842 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_843 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_844 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_845 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_846 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_847 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_848 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_849 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_850 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_851 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_852 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_853 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_854 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_855 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_856 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_857 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_858 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_859 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_860 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_861 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_862 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_863 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_864 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_865 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_866 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_867 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_868 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_869 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_870 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_871 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_872 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_873 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_874 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_875 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_876 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_877 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_878 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_879 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_880 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_881 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_882 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_883 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_884 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_885 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_886 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_887 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_888 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_889 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_890 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_891 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_892 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_893 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_894 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_895 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_896 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_897 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_898 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_899 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_900 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_901 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_902 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_903 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_904 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_905 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_906 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_907 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_908 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_909 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_910 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_911 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_912 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_913 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_914 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_915 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_916 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_917 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_918 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_919 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_920 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_921 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_922 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_923 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_924 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_925 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_926 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_927 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_928 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_929 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_930 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_931 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_932 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_933 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_934 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_935 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_936 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_937 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_938 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_939 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_940 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_941 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_942 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_943 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_944 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_945 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_946 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_947 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_948 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_949 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_950 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_951 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_952 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_953 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_954 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_955 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_956 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_957 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_958 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_959 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_960 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_961 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_962 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_963 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_964 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_965 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_966 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_967 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_968 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_969 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_970 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_971 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_972 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_973 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_974 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_975 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_976 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_977 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_978 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_979 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_980 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_981 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_982 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_983 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_984 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_985 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_986 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_987 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_988 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_989 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_990 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_991 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_992 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_993 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_994 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_995 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_996 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_997 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_998 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_999 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_1000 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1001 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1002 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1003 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1004 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1005 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_1006 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1007 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1008 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1009 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1010 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1011 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_1012 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1013 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1014 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1015 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1016 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1017 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_1018 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1019 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1020 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1021 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1022 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1023 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1024 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1025 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1026 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1027 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1028 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_1029 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1030 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_1031 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1032 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_1033 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1034 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1035 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1036 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1037 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1038 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_1039 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1040 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1041 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1042 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1043 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1044 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_1045 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1046 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1047 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1048 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1049 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1050 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_1051 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1052 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1053 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1054 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1055 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1056 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_1057 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1058 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1059 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1060 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1061 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1062 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_1063 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1064 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1065 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1066 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1067 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1068 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1069 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1070 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1071 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1072 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1073 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_1074 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1075 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_1076 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1077 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_1078 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1079 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1080 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1081 = constant opaque<"_", "0xDEADBEEF"> : tensor<30522x128xf32>
%cst_1082 = constant opaque<"_", "0xDEADBEEF"> : tensor<2x512xf32>
%0 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<1x384x512xf32>
%cst_1083 = constant opaque<"_", "0xDEADBEEF"> : tensor<384x512xf32>
%cst_1084 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1085 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1086 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%1 = mhlo.constant dense<0.000000e+00> : tensor<f32>
%2 = mhlo.constant dense<0xFF800000> : tensor<f32>
%3 = mhlo.constant dense<-1.000000e+04> : tensor<f32>
%4 = mhlo.constant dense<0.176776692> : tensor<f32>
%5 = mhlo.constant dense<1.000000e+04> : tensor<f32>
%6 = mhlo.constant dense<1.000000e+00> : tensor<1x384x1xf32>
%7 = linalg.tensor_expand_shape %arg2 [[0], [1, 2]] : tensor<1x384xi32> into tensor<1x384x1xi32>
%8 = "mhlo.torch_index_select"(%cst_1081, %7) {batch_dims = 0 : i64, dim = 0 : i64} : (tensor<30522x128xf32>, tensor<1x384x1xi32>) -> tensor<1x384x1x128xf32>
%9 = "mhlo.reshape"(%8) : (tensor<1x384x1x128xf32>) -> tensor<1x384x128xf32>
%10 = "mhlo.slice"(%9) {limit_indices = dense<[1, 384, 128]> : tensor<3xi64>, start_indices = dense<[0, 1, 0]> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} : (tensor<1x384x128xf32>) -> tensor<1x383x128xf32>
%11 = "mhlo.pad"(%10, %1) {edge_padding_high = dense<[0, 1, 0]> : tensor<3xi64>, edge_padding_low = dense<0> : tensor<3xi64>, interior_padding = dense<0> : tensor<3xi64>} : (tensor<1x383x128xf32>, tensor<f32>) -> tensor<1x384x128xf32>
%12 = "mhlo.slice"(%9) {limit_indices = dense<[1, 383, 128]> : tensor<3xi64>, start_indices = dense<0> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} : (tensor<1x384x128xf32>) -> tensor<1x383x128xf32>
%13 = "mhlo.pad"(%12, %1) {edge_padding_high = dense<0> : tensor<3xi64>, edge_padding_low = dense<[0, 1, 0]> : tensor<3xi64>, interior_padding = dense<0> : tensor<3xi64>} : (tensor<1x383x128xf32>, tensor<f32>) -> tensor<1x384x128xf32>
%14 = "mhlo.concatenate"(%11, %9, %13) {dimension = 2 : i64} : (tensor<1x384x128xf32>, tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x384xf32>
%15 = "mhlo.reshape"(%14) : (tensor<1x384x384xf32>) -> tensor<384x384xf32>
%16 = "mhlo.dot"(%15, %cst_1083) : (tensor<384x384xf32>, tensor<384x512xf32>) -> tensor<384x512xf32>
%17 = chlo.broadcast_add %16, %cst_1084 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%18 = "mhlo.reshape"(%17) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%19 = "mhlo.convert"(%arg1) : (tensor<1x384xi32>) -> tensor<1x384xf32>
%20 = "mhlo.reshape"(%19) : (tensor<1x384xf32>) -> tensor<1x1x384xf32>
%21 = chlo.broadcast_multiply %20, %6 : (tensor<1x1x384xf32>, tensor<1x384x1xf32>) -> tensor<1x384x384xf32>
%22 = linalg.tensor_expand_shape %21 [[0], [1, 2], [3]] : tensor<1x384x384xf32> into tensor<1x1x384x384xf32>
%23 = chlo.broadcast_multiply %22, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x1x384x384xf32>, tensor<f32>) -> tensor<1x1x384x384xf32>
%24 = chlo.broadcast_add %23, %3 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x1x384x384xf32>, tensor<f32>) -> tensor<1x1x384x384xf32>
%25 = "mhlo.torch_index_select"(%cst_1082, %arg0) {batch_dims = 0 : i64, dim = 0 : i64} : (tensor<2x512xf32>, tensor<1x384xi32>) -> tensor<1x384x512xf32>
%26 = chlo.broadcast_add %18, %25 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%27 = chlo.broadcast_add %26, %0 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%28 = chlo.broadcast_multiply %27, %cst_1085 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%29 = chlo.broadcast_add %28, %cst_1086 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%30 = "mhlo.reshape"(%29) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%31 = "mhlo.dot"(%30, %cst_1071) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%32 = chlo.broadcast_add %31, %cst_1072 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%33 = "mhlo.reshape"(%32) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%34 = "mhlo.transpose"(%33) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%35 = "mhlo.dot"(%30, %cst_1067) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%36 = "mhlo.reshape"(%35) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%37 = "mhlo.broadcast_in_dim"(%cst_1068) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%38 = mhlo.add %36, %37 : tensor<1x384x128xf32>
%39 = chlo.broadcast_multiply %38, %cst_1069 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%40 = chlo.broadcast_add %39, %cst_1070 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%41 = "mhlo.reshape"(%40) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%42 = "mhlo.dot"(%41, %cst_1075) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%43 = chlo.broadcast_add %42, %cst_1076 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%44 = "mhlo.reshape"(%43) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%45 = "mhlo.transpose"(%44) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%46 = "mhlo.dot"(%41, %cst_1073) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%47 = chlo.broadcast_add %46, %cst_1074 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%48 = "mhlo.reshape"(%47) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%49 = "mhlo.transpose"(%48) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%50 = "mhlo.dot_general"(%49, %45) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%51 = chlo.broadcast_multiply %50, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%52 = chlo.broadcast_add %51, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%53 = "mhlo.reduce"(%52, %2) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%54 = linalg.tensor_expand_shape %53 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%55 = chlo.broadcast_subtract %52, %54 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%56 = "mhlo.exponential"(%55) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%57 = "mhlo.reduce"(%56, %1) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%58 = linalg.tensor_expand_shape %57 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%59 = chlo.broadcast_divide %56, %58 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%60 = "mhlo.dot_general"(%59, %34) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%61 = "mhlo.transpose"(%60) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%62 = "mhlo.reshape"(%61) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%63 = "mhlo.dot"(%62, %cst_1077) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%64 = chlo.broadcast_add %63, %cst_1078 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%65 = "mhlo.reshape"(%64) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%66 = "mhlo.dot"(%30, %cst_1064) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%67 = chlo.broadcast_add %66, %cst_1065 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%68 = "mhlo.reshape"(%67) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%69 = chlo.broadcast_multiply %68, %cst_1066 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%70 = chlo.broadcast_add %69, %cst_1078 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%71 = chlo.broadcast_add %65, %70 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%72 = chlo.broadcast_multiply %71, %cst_1079 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%73 = chlo.broadcast_add %72, %cst_1080 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%74 = "mhlo.reshape"(%73) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%75 = "mhlo.dot"(%74, %cst_1062) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%76 = chlo.broadcast_add %75, %cst_1063 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%77 = "mhlo.reshape"(%76) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%78 = chlo.broadcast_maximum %77, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%79 = "mhlo.reshape"(%78) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%80 = "mhlo.dot"(%79, %cst_1058) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%81 = chlo.broadcast_add %80, %cst_1059 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%82 = "mhlo.reshape"(%81) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%83 = chlo.broadcast_add %82, %73 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%84 = chlo.broadcast_multiply %83, %cst_1060 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%85 = chlo.broadcast_add %84, %cst_1061 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%86 = "mhlo.reshape"(%85) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%87 = "mhlo.dot"(%86, %cst_1056) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%88 = chlo.broadcast_add %87, %cst_1057 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%89 = "mhlo.reshape"(%88) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%90 = chlo.broadcast_maximum %89, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%91 = "mhlo.reshape"(%90) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%92 = "mhlo.dot"(%91, %cst_1052) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%93 = chlo.broadcast_add %92, %cst_1053 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%94 = "mhlo.reshape"(%93) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%95 = chlo.broadcast_add %94, %85 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%96 = chlo.broadcast_multiply %95, %cst_1054 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%97 = chlo.broadcast_add %96, %cst_1055 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%98 = "mhlo.reshape"(%97) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%99 = "mhlo.dot"(%98, %cst_1050) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%100 = chlo.broadcast_add %99, %cst_1051 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%101 = "mhlo.reshape"(%100) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%102 = chlo.broadcast_maximum %101, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%103 = "mhlo.reshape"(%102) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%104 = "mhlo.dot"(%103, %cst_1046) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%105 = chlo.broadcast_add %104, %cst_1047 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%106 = "mhlo.reshape"(%105) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%107 = chlo.broadcast_add %106, %97 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%108 = chlo.broadcast_multiply %107, %cst_1048 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%109 = chlo.broadcast_add %108, %cst_1049 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%110 = "mhlo.reshape"(%109) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%111 = "mhlo.dot"(%110, %cst_1044) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%112 = chlo.broadcast_add %111, %cst_1045 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%113 = "mhlo.reshape"(%112) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%114 = chlo.broadcast_maximum %113, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%115 = "mhlo.reshape"(%114) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%116 = "mhlo.dot"(%115, %cst_1036) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%117 = chlo.broadcast_add %116, %cst_1037 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%118 = "mhlo.reshape"(%117) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%119 = chlo.broadcast_add %118, %109 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%120 = chlo.broadcast_multiply %119, %cst_1042 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%121 = chlo.broadcast_add %120, %cst_1043 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%122 = "mhlo.reshape"(%121) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%123 = "mhlo.dot"(%122, %cst_1038) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%124 = chlo.broadcast_add %123, %cst_1039 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%125 = "mhlo.reshape"(%124) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%126 = chlo.broadcast_add %125, %29 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%127 = chlo.broadcast_multiply %126, %cst_1040 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%128 = chlo.broadcast_add %127, %cst_1041 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%129 = "mhlo.reshape"(%128) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%130 = "mhlo.dot"(%129, %cst_1026) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%131 = chlo.broadcast_add %130, %cst_1027 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%132 = "mhlo.reshape"(%131) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%133 = "mhlo.transpose"(%132) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%134 = "mhlo.dot"(%129, %cst_1022) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%135 = "mhlo.reshape"(%134) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%136 = "mhlo.broadcast_in_dim"(%cst_1023) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%137 = mhlo.add %135, %136 : tensor<1x384x128xf32>
%138 = chlo.broadcast_multiply %137, %cst_1024 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%139 = chlo.broadcast_add %138, %cst_1025 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%140 = "mhlo.reshape"(%139) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%141 = "mhlo.dot"(%140, %cst_1030) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%142 = chlo.broadcast_add %141, %cst_1031 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%143 = "mhlo.reshape"(%142) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%144 = "mhlo.transpose"(%143) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%145 = "mhlo.dot"(%140, %cst_1028) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%146 = chlo.broadcast_add %145, %cst_1029 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%147 = "mhlo.reshape"(%146) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%148 = "mhlo.transpose"(%147) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%149 = "mhlo.dot_general"(%148, %144) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%150 = chlo.broadcast_multiply %149, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%151 = chlo.broadcast_add %150, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%152 = "mhlo.reduce"(%151, %2) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%153 = linalg.tensor_expand_shape %152 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%154 = chlo.broadcast_subtract %151, %153 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%155 = "mhlo.exponential"(%154) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%156 = "mhlo.reduce"(%155, %1) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%157 = linalg.tensor_expand_shape %156 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%158 = chlo.broadcast_divide %155, %157 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%159 = "mhlo.dot_general"(%158, %133) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%160 = "mhlo.transpose"(%159) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%161 = "mhlo.reshape"(%160) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%162 = "mhlo.dot"(%161, %cst_1032) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%163 = chlo.broadcast_add %162, %cst_1033 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%164 = "mhlo.reshape"(%163) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%165 = "mhlo.dot"(%129, %cst_1019) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%166 = chlo.broadcast_add %165, %cst_1020 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%167 = "mhlo.reshape"(%166) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%168 = chlo.broadcast_multiply %167, %cst_1021 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%169 = chlo.broadcast_add %168, %cst_1033 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%170 = chlo.broadcast_add %164, %169 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%171 = chlo.broadcast_multiply %170, %cst_1034 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%172 = chlo.broadcast_add %171, %cst_1035 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%173 = "mhlo.reshape"(%172) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%174 = "mhlo.dot"(%173, %cst_1017) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%175 = chlo.broadcast_add %174, %cst_1018 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%176 = "mhlo.reshape"(%175) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%177 = chlo.broadcast_maximum %176, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%178 = "mhlo.reshape"(%177) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%179 = "mhlo.dot"(%178, %cst_1013) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%180 = chlo.broadcast_add %179, %cst_1014 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%181 = "mhlo.reshape"(%180) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%182 = chlo.broadcast_add %181, %172 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%183 = chlo.broadcast_multiply %182, %cst_1015 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%184 = chlo.broadcast_add %183, %cst_1016 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%185 = "mhlo.reshape"(%184) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%186 = "mhlo.dot"(%185, %cst_1011) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%187 = chlo.broadcast_add %186, %cst_1012 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%188 = "mhlo.reshape"(%187) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%189 = chlo.broadcast_maximum %188, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%190 = "mhlo.reshape"(%189) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%191 = "mhlo.dot"(%190, %cst_1007) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%192 = chlo.broadcast_add %191, %cst_1008 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%193 = "mhlo.reshape"(%192) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%194 = chlo.broadcast_add %193, %184 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%195 = chlo.broadcast_multiply %194, %cst_1009 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%196 = chlo.broadcast_add %195, %cst_1010 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%197 = "mhlo.reshape"(%196) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%198 = "mhlo.dot"(%197, %cst_1005) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%199 = chlo.broadcast_add %198, %cst_1006 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%200 = "mhlo.reshape"(%199) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%201 = chlo.broadcast_maximum %200, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%202 = "mhlo.reshape"(%201) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%203 = "mhlo.dot"(%202, %cst_1001) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%204 = chlo.broadcast_add %203, %cst_1002 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%205 = "mhlo.reshape"(%204) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%206 = chlo.broadcast_add %205, %196 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%207 = chlo.broadcast_multiply %206, %cst_1003 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%208 = chlo.broadcast_add %207, %cst_1004 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%209 = "mhlo.reshape"(%208) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%210 = "mhlo.dot"(%209, %cst_999) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%211 = chlo.broadcast_add %210, %cst_1000 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%212 = "mhlo.reshape"(%211) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%213 = chlo.broadcast_maximum %212, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%214 = "mhlo.reshape"(%213) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%215 = "mhlo.dot"(%214, %cst_991) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%216 = chlo.broadcast_add %215, %cst_992 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%217 = "mhlo.reshape"(%216) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%218 = chlo.broadcast_add %217, %208 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%219 = chlo.broadcast_multiply %218, %cst_997 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%220 = chlo.broadcast_add %219, %cst_998 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%221 = "mhlo.reshape"(%220) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%222 = "mhlo.dot"(%221, %cst_993) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%223 = chlo.broadcast_add %222, %cst_994 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%224 = "mhlo.reshape"(%223) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%225 = chlo.broadcast_add %224, %128 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%226 = chlo.broadcast_multiply %225, %cst_995 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%227 = chlo.broadcast_add %226, %cst_996 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%228 = "mhlo.reshape"(%227) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%229 = "mhlo.dot"(%228, %cst_531) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%230 = chlo.broadcast_add %229, %cst_532 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%231 = "mhlo.reshape"(%230) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%232 = "mhlo.transpose"(%231) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%233 = "mhlo.dot"(%228, %cst_527) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%234 = "mhlo.reshape"(%233) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%235 = "mhlo.broadcast_in_dim"(%cst_528) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%236 = mhlo.add %234, %235 : tensor<1x384x128xf32>
%237 = chlo.broadcast_multiply %236, %cst_529 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%238 = chlo.broadcast_add %237, %cst_530 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%239 = "mhlo.reshape"(%238) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%240 = "mhlo.dot"(%239, %cst_535) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%241 = chlo.broadcast_add %240, %cst_536 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%242 = "mhlo.reshape"(%241) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%243 = "mhlo.transpose"(%242) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%244 = "mhlo.dot"(%239, %cst_533) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%245 = chlo.broadcast_add %244, %cst_534 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%246 = "mhlo.reshape"(%245) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%247 = "mhlo.transpose"(%246) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%248 = "mhlo.dot_general"(%247, %243) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%249 = chlo.broadcast_multiply %248, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%250 = chlo.broadcast_add %249, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%251 = "mhlo.reduce"(%250, %2) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%252 = linalg.tensor_expand_shape %251 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%253 = chlo.broadcast_subtract %250, %252 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%254 = "mhlo.exponential"(%253) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%255 = "mhlo.reduce"(%254, %1) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%256 = linalg.tensor_expand_shape %255 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%257 = chlo.broadcast_divide %254, %256 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%258 = "mhlo.dot_general"(%257, %232) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%259 = "mhlo.transpose"(%258) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%260 = "mhlo.reshape"(%259) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%261 = "mhlo.dot"(%260, %cst_537) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%262 = chlo.broadcast_add %261, %cst_538 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%263 = "mhlo.reshape"(%262) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%264 = "mhlo.dot"(%228, %cst_524) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%265 = chlo.broadcast_add %264, %cst_525 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%266 = "mhlo.reshape"(%265) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%267 = chlo.broadcast_multiply %266, %cst_526 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%268 = chlo.broadcast_add %267, %cst_538 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%269 = chlo.broadcast_add %263, %268 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%270 = chlo.broadcast_multiply %269, %cst_539 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%271 = chlo.broadcast_add %270, %cst_540 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%272 = "mhlo.reshape"(%271) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%273 = "mhlo.dot"(%272, %cst_522) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%274 = chlo.broadcast_add %273, %cst_523 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%275 = "mhlo.reshape"(%274) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%276 = chlo.broadcast_maximum %275, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%277 = "mhlo.reshape"(%276) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%278 = "mhlo.dot"(%277, %cst_518) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%279 = chlo.broadcast_add %278, %cst_519 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%280 = "mhlo.reshape"(%279) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%281 = chlo.broadcast_add %280, %271 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%282 = chlo.broadcast_multiply %281, %cst_520 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%283 = chlo.broadcast_add %282, %cst_521 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%284 = "mhlo.reshape"(%283) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%285 = "mhlo.dot"(%284, %cst_516) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%286 = chlo.broadcast_add %285, %cst_517 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%287 = "mhlo.reshape"(%286) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%288 = chlo.broadcast_maximum %287, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%289 = "mhlo.reshape"(%288) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%290 = "mhlo.dot"(%289, %cst_512) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%291 = chlo.broadcast_add %290, %cst_513 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%292 = "mhlo.reshape"(%291) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%293 = chlo.broadcast_add %292, %283 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%294 = chlo.broadcast_multiply %293, %cst_514 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%295 = chlo.broadcast_add %294, %cst_515 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%296 = "mhlo.reshape"(%295) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%297 = "mhlo.dot"(%296, %cst_510) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%298 = chlo.broadcast_add %297, %cst_511 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%299 = "mhlo.reshape"(%298) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%300 = chlo.broadcast_maximum %299, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%301 = "mhlo.reshape"(%300) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%302 = "mhlo.dot"(%301, %cst_506) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%303 = chlo.broadcast_add %302, %cst_507 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%304 = "mhlo.reshape"(%303) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%305 = chlo.broadcast_add %304, %295 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%306 = chlo.broadcast_multiply %305, %cst_508 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%307 = chlo.broadcast_add %306, %cst_509 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%308 = "mhlo.reshape"(%307) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%309 = "mhlo.dot"(%308, %cst_504) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%310 = chlo.broadcast_add %309, %cst_505 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%311 = "mhlo.reshape"(%310) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%312 = chlo.broadcast_maximum %311, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%313 = "mhlo.reshape"(%312) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%314 = "mhlo.dot"(%313, %cst_496) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%315 = chlo.broadcast_add %314, %cst_497 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%316 = "mhlo.reshape"(%315) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%317 = chlo.broadcast_add %316, %307 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%318 = chlo.broadcast_multiply %317, %cst_502 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%319 = chlo.broadcast_add %318, %cst_503 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%320 = "mhlo.reshape"(%319) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%321 = "mhlo.dot"(%320, %cst_498) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%322 = chlo.broadcast_add %321, %cst_499 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%323 = "mhlo.reshape"(%322) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%324 = chlo.broadcast_add %323, %227 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%325 = chlo.broadcast_multiply %324, %cst_500 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%326 = chlo.broadcast_add %325, %cst_501 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%327 = "mhlo.reshape"(%326) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%328 = "mhlo.dot"(%327, %cst_306) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%329 = chlo.broadcast_add %328, %cst_307 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%330 = "mhlo.reshape"(%329) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%331 = "mhlo.transpose"(%330) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%332 = "mhlo.dot"(%327, %cst_302) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%333 = "mhlo.reshape"(%332) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%334 = "mhlo.broadcast_in_dim"(%cst_303) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%335 = mhlo.add %333, %334 : tensor<1x384x128xf32>
%336 = chlo.broadcast_multiply %335, %cst_304 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%337 = chlo.broadcast_add %336, %cst_305 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%338 = "mhlo.reshape"(%337) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%339 = "mhlo.dot"(%338, %cst_310) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%340 = chlo.broadcast_add %339, %cst_311 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%341 = "mhlo.reshape"(%340) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%342 = "mhlo.transpose"(%341) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%343 = "mhlo.dot"(%338, %cst_308) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%344 = chlo.broadcast_add %343, %cst_309 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%345 = "mhlo.reshape"(%344) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%346 = "mhlo.transpose"(%345) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%347 = "mhlo.dot_general"(%346, %342) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%348 = chlo.broadcast_multiply %347, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%349 = chlo.broadcast_add %348, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%350 = "mhlo.reduce"(%349, %2) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%351 = linalg.tensor_expand_shape %350 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%352 = chlo.broadcast_subtract %349, %351 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%353 = "mhlo.exponential"(%352) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%354 = "mhlo.reduce"(%353, %1) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%355 = linalg.tensor_expand_shape %354 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%356 = chlo.broadcast_divide %353, %355 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%357 = "mhlo.dot_general"(%356, %331) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%358 = "mhlo.transpose"(%357) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%359 = "mhlo.reshape"(%358) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%360 = "mhlo.dot"(%359, %cst_312) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%361 = chlo.broadcast_add %360, %cst_313 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%362 = "mhlo.reshape"(%361) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%363 = "mhlo.dot"(%327, %cst_299) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%364 = chlo.broadcast_add %363, %cst_300 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%365 = "mhlo.reshape"(%364) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%366 = chlo.broadcast_multiply %365, %cst_301 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%367 = chlo.broadcast_add %366, %cst_313 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%368 = chlo.broadcast_add %362, %367 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%369 = chlo.broadcast_multiply %368, %cst_314 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%370 = chlo.broadcast_add %369, %cst_315 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%371 = "mhlo.reshape"(%370) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%372 = "mhlo.dot"(%371, %cst_297) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%373 = chlo.broadcast_add %372, %cst_298 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%374 = "mhlo.reshape"(%373) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%375 = chlo.broadcast_maximum %374, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%376 = "mhlo.reshape"(%375) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%377 = "mhlo.dot"(%376, %cst_293) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%378 = chlo.broadcast_add %377, %cst_294 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%379 = "mhlo.reshape"(%378) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%380 = chlo.broadcast_add %379, %370 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%381 = chlo.broadcast_multiply %380, %cst_295 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%382 = chlo.broadcast_add %381, %cst_296 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%383 = "mhlo.reshape"(%382) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%384 = "mhlo.dot"(%383, %cst_291) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%385 = chlo.broadcast_add %384, %cst_292 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%386 = "mhlo.reshape"(%385) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%387 = chlo.broadcast_maximum %386, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%388 = "mhlo.reshape"(%387) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%389 = "mhlo.dot"(%388, %cst_287) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%390 = chlo.broadcast_add %389, %cst_288 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%391 = "mhlo.reshape"(%390) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%392 = chlo.broadcast_add %391, %382 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%393 = chlo.broadcast_multiply %392, %cst_289 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%394 = chlo.broadcast_add %393, %cst_290 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%395 = "mhlo.reshape"(%394) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%396 = "mhlo.dot"(%395, %cst_285) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%397 = chlo.broadcast_add %396, %cst_286 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%398 = "mhlo.reshape"(%397) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%399 = chlo.broadcast_maximum %398, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%400 = "mhlo.reshape"(%399) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%401 = "mhlo.dot"(%400, %cst_281) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%402 = chlo.broadcast_add %401, %cst_282 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%403 = "mhlo.reshape"(%402) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%404 = chlo.broadcast_add %403, %394 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%405 = chlo.broadcast_multiply %404, %cst_283 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%406 = chlo.broadcast_add %405, %cst_284 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%407 = "mhlo.reshape"(%406) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%408 = "mhlo.dot"(%407, %cst_279) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%409 = chlo.broadcast_add %408, %cst_280 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%410 = "mhlo.reshape"(%409) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%411 = chlo.broadcast_maximum %410, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%412 = "mhlo.reshape"(%411) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%413 = "mhlo.dot"(%412, %cst_271) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%414 = chlo.broadcast_add %413, %cst_272 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%415 = "mhlo.reshape"(%414) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%416 = chlo.broadcast_add %415, %406 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%417 = chlo.broadcast_multiply %416, %cst_277 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%418 = chlo.broadcast_add %417, %cst_278 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%419 = "mhlo.reshape"(%418) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%420 = "mhlo.dot"(%419, %cst_273) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%421 = chlo.broadcast_add %420, %cst_274 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%422 = "mhlo.reshape"(%421) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%423 = chlo.broadcast_add %422, %326 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%424 = chlo.broadcast_multiply %423, %cst_275 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%425 = chlo.broadcast_add %424, %cst_276 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%426 = "mhlo.reshape"(%425) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%427 = "mhlo.dot"(%426, %cst_261) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%428 = chlo.broadcast_add %427, %cst_262 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%429 = "mhlo.reshape"(%428) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%430 = "mhlo.transpose"(%429) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%431 = "mhlo.dot"(%426, %cst_257) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%432 = "mhlo.reshape"(%431) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%433 = "mhlo.broadcast_in_dim"(%cst_258) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%434 = mhlo.add %432, %433 : tensor<1x384x128xf32>
%435 = chlo.broadcast_multiply %434, %cst_259 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%436 = chlo.broadcast_add %435, %cst_260 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%437 = "mhlo.reshape"(%436) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%438 = "mhlo.dot"(%437, %cst_265) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%439 = chlo.broadcast_add %438, %cst_266 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%440 = "mhlo.reshape"(%439) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%441 = "mhlo.transpose"(%440) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%442 = "mhlo.dot"(%437, %cst_263) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%443 = chlo.broadcast_add %442, %cst_264 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%444 = "mhlo.reshape"(%443) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%445 = "mhlo.transpose"(%444) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%446 = "mhlo.dot_general"(%445, %441) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%447 = chlo.broadcast_multiply %446, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%448 = chlo.broadcast_add %447, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%449 = "mhlo.reduce"(%448, %2) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%450 = linalg.tensor_expand_shape %449 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%451 = chlo.broadcast_subtract %448, %450 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%452 = "mhlo.exponential"(%451) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%453 = "mhlo.reduce"(%452, %1) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%454 = linalg.tensor_expand_shape %453 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%455 = chlo.broadcast_divide %452, %454 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%456 = "mhlo.dot_general"(%455, %430) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%457 = "mhlo.transpose"(%456) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%458 = "mhlo.reshape"(%457) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%459 = "mhlo.dot"(%458, %cst_267) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%460 = chlo.broadcast_add %459, %cst_268 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%461 = "mhlo.reshape"(%460) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%462 = "mhlo.dot"(%426, %cst_254) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%463 = chlo.broadcast_add %462, %cst_255 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%464 = "mhlo.reshape"(%463) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%465 = chlo.broadcast_multiply %464, %cst_256 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%466 = chlo.broadcast_add %465, %cst_268 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%467 = chlo.broadcast_add %461, %466 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%468 = chlo.broadcast_multiply %467, %cst_269 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%469 = chlo.broadcast_add %468, %cst_270 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%470 = "mhlo.reshape"(%469) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%471 = "mhlo.dot"(%470, %cst_252) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%472 = chlo.broadcast_add %471, %cst_253 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%473 = "mhlo.reshape"(%472) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%474 = chlo.broadcast_maximum %473, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%475 = "mhlo.reshape"(%474) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%476 = "mhlo.dot"(%475, %cst_248) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%477 = chlo.broadcast_add %476, %cst_249 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%478 = "mhlo.reshape"(%477) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%479 = chlo.broadcast_add %478, %469 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%480 = chlo.broadcast_multiply %479, %cst_250 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%481 = chlo.broadcast_add %480, %cst_251 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%482 = "mhlo.reshape"(%481) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%483 = "mhlo.dot"(%482, %cst_246) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%484 = chlo.broadcast_add %483, %cst_247 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%485 = "mhlo.reshape"(%484) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%486 = chlo.broadcast_maximum %485, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%487 = "mhlo.reshape"(%486) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%488 = "mhlo.dot"(%487, %cst_242) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%489 = chlo.broadcast_add %488, %cst_243 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%490 = "mhlo.reshape"(%489) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%491 = chlo.broadcast_add %490, %481 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%492 = chlo.broadcast_multiply %491, %cst_244 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%493 = chlo.broadcast_add %492, %cst_245 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%494 = "mhlo.reshape"(%493) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%495 = "mhlo.dot"(%494, %cst_240) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%496 = chlo.broadcast_add %495, %cst_241 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%497 = "mhlo.reshape"(%496) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%498 = chlo.broadcast_maximum %497, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%499 = "mhlo.reshape"(%498) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%500 = "mhlo.dot"(%499, %cst_236) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%501 = chlo.broadcast_add %500, %cst_237 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%502 = "mhlo.reshape"(%501) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%503 = chlo.broadcast_add %502, %493 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%504 = chlo.broadcast_multiply %503, %cst_238 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%505 = chlo.broadcast_add %504, %cst_239 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%506 = "mhlo.reshape"(%505) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%507 = "mhlo.dot"(%506, %cst_234) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%508 = chlo.broadcast_add %507, %cst_235 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%509 = "mhlo.reshape"(%508) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%510 = chlo.broadcast_maximum %509, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%511 = "mhlo.reshape"(%510) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%512 = "mhlo.dot"(%511, %cst_226) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%513 = chlo.broadcast_add %512, %cst_227 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%514 = "mhlo.reshape"(%513) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%515 = chlo.broadcast_add %514, %505 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%516 = chlo.broadcast_multiply %515, %cst_232 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%517 = chlo.broadcast_add %516, %cst_233 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%518 = "mhlo.reshape"(%517) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%519 = "mhlo.dot"(%518, %cst_228) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%520 = chlo.broadcast_add %519, %cst_229 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%521 = "mhlo.reshape"(%520) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%522 = chlo.broadcast_add %521, %425 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%523 = chlo.broadcast_multiply %522, %cst_230 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%524 = chlo.broadcast_add %523, %cst_231 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%525 = "mhlo.reshape"(%524) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%526 = "mhlo.dot"(%525, %cst_216) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%527 = chlo.broadcast_add %526, %cst_217 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%528 = "mhlo.reshape"(%527) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%529 = "mhlo.transpose"(%528) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%530 = "mhlo.dot"(%525, %cst_212) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%531 = "mhlo.reshape"(%530) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%532 = "mhlo.broadcast_in_dim"(%cst_213) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%533 = mhlo.add %531, %532 : tensor<1x384x128xf32>
%534 = chlo.broadcast_multiply %533, %cst_214 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%535 = chlo.broadcast_add %534, %cst_215 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%536 = "mhlo.reshape"(%535) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%537 = "mhlo.dot"(%536, %cst_220) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%538 = chlo.broadcast_add %537, %cst_221 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%539 = "mhlo.reshape"(%538) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%540 = "mhlo.transpose"(%539) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%541 = "mhlo.dot"(%536, %cst_218) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%542 = chlo.broadcast_add %541, %cst_219 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%543 = "mhlo.reshape"(%542) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%544 = "mhlo.transpose"(%543) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%545 = "mhlo.dot_general"(%544, %540) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%546 = chlo.broadcast_multiply %545, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%547 = chlo.broadcast_add %546, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%548 = "mhlo.reduce"(%547, %2) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%549 = linalg.tensor_expand_shape %548 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%550 = chlo.broadcast_subtract %547, %549 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%551 = "mhlo.exponential"(%550) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%552 = "mhlo.reduce"(%551, %1) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%553 = linalg.tensor_expand_shape %552 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%554 = chlo.broadcast_divide %551, %553 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%555 = "mhlo.dot_general"(%554, %529) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%556 = "mhlo.transpose"(%555) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%557 = "mhlo.reshape"(%556) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%558 = "mhlo.dot"(%557, %cst_222) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%559 = chlo.broadcast_add %558, %cst_223 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%560 = "mhlo.reshape"(%559) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%561 = "mhlo.dot"(%525, %cst_209) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%562 = chlo.broadcast_add %561, %cst_210 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%563 = "mhlo.reshape"(%562) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%564 = chlo.broadcast_multiply %563, %cst_211 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%565 = chlo.broadcast_add %564, %cst_223 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%566 = chlo.broadcast_add %560, %565 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%567 = chlo.broadcast_multiply %566, %cst_224 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%568 = chlo.broadcast_add %567, %cst_225 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%569 = "mhlo.reshape"(%568) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%570 = "mhlo.dot"(%569, %cst_207) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%571 = chlo.broadcast_add %570, %cst_208 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%572 = "mhlo.reshape"(%571) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%573 = chlo.broadcast_maximum %572, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%574 = "mhlo.reshape"(%573) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%575 = "mhlo.dot"(%574, %cst_203) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%576 = chlo.broadcast_add %575, %cst_204 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%577 = "mhlo.reshape"(%576) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%578 = chlo.broadcast_add %577, %568 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%579 = chlo.broadcast_multiply %578, %cst_205 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%580 = chlo.broadcast_add %579, %cst_206 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%581 = "mhlo.reshape"(%580) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%582 = "mhlo.dot"(%581, %cst_201) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%583 = chlo.broadcast_add %582, %cst_202 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%584 = "mhlo.reshape"(%583) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%585 = chlo.broadcast_maximum %584, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%586 = "mhlo.reshape"(%585) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%587 = "mhlo.dot"(%586, %cst_197) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%588 = chlo.broadcast_add %587, %cst_198 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%589 = "mhlo.reshape"(%588) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%590 = chlo.broadcast_add %589, %580 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%591 = chlo.broadcast_multiply %590, %cst_199 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%592 = chlo.broadcast_add %591, %cst_200 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%593 = "mhlo.reshape"(%592) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%594 = "mhlo.dot"(%593, %cst_195) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%595 = chlo.broadcast_add %594, %cst_196 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%596 = "mhlo.reshape"(%595) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%597 = chlo.broadcast_maximum %596, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%598 = "mhlo.reshape"(%597) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%599 = "mhlo.dot"(%598, %cst_191) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%600 = chlo.broadcast_add %599, %cst_192 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%601 = "mhlo.reshape"(%600) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%602 = chlo.broadcast_add %601, %592 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%603 = chlo.broadcast_multiply %602, %cst_193 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%604 = chlo.broadcast_add %603, %cst_194 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%605 = "mhlo.reshape"(%604) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%606 = "mhlo.dot"(%605, %cst_189) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%607 = chlo.broadcast_add %606, %cst_190 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%608 = "mhlo.reshape"(%607) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%609 = chlo.broadcast_maximum %608, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%610 = "mhlo.reshape"(%609) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%611 = "mhlo.dot"(%610, %cst_181) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%612 = chlo.broadcast_add %611, %cst_182 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%613 = "mhlo.reshape"(%612) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%614 = chlo.broadcast_add %613, %604 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%615 = chlo.broadcast_multiply %614, %cst_187 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%616 = chlo.broadcast_add %615, %cst_188 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%617 = "mhlo.reshape"(%616) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%618 = "mhlo.dot"(%617, %cst_183) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%619 = chlo.broadcast_add %618, %cst_184 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%620 = "mhlo.reshape"(%619) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%621 = chlo.broadcast_add %620, %524 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%622 = chlo.broadcast_multiply %621, %cst_185 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%623 = chlo.broadcast_add %622, %cst_186 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%624 = "mhlo.reshape"(%623) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%625 = "mhlo.dot"(%624, %cst_171) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%626 = chlo.broadcast_add %625, %cst_172 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%627 = "mhlo.reshape"(%626) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%628 = "mhlo.transpose"(%627) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%629 = "mhlo.dot"(%624, %cst_167) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%630 = "mhlo.reshape"(%629) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%631 = "mhlo.broadcast_in_dim"(%cst_168) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%632 = mhlo.add %630, %631 : tensor<1x384x128xf32>
%633 = chlo.broadcast_multiply %632, %cst_169 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%634 = chlo.broadcast_add %633, %cst_170 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%635 = "mhlo.reshape"(%634) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%636 = "mhlo.dot"(%635, %cst_175) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%637 = chlo.broadcast_add %636, %cst_176 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%638 = "mhlo.reshape"(%637) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%639 = "mhlo.transpose"(%638) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%640 = "mhlo.dot"(%635, %cst_173) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%641 = chlo.broadcast_add %640, %cst_174 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%642 = "mhlo.reshape"(%641) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%643 = "mhlo.transpose"(%642) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%644 = "mhlo.dot_general"(%643, %639) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%645 = chlo.broadcast_multiply %644, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%646 = chlo.broadcast_add %645, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%647 = "mhlo.reduce"(%646, %2) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%648 = linalg.tensor_expand_shape %647 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%649 = chlo.broadcast_subtract %646, %648 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%650 = "mhlo.exponential"(%649) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%651 = "mhlo.reduce"(%650, %1) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%652 = linalg.tensor_expand_shape %651 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%653 = chlo.broadcast_divide %650, %652 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%654 = "mhlo.dot_general"(%653, %628) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%655 = "mhlo.transpose"(%654) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%656 = "mhlo.reshape"(%655) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%657 = "mhlo.dot"(%656, %cst_177) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%658 = chlo.broadcast_add %657, %cst_178 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%659 = "mhlo.reshape"(%658) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%660 = "mhlo.dot"(%624, %cst_164) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%661 = chlo.broadcast_add %660, %cst_165 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%662 = "mhlo.reshape"(%661) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%663 = chlo.broadcast_multiply %662, %cst_166 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%664 = chlo.broadcast_add %663, %cst_178 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%665 = chlo.broadcast_add %659, %664 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%666 = chlo.broadcast_multiply %665, %cst_179 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%667 = chlo.broadcast_add %666, %cst_180 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%668 = "mhlo.reshape"(%667) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%669 = "mhlo.dot"(%668, %cst_162) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%670 = chlo.broadcast_add %669, %cst_163 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%671 = "mhlo.reshape"(%670) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%672 = chlo.broadcast_maximum %671, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%673 = "mhlo.reshape"(%672) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%674 = "mhlo.dot"(%673, %cst_158) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%675 = chlo.broadcast_add %674, %cst_159 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%676 = "mhlo.reshape"(%675) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%677 = chlo.broadcast_add %676, %667 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%678 = chlo.broadcast_multiply %677, %cst_160 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%679 = chlo.broadcast_add %678, %cst_161 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%680 = "mhlo.reshape"(%679) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%681 = "mhlo.dot"(%680, %cst_156) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%682 = chlo.broadcast_add %681, %cst_157 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%683 = "mhlo.reshape"(%682) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%684 = chlo.broadcast_maximum %683, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%685 = "mhlo.reshape"(%684) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%686 = "mhlo.dot"(%685, %cst_152) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%687 = chlo.broadcast_add %686, %cst_153 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%688 = "mhlo.reshape"(%687) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%689 = chlo.broadcast_add %688, %679 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%690 = chlo.broadcast_multiply %689, %cst_154 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%691 = chlo.broadcast_add %690, %cst_155 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%692 = "mhlo.reshape"(%691) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%693 = "mhlo.dot"(%692, %cst_150) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%694 = chlo.broadcast_add %693, %cst_151 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%695 = "mhlo.reshape"(%694) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%696 = chlo.broadcast_maximum %695, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%697 = "mhlo.reshape"(%696) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%698 = "mhlo.dot"(%697, %cst_146) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%699 = chlo.broadcast_add %698, %cst_147 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%700 = "mhlo.reshape"(%699) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%701 = chlo.broadcast_add %700, %691 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%702 = chlo.broadcast_multiply %701, %cst_148 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%703 = chlo.broadcast_add %702, %cst_149 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%704 = "mhlo.reshape"(%703) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%705 = "mhlo.dot"(%704, %cst_144) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%706 = chlo.broadcast_add %705, %cst_145 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%707 = "mhlo.reshape"(%706) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%708 = chlo.broadcast_maximum %707, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%709 = "mhlo.reshape"(%708) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%710 = "mhlo.dot"(%709, %cst_136) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%711 = chlo.broadcast_add %710, %cst_137 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%712 = "mhlo.reshape"(%711) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%713 = chlo.broadcast_add %712, %703 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%714 = chlo.broadcast_multiply %713, %cst_142 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%715 = chlo.broadcast_add %714, %cst_143 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%716 = "mhlo.reshape"(%715) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%717 = "mhlo.dot"(%716, %cst_138) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%718 = chlo.broadcast_add %717, %cst_139 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%719 = "mhlo.reshape"(%718) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%720 = chlo.broadcast_add %719, %623 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%721 = chlo.broadcast_multiply %720, %cst_140 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%722 = chlo.broadcast_add %721, %cst_141 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%723 = "mhlo.reshape"(%722) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%724 = "mhlo.dot"(%723, %cst_126) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%725 = chlo.broadcast_add %724, %cst_127 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%726 = "mhlo.reshape"(%725) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%727 = "mhlo.transpose"(%726) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%728 = "mhlo.dot"(%723, %cst_122) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%729 = "mhlo.reshape"(%728) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%730 = "mhlo.broadcast_in_dim"(%cst_123) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%731 = mhlo.add %729, %730 : tensor<1x384x128xf32>
%732 = chlo.broadcast_multiply %731, %cst_124 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%733 = chlo.broadcast_add %732, %cst_125 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%734 = "mhlo.reshape"(%733) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%735 = "mhlo.dot"(%734, %cst_130) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%736 = chlo.broadcast_add %735, %cst_131 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%737 = "mhlo.reshape"(%736) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%738 = "mhlo.transpose"(%737) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%739 = "mhlo.dot"(%734, %cst_128) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%740 = chlo.broadcast_add %739, %cst_129 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%741 = "mhlo.reshape"(%740) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%742 = "mhlo.transpose"(%741) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%743 = "mhlo.dot_general"(%742, %738) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%744 = chlo.broadcast_multiply %743, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%745 = chlo.broadcast_add %744, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%746 = "mhlo.reduce"(%745, %2) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%747 = linalg.tensor_expand_shape %746 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%748 = chlo.broadcast_subtract %745, %747 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%749 = "mhlo.exponential"(%748) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%750 = "mhlo.reduce"(%749, %1) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%751 = linalg.tensor_expand_shape %750 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%752 = chlo.broadcast_divide %749, %751 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%753 = "mhlo.dot_general"(%752, %727) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%754 = "mhlo.transpose"(%753) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%755 = "mhlo.reshape"(%754) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%756 = "mhlo.dot"(%755, %cst_132) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%757 = chlo.broadcast_add %756, %cst_133 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%758 = "mhlo.reshape"(%757) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%759 = "mhlo.dot"(%723, %cst_119) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%760 = chlo.broadcast_add %759, %cst_120 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%761 = "mhlo.reshape"(%760) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%762 = chlo.broadcast_multiply %761, %cst_121 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%763 = chlo.broadcast_add %762, %cst_133 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%764 = chlo.broadcast_add %758, %763 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%765 = chlo.broadcast_multiply %764, %cst_134 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%766 = chlo.broadcast_add %765, %cst_135 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%767 = "mhlo.reshape"(%766) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%768 = "mhlo.dot"(%767, %cst_117) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%769 = chlo.broadcast_add %768, %cst_118 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%770 = "mhlo.reshape"(%769) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%771 = chlo.broadcast_maximum %770, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%772 = "mhlo.reshape"(%771) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%773 = "mhlo.dot"(%772, %cst_113) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%774 = chlo.broadcast_add %773, %cst_114 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%775 = "mhlo.reshape"(%774) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%776 = chlo.broadcast_add %775, %766 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%777 = chlo.broadcast_multiply %776, %cst_115 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%778 = chlo.broadcast_add %777, %cst_116 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%779 = "mhlo.reshape"(%778) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%780 = "mhlo.dot"(%779, %cst_111) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%781 = chlo.broadcast_add %780, %cst_112 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%782 = "mhlo.reshape"(%781) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%783 = chlo.broadcast_maximum %782, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%784 = "mhlo.reshape"(%783) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%785 = "mhlo.dot"(%784, %cst_107) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%786 = chlo.broadcast_add %785, %cst_108 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%787 = "mhlo.reshape"(%786) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%788 = chlo.broadcast_add %787, %778 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%789 = chlo.broadcast_multiply %788, %cst_109 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%790 = chlo.broadcast_add %789, %cst_110 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%791 = "mhlo.reshape"(%790) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%792 = "mhlo.dot"(%791, %cst_105) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%793 = chlo.broadcast_add %792, %cst_106 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%794 = "mhlo.reshape"(%793) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%795 = chlo.broadcast_maximum %794, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%796 = "mhlo.reshape"(%795) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%797 = "mhlo.dot"(%796, %cst_101) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%798 = chlo.broadcast_add %797, %cst_102 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%799 = "mhlo.reshape"(%798) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%800 = chlo.broadcast_add %799, %790 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%801 = chlo.broadcast_multiply %800, %cst_103 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%802 = chlo.broadcast_add %801, %cst_104 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%803 = "mhlo.reshape"(%802) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%804 = "mhlo.dot"(%803, %cst_99) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%805 = chlo.broadcast_add %804, %cst_100 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%806 = "mhlo.reshape"(%805) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%807 = chlo.broadcast_maximum %806, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%808 = "mhlo.reshape"(%807) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%809 = "mhlo.dot"(%808, %cst_91) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%810 = chlo.broadcast_add %809, %cst_92 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%811 = "mhlo.reshape"(%810) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%812 = chlo.broadcast_add %811, %802 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%813 = chlo.broadcast_multiply %812, %cst_97 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%814 = chlo.broadcast_add %813, %cst_98 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%815 = "mhlo.reshape"(%814) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%816 = "mhlo.dot"(%815, %cst_93) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%817 = chlo.broadcast_add %816, %cst_94 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%818 = "mhlo.reshape"(%817) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%819 = chlo.broadcast_add %818, %722 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%820 = chlo.broadcast_multiply %819, %cst_95 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%821 = chlo.broadcast_add %820, %cst_96 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%822 = "mhlo.reshape"(%821) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%823 = "mhlo.dot"(%822, %cst_81) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%824 = chlo.broadcast_add %823, %cst_82 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%825 = "mhlo.reshape"(%824) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%826 = "mhlo.transpose"(%825) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%827 = "mhlo.dot"(%822, %cst_77) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%828 = "mhlo.reshape"(%827) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%829 = "mhlo.broadcast_in_dim"(%cst_78) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%830 = mhlo.add %828, %829 : tensor<1x384x128xf32>
%831 = chlo.broadcast_multiply %830, %cst_79 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%832 = chlo.broadcast_add %831, %cst_80 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%833 = "mhlo.reshape"(%832) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%834 = "mhlo.dot"(%833, %cst_85) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%835 = chlo.broadcast_add %834, %cst_86 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%836 = "mhlo.reshape"(%835) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%837 = "mhlo.transpose"(%836) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%838 = "mhlo.dot"(%833, %cst_83) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%839 = chlo.broadcast_add %838, %cst_84 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%840 = "mhlo.reshape"(%839) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%841 = "mhlo.transpose"(%840) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%842 = "mhlo.dot_general"(%841, %837) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%843 = chlo.broadcast_multiply %842, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%844 = chlo.broadcast_add %843, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%845 = "mhlo.reduce"(%844, %2) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%846 = linalg.tensor_expand_shape %845 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%847 = chlo.broadcast_subtract %844, %846 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%848 = "mhlo.exponential"(%847) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%849 = "mhlo.reduce"(%848, %1) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%850 = linalg.tensor_expand_shape %849 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%851 = chlo.broadcast_divide %848, %850 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%852 = "mhlo.dot_general"(%851, %826) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%853 = "mhlo.transpose"(%852) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%854 = "mhlo.reshape"(%853) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%855 = "mhlo.dot"(%854, %cst_87) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%856 = chlo.broadcast_add %855, %cst_88 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%857 = "mhlo.reshape"(%856) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%858 = "mhlo.dot"(%822, %cst_74) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%859 = chlo.broadcast_add %858, %cst_75 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%860 = "mhlo.reshape"(%859) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%861 = chlo.broadcast_multiply %860, %cst_76 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%862 = chlo.broadcast_add %861, %cst_88 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%863 = chlo.broadcast_add %857, %862 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%864 = chlo.broadcast_multiply %863, %cst_89 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%865 = chlo.broadcast_add %864, %cst_90 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%866 = "mhlo.reshape"(%865) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%867 = "mhlo.dot"(%866, %cst_72) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%868 = chlo.broadcast_add %867, %cst_73 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%869 = "mhlo.reshape"(%868) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%870 = chlo.broadcast_maximum %869, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%871 = "mhlo.reshape"(%870) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%872 = "mhlo.dot"(%871, %cst_68) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%873 = chlo.broadcast_add %872, %cst_69 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%874 = "mhlo.reshape"(%873) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%875 = chlo.broadcast_add %874, %865 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%876 = chlo.broadcast_multiply %875, %cst_70 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%877 = chlo.broadcast_add %876, %cst_71 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%878 = "mhlo.reshape"(%877) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%879 = "mhlo.dot"(%878, %cst_66) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%880 = chlo.broadcast_add %879, %cst_67 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%881 = "mhlo.reshape"(%880) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%882 = chlo.broadcast_maximum %881, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%883 = "mhlo.reshape"(%882) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%884 = "mhlo.dot"(%883, %cst_62) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%885 = chlo.broadcast_add %884, %cst_63 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%886 = "mhlo.reshape"(%885) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%887 = chlo.broadcast_add %886, %877 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%888 = chlo.broadcast_multiply %887, %cst_64 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%889 = chlo.broadcast_add %888, %cst_65 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%890 = "mhlo.reshape"(%889) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%891 = "mhlo.dot"(%890, %cst_60) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%892 = chlo.broadcast_add %891, %cst_61 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%893 = "mhlo.reshape"(%892) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%894 = chlo.broadcast_maximum %893, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%895 = "mhlo.reshape"(%894) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%896 = "mhlo.dot"(%895, %cst_56) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%897 = chlo.broadcast_add %896, %cst_57 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%898 = "mhlo.reshape"(%897) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%899 = chlo.broadcast_add %898, %889 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%900 = chlo.broadcast_multiply %899, %cst_58 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%901 = chlo.broadcast_add %900, %cst_59 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%902 = "mhlo.reshape"(%901) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%903 = "mhlo.dot"(%902, %cst_54) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%904 = chlo.broadcast_add %903, %cst_55 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%905 = "mhlo.reshape"(%904) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%906 = chlo.broadcast_maximum %905, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%907 = "mhlo.reshape"(%906) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%908 = "mhlo.dot"(%907, %cst_46) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%909 = chlo.broadcast_add %908, %cst_47 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%910 = "mhlo.reshape"(%909) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%911 = chlo.broadcast_add %910, %901 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%912 = chlo.broadcast_multiply %911, %cst_52 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%913 = chlo.broadcast_add %912, %cst_53 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%914 = "mhlo.reshape"(%913) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%915 = "mhlo.dot"(%914, %cst_48) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%916 = chlo.broadcast_add %915, %cst_49 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%917 = "mhlo.reshape"(%916) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%918 = chlo.broadcast_add %917, %821 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%919 = chlo.broadcast_multiply %918, %cst_50 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%920 = chlo.broadcast_add %919, %cst_51 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%921 = "mhlo.reshape"(%920) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%922 = "mhlo.dot"(%921, %cst_36) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%923 = chlo.broadcast_add %922, %cst_37 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%924 = "mhlo.reshape"(%923) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%925 = "mhlo.transpose"(%924) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%926 = "mhlo.dot"(%921, %cst_32) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%927 = "mhlo.reshape"(%926) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%928 = "mhlo.broadcast_in_dim"(%cst_33) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%929 = mhlo.add %927, %928 : tensor<1x384x128xf32>
%930 = chlo.broadcast_multiply %929, %cst_34 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%931 = chlo.broadcast_add %930, %cst_35 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%932 = "mhlo.reshape"(%931) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%933 = "mhlo.dot"(%932, %cst_40) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%934 = chlo.broadcast_add %933, %cst_41 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%935 = "mhlo.reshape"(%934) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%936 = "mhlo.transpose"(%935) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%937 = "mhlo.dot"(%932, %cst_38) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%938 = chlo.broadcast_add %937, %cst_39 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%939 = "mhlo.reshape"(%938) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%940 = "mhlo.transpose"(%939) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%941 = "mhlo.dot_general"(%940, %936) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%942 = chlo.broadcast_multiply %941, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%943 = chlo.broadcast_add %942, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%944 = "mhlo.reduce"(%943, %2) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%945 = linalg.tensor_expand_shape %944 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%946 = chlo.broadcast_subtract %943, %945 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%947 = "mhlo.exponential"(%946) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%948 = "mhlo.reduce"(%947, %1) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%949 = linalg.tensor_expand_shape %948 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%950 = chlo.broadcast_divide %947, %949 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%951 = "mhlo.dot_general"(%950, %925) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%952 = "mhlo.transpose"(%951) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%953 = "mhlo.reshape"(%952) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%954 = "mhlo.dot"(%953, %cst_42) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%955 = chlo.broadcast_add %954, %cst_43 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%956 = "mhlo.reshape"(%955) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%957 = "mhlo.dot"(%921, %cst_29) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%958 = chlo.broadcast_add %957, %cst_30 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%959 = "mhlo.reshape"(%958) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%960 = chlo.broadcast_multiply %959, %cst_31 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%961 = chlo.broadcast_add %960, %cst_43 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%962 = chlo.broadcast_add %956, %961 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%963 = chlo.broadcast_multiply %962, %cst_44 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%964 = chlo.broadcast_add %963, %cst_45 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%965 = "mhlo.reshape"(%964) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%966 = "mhlo.dot"(%965, %cst_27) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%967 = chlo.broadcast_add %966, %cst_28 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%968 = "mhlo.reshape"(%967) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%969 = chlo.broadcast_maximum %968, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%970 = "mhlo.reshape"(%969) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%971 = "mhlo.dot"(%970, %cst_23) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%972 = chlo.broadcast_add %971, %cst_24 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%973 = "mhlo.reshape"(%972) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%974 = chlo.broadcast_add %973, %964 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%975 = chlo.broadcast_multiply %974, %cst_25 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%976 = chlo.broadcast_add %975, %cst_26 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%977 = "mhlo.reshape"(%976) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%978 = "mhlo.dot"(%977, %cst_21) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%979 = chlo.broadcast_add %978, %cst_22 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%980 = "mhlo.reshape"(%979) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%981 = chlo.broadcast_maximum %980, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%982 = "mhlo.reshape"(%981) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%983 = "mhlo.dot"(%982, %cst_17) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%984 = chlo.broadcast_add %983, %cst_18 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%985 = "mhlo.reshape"(%984) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%986 = chlo.broadcast_add %985, %976 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%987 = chlo.broadcast_multiply %986, %cst_19 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%988 = chlo.broadcast_add %987, %cst_20 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%989 = "mhlo.reshape"(%988) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%990 = "mhlo.dot"(%989, %cst_15) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%991 = chlo.broadcast_add %990, %cst_16 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%992 = "mhlo.reshape"(%991) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%993 = chlo.broadcast_maximum %992, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%994 = "mhlo.reshape"(%993) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%995 = "mhlo.dot"(%994, %cst_11) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%996 = chlo.broadcast_add %995, %cst_12 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%997 = "mhlo.reshape"(%996) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%998 = chlo.broadcast_add %997, %988 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%999 = chlo.broadcast_multiply %998, %cst_13 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1000 = chlo.broadcast_add %999, %cst_14 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1001 = "mhlo.reshape"(%1000) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1002 = "mhlo.dot"(%1001, %cst_9) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1003 = chlo.broadcast_add %1002, %cst_10 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1004 = "mhlo.reshape"(%1003) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1005 = chlo.broadcast_maximum %1004, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1006 = "mhlo.reshape"(%1005) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1007 = "mhlo.dot"(%1006, %cst_1) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1008 = chlo.broadcast_add %1007, %cst_2 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1009 = "mhlo.reshape"(%1008) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1010 = chlo.broadcast_add %1009, %1000 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1011 = chlo.broadcast_multiply %1010, %cst_7 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1012 = chlo.broadcast_add %1011, %cst_8 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1013 = "mhlo.reshape"(%1012) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1014 = "mhlo.dot"(%1013, %cst_3) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1015 = chlo.broadcast_add %1014, %cst_4 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1016 = "mhlo.reshape"(%1015) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1017 = chlo.broadcast_add %1016, %920 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%1018 = chlo.broadcast_multiply %1017, %cst_5 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1019 = chlo.broadcast_add %1018, %cst_6 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1020 = "mhlo.reshape"(%1019) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1021 = "mhlo.dot"(%1020, %cst_981) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1022 = chlo.broadcast_add %1021, %cst_982 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1023 = "mhlo.reshape"(%1022) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1024 = "mhlo.transpose"(%1023) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1025 = "mhlo.dot"(%1020, %cst_977) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1026 = "mhlo.reshape"(%1025) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1027 = "mhlo.broadcast_in_dim"(%cst_978) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%1028 = mhlo.add %1026, %1027 : tensor<1x384x128xf32>
%1029 = chlo.broadcast_multiply %1028, %cst_979 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1030 = chlo.broadcast_add %1029, %cst_980 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1031 = "mhlo.reshape"(%1030) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1032 = "mhlo.dot"(%1031, %cst_985) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1033 = chlo.broadcast_add %1032, %cst_986 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1034 = "mhlo.reshape"(%1033) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1035 = "mhlo.transpose"(%1034) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1036 = "mhlo.dot"(%1031, %cst_983) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1037 = chlo.broadcast_add %1036, %cst_984 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1038 = "mhlo.reshape"(%1037) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1039 = "mhlo.transpose"(%1038) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1040 = "mhlo.dot_general"(%1039, %1035) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%1041 = chlo.broadcast_multiply %1040, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%1042 = chlo.broadcast_add %1041, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
// Row-wise normalization of %1042 along dimension 3, in four steps:
// max-reduce, subtract the max, exponentiate, divide by the sum of the
// exponentials. This has the shape of a numerically stabilized softmax.
// NOTE(review): %2 (seed of the max-reduce) and %1 (seed of the sum-reduce)
// are defined outside this excerpt; presumably the f32 lowest value / -inf
// and 0.0 respectively -- confirm against their definitions.
//
// Step 1: maximum over dimension 3 (1x4x384x384 -> 1x4x384).
%1043 = "mhlo.reduce"(%1042, %2) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
// Re-append a trailing unit dimension (1x4x384 -> 1x4x384x1) so the reduced
// values can be broadcast against the 1x4x384x384 operand below.
%1044 = linalg.tensor_expand_shape %1043 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
// Step 2: subtract the per-row maximum, then exponentiate element-wise.
%1045 = chlo.broadcast_subtract %1042, %1044 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1046 = "mhlo.exponential"(%1045) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
// Step 3: sum of the exponentials over dimension 3 (1x4x384x384 -> 1x4x384).
%1047 = "mhlo.reduce"(%1046, %1) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
// Re-append the trailing unit dimension for broadcasting, as after step 1.
%1048 = linalg.tensor_expand_shape %1047 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
// Step 4: divide every exponential by the sum of its row.
%1049 = chlo.broadcast_divide %1046, %1048 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1050 = "mhlo.dot_general"(%1049, %1024) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%1051 = "mhlo.transpose"(%1050) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%1052 = "mhlo.reshape"(%1051) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%1053 = "mhlo.dot"(%1052, %cst_987) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1054 = chlo.broadcast_add %1053, %cst_988 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1055 = "mhlo.reshape"(%1054) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1056 = "mhlo.dot"(%1020, %cst_974) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1057 = chlo.broadcast_add %1056, %cst_975 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1058 = "mhlo.reshape"(%1057) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1059 = chlo.broadcast_multiply %1058, %cst_976 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1060 = chlo.broadcast_add %1059, %cst_988 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1061 = chlo.broadcast_add %1055, %1060 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1062 = chlo.broadcast_multiply %1061, %cst_989 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1063 = chlo.broadcast_add %1062, %cst_990 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1064 = "mhlo.reshape"(%1063) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1065 = "mhlo.dot"(%1064, %cst_972) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1066 = chlo.broadcast_add %1065, %cst_973 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1067 = "mhlo.reshape"(%1066) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1068 = chlo.broadcast_maximum %1067, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1069 = "mhlo.reshape"(%1068) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1070 = "mhlo.dot"(%1069, %cst_968) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1071 = chlo.broadcast_add %1070, %cst_969 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1072 = "mhlo.reshape"(%1071) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1073 = chlo.broadcast_add %1072, %1063 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1074 = chlo.broadcast_multiply %1073, %cst_970 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1075 = chlo.broadcast_add %1074, %cst_971 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1076 = "mhlo.reshape"(%1075) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1077 = "mhlo.dot"(%1076, %cst_966) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1078 = chlo.broadcast_add %1077, %cst_967 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1079 = "mhlo.reshape"(%1078) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1080 = chlo.broadcast_maximum %1079, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1081 = "mhlo.reshape"(%1080) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1082 = "mhlo.dot"(%1081, %cst_962) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1083 = chlo.broadcast_add %1082, %cst_963 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1084 = "mhlo.reshape"(%1083) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1085 = chlo.broadcast_add %1084, %1075 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1086 = chlo.broadcast_multiply %1085, %cst_964 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1087 = chlo.broadcast_add %1086, %cst_965 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1088 = "mhlo.reshape"(%1087) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1089 = "mhlo.dot"(%1088, %cst_960) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1090 = chlo.broadcast_add %1089, %cst_961 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1091 = "mhlo.reshape"(%1090) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1092 = chlo.broadcast_maximum %1091, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1093 = "mhlo.reshape"(%1092) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1094 = "mhlo.dot"(%1093, %cst_956) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1095 = chlo.broadcast_add %1094, %cst_957 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1096 = "mhlo.reshape"(%1095) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1097 = chlo.broadcast_add %1096, %1087 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1098 = chlo.broadcast_multiply %1097, %cst_958 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1099 = chlo.broadcast_add %1098, %cst_959 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1100 = "mhlo.reshape"(%1099) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1101 = "mhlo.dot"(%1100, %cst_954) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1102 = chlo.broadcast_add %1101, %cst_955 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1103 = "mhlo.reshape"(%1102) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1104 = chlo.broadcast_maximum %1103, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1105 = "mhlo.reshape"(%1104) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1106 = "mhlo.dot"(%1105, %cst_946) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1107 = chlo.broadcast_add %1106, %cst_947 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1108 = "mhlo.reshape"(%1107) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1109 = chlo.broadcast_add %1108, %1099 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1110 = chlo.broadcast_multiply %1109, %cst_952 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1111 = chlo.broadcast_add %1110, %cst_953 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1112 = "mhlo.reshape"(%1111) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1113 = "mhlo.dot"(%1112, %cst_948) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1114 = chlo.broadcast_add %1113, %cst_949 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1115 = "mhlo.reshape"(%1114) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1116 = chlo.broadcast_add %1115, %1019 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%1117 = chlo.broadcast_multiply %1116, %cst_950 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1118 = chlo.broadcast_add %1117, %cst_951 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1119 = "mhlo.reshape"(%1118) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1120 = "mhlo.dot"(%1119, %cst_936) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1121 = chlo.broadcast_add %1120, %cst_937 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1122 = "mhlo.reshape"(%1121) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1123 = "mhlo.transpose"(%1122) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1124 = "mhlo.dot"(%1119, %cst_932) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1125 = "mhlo.reshape"(%1124) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1126 = "mhlo.broadcast_in_dim"(%cst_933) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%1127 = mhlo.add %1125, %1126 : tensor<1x384x128xf32>
%1128 = chlo.broadcast_multiply %1127, %cst_934 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1129 = chlo.broadcast_add %1128, %cst_935 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1130 = "mhlo.reshape"(%1129) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1131 = "mhlo.dot"(%1130, %cst_940) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1132 = chlo.broadcast_add %1131, %cst_941 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1133 = "mhlo.reshape"(%1132) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1134 = "mhlo.transpose"(%1133) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1135 = "mhlo.dot"(%1130, %cst_938) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1136 = chlo.broadcast_add %1135, %cst_939 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1137 = "mhlo.reshape"(%1136) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1138 = "mhlo.transpose"(%1137) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1139 = "mhlo.dot_general"(%1138, %1134) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%1140 = chlo.broadcast_multiply %1139, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%1141 = chlo.broadcast_add %1140, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
// Row-wise normalization of %1141 along dimension 3, in four steps:
// max-reduce, subtract the max, exponentiate, divide by the sum of the
// exponentials. This has the shape of a numerically stabilized softmax.
// NOTE(review): %2 (seed of the max-reduce) and %1 (seed of the sum-reduce)
// are defined outside this excerpt; presumably the f32 lowest value / -inf
// and 0.0 respectively -- confirm against their definitions.
//
// Step 1: maximum over dimension 3 (1x4x384x384 -> 1x4x384).
%1142 = "mhlo.reduce"(%1141, %2) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
// Re-append a trailing unit dimension (1x4x384 -> 1x4x384x1) so the reduced
// values can be broadcast against the 1x4x384x384 operand below.
%1143 = linalg.tensor_expand_shape %1142 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
// Step 2: subtract the per-row maximum, then exponentiate element-wise.
%1144 = chlo.broadcast_subtract %1141, %1143 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1145 = "mhlo.exponential"(%1144) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
// Step 3: sum of the exponentials over dimension 3 (1x4x384x384 -> 1x4x384).
%1146 = "mhlo.reduce"(%1145, %1) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
// Re-append the trailing unit dimension for broadcasting, as after step 1.
%1147 = linalg.tensor_expand_shape %1146 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
// Step 4: divide every exponential by the sum of its row.
%1148 = chlo.broadcast_divide %1145, %1147 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1149 = "mhlo.dot_general"(%1148, %1123) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%1150 = "mhlo.transpose"(%1149) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%1151 = "mhlo.reshape"(%1150) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%1152 = "mhlo.dot"(%1151, %cst_942) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1153 = chlo.broadcast_add %1152, %cst_943 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1154 = "mhlo.reshape"(%1153) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1155 = "mhlo.dot"(%1119, %cst_929) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1156 = chlo.broadcast_add %1155, %cst_930 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1157 = "mhlo.reshape"(%1156) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1158 = chlo.broadcast_multiply %1157, %cst_931 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1159 = chlo.broadcast_add %1158, %cst_943 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1160 = chlo.broadcast_add %1154, %1159 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1161 = chlo.broadcast_multiply %1160, %cst_944 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1162 = chlo.broadcast_add %1161, %cst_945 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1163 = "mhlo.reshape"(%1162) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1164 = "mhlo.dot"(%1163, %cst_927) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1165 = chlo.broadcast_add %1164, %cst_928 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1166 = "mhlo.reshape"(%1165) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1167 = chlo.broadcast_maximum %1166, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1168 = "mhlo.reshape"(%1167) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1169 = "mhlo.dot"(%1168, %cst_923) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1170 = chlo.broadcast_add %1169, %cst_924 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1171 = "mhlo.reshape"(%1170) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1172 = chlo.broadcast_add %1171, %1162 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1173 = chlo.broadcast_multiply %1172, %cst_925 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1174 = chlo.broadcast_add %1173, %cst_926 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1175 = "mhlo.reshape"(%1174) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1176 = "mhlo.dot"(%1175, %cst_921) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1177 = chlo.broadcast_add %1176, %cst_922 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1178 = "mhlo.reshape"(%1177) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1179 = chlo.broadcast_maximum %1178, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1180 = "mhlo.reshape"(%1179) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1181 = "mhlo.dot"(%1180, %cst_917) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1182 = chlo.broadcast_add %1181, %cst_918 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1183 = "mhlo.reshape"(%1182) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1184 = chlo.broadcast_add %1183, %1174 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1185 = chlo.broadcast_multiply %1184, %cst_919 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1186 = chlo.broadcast_add %1185, %cst_920 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1187 = "mhlo.reshape"(%1186) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1188 = "mhlo.dot"(%1187, %cst_915) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1189 = chlo.broadcast_add %1188, %cst_916 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1190 = "mhlo.reshape"(%1189) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1191 = chlo.broadcast_maximum %1190, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1192 = "mhlo.reshape"(%1191) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1193 = "mhlo.dot"(%1192, %cst_911) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1194 = chlo.broadcast_add %1193, %cst_912 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1195 = "mhlo.reshape"(%1194) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1196 = chlo.broadcast_add %1195, %1186 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1197 = chlo.broadcast_multiply %1196, %cst_913 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1198 = chlo.broadcast_add %1197, %cst_914 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1199 = "mhlo.reshape"(%1198) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1200 = "mhlo.dot"(%1199, %cst_909) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1201 = chlo.broadcast_add %1200, %cst_910 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1202 = "mhlo.reshape"(%1201) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1203 = chlo.broadcast_maximum %1202, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1204 = "mhlo.reshape"(%1203) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1205 = "mhlo.dot"(%1204, %cst_901) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1206 = chlo.broadcast_add %1205, %cst_902 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1207 = "mhlo.reshape"(%1206) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1208 = chlo.broadcast_add %1207, %1198 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1209 = chlo.broadcast_multiply %1208, %cst_907 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1210 = chlo.broadcast_add %1209, %cst_908 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1211 = "mhlo.reshape"(%1210) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1212 = "mhlo.dot"(%1211, %cst_903) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1213 = chlo.broadcast_add %1212, %cst_904 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1214 = "mhlo.reshape"(%1213) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1215 = chlo.broadcast_add %1214, %1118 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%1216 = chlo.broadcast_multiply %1215, %cst_905 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1217 = chlo.broadcast_add %1216, %cst_906 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1218 = "mhlo.reshape"(%1217) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1219 = "mhlo.dot"(%1218, %cst_891) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1220 = chlo.broadcast_add %1219, %cst_892 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1221 = "mhlo.reshape"(%1220) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1222 = "mhlo.transpose"(%1221) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1223 = "mhlo.dot"(%1218, %cst_887) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1224 = "mhlo.reshape"(%1223) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1225 = "mhlo.broadcast_in_dim"(%cst_888) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%1226 = mhlo.add %1224, %1225 : tensor<1x384x128xf32>
%1227 = chlo.broadcast_multiply %1226, %cst_889 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1228 = chlo.broadcast_add %1227, %cst_890 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1229 = "mhlo.reshape"(%1228) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1230 = "mhlo.dot"(%1229, %cst_895) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1231 = chlo.broadcast_add %1230, %cst_896 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1232 = "mhlo.reshape"(%1231) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1233 = "mhlo.transpose"(%1232) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1234 = "mhlo.dot"(%1229, %cst_893) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1235 = chlo.broadcast_add %1234, %cst_894 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1236 = "mhlo.reshape"(%1235) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1237 = "mhlo.transpose"(%1236) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1238 = "mhlo.dot_general"(%1237, %1233) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%1239 = chlo.broadcast_multiply %1238, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%1240 = chlo.broadcast_add %1239, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%1241 = "mhlo.reduce"(%1240, %2) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1242 = linalg.tensor_expand_shape %1241 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1243 = chlo.broadcast_subtract %1240, %1242 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1244 = "mhlo.exponential"(%1243) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%1245 = "mhlo.reduce"(%1244, %1) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1246 = linalg.tensor_expand_shape %1245 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1247 = chlo.broadcast_divide %1244, %1246 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1248 = "mhlo.dot_general"(%1247, %1222) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%1249 = "mhlo.transpose"(%1248) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%1250 = "mhlo.reshape"(%1249) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%1251 = "mhlo.dot"(%1250, %cst_897) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1252 = chlo.broadcast_add %1251, %cst_898 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1253 = "mhlo.reshape"(%1252) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1254 = "mhlo.dot"(%1218, %cst_884) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1255 = chlo.broadcast_add %1254, %cst_885 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1256 = "mhlo.reshape"(%1255) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1257 = chlo.broadcast_multiply %1256, %cst_886 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1258 = chlo.broadcast_add %1257, %cst_898 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1259 = chlo.broadcast_add %1253, %1258 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1260 = chlo.broadcast_multiply %1259, %cst_899 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1261 = chlo.broadcast_add %1260, %cst_900 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1262 = "mhlo.reshape"(%1261) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1263 = "mhlo.dot"(%1262, %cst_882) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1264 = chlo.broadcast_add %1263, %cst_883 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1265 = "mhlo.reshape"(%1264) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1266 = chlo.broadcast_maximum %1265, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1267 = "mhlo.reshape"(%1266) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1268 = "mhlo.dot"(%1267, %cst_878) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1269 = chlo.broadcast_add %1268, %cst_879 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1270 = "mhlo.reshape"(%1269) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1271 = chlo.broadcast_add %1270, %1261 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1272 = chlo.broadcast_multiply %1271, %cst_880 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1273 = chlo.broadcast_add %1272, %cst_881 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1274 = "mhlo.reshape"(%1273) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1275 = "mhlo.dot"(%1274, %cst_876) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1276 = chlo.broadcast_add %1275, %cst_877 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1277 = "mhlo.reshape"(%1276) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1278 = chlo.broadcast_maximum %1277, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1279 = "mhlo.reshape"(%1278) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1280 = "mhlo.dot"(%1279, %cst_872) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1281 = chlo.broadcast_add %1280, %cst_873 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1282 = "mhlo.reshape"(%1281) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1283 = chlo.broadcast_add %1282, %1273 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1284 = chlo.broadcast_multiply %1283, %cst_874 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1285 = chlo.broadcast_add %1284, %cst_875 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1286 = "mhlo.reshape"(%1285) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1287 = "mhlo.dot"(%1286, %cst_870) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1288 = chlo.broadcast_add %1287, %cst_871 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1289 = "mhlo.reshape"(%1288) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1290 = chlo.broadcast_maximum %1289, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1291 = "mhlo.reshape"(%1290) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1292 = "mhlo.dot"(%1291, %cst_866) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1293 = chlo.broadcast_add %1292, %cst_867 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1294 = "mhlo.reshape"(%1293) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1295 = chlo.broadcast_add %1294, %1285 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1296 = chlo.broadcast_multiply %1295, %cst_868 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1297 = chlo.broadcast_add %1296, %cst_869 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1298 = "mhlo.reshape"(%1297) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1299 = "mhlo.dot"(%1298, %cst_864) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1300 = chlo.broadcast_add %1299, %cst_865 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1301 = "mhlo.reshape"(%1300) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1302 = chlo.broadcast_maximum %1301, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1303 = "mhlo.reshape"(%1302) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1304 = "mhlo.dot"(%1303, %cst_856) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1305 = chlo.broadcast_add %1304, %cst_857 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1306 = "mhlo.reshape"(%1305) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1307 = chlo.broadcast_add %1306, %1297 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1308 = chlo.broadcast_multiply %1307, %cst_862 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1309 = chlo.broadcast_add %1308, %cst_863 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1310 = "mhlo.reshape"(%1309) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1311 = "mhlo.dot"(%1310, %cst_858) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1312 = chlo.broadcast_add %1311, %cst_859 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1313 = "mhlo.reshape"(%1312) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1314 = chlo.broadcast_add %1313, %1217 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%1315 = chlo.broadcast_multiply %1314, %cst_860 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1316 = chlo.broadcast_add %1315, %cst_861 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1317 = "mhlo.reshape"(%1316) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1318 = "mhlo.dot"(%1317, %cst_846) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1319 = chlo.broadcast_add %1318, %cst_847 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1320 = "mhlo.reshape"(%1319) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1321 = "mhlo.transpose"(%1320) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1322 = "mhlo.dot"(%1317, %cst_842) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1323 = "mhlo.reshape"(%1322) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1324 = "mhlo.broadcast_in_dim"(%cst_843) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%1325 = mhlo.add %1323, %1324 : tensor<1x384x128xf32>
%1326 = chlo.broadcast_multiply %1325, %cst_844 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1327 = chlo.broadcast_add %1326, %cst_845 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1328 = "mhlo.reshape"(%1327) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1329 = "mhlo.dot"(%1328, %cst_850) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1330 = chlo.broadcast_add %1329, %cst_851 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1331 = "mhlo.reshape"(%1330) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1332 = "mhlo.transpose"(%1331) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1333 = "mhlo.dot"(%1328, %cst_848) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1334 = chlo.broadcast_add %1333, %cst_849 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1335 = "mhlo.reshape"(%1334) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1336 = "mhlo.transpose"(%1335) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1337 = "mhlo.dot_general"(%1336, %1332) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%1338 = chlo.broadcast_multiply %1337, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%1339 = chlo.broadcast_add %1338, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%1340 = "mhlo.reduce"(%1339, %2) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1341 = linalg.tensor_expand_shape %1340 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1342 = chlo.broadcast_subtract %1339, %1341 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1343 = "mhlo.exponential"(%1342) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%1344 = "mhlo.reduce"(%1343, %1) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1345 = linalg.tensor_expand_shape %1344 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1346 = chlo.broadcast_divide %1343, %1345 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1347 = "mhlo.dot_general"(%1346, %1321) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%1348 = "mhlo.transpose"(%1347) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%1349 = "mhlo.reshape"(%1348) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%1350 = "mhlo.dot"(%1349, %cst_852) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1351 = chlo.broadcast_add %1350, %cst_853 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1352 = "mhlo.reshape"(%1351) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1353 = "mhlo.dot"(%1317, %cst_839) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1354 = chlo.broadcast_add %1353, %cst_840 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1355 = "mhlo.reshape"(%1354) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1356 = chlo.broadcast_multiply %1355, %cst_841 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1357 = chlo.broadcast_add %1356, %cst_853 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1358 = chlo.broadcast_add %1352, %1357 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1359 = chlo.broadcast_multiply %1358, %cst_854 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1360 = chlo.broadcast_add %1359, %cst_855 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1361 = "mhlo.reshape"(%1360) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1362 = "mhlo.dot"(%1361, %cst_837) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1363 = chlo.broadcast_add %1362, %cst_838 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1364 = "mhlo.reshape"(%1363) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1365 = chlo.broadcast_maximum %1364, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1366 = "mhlo.reshape"(%1365) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1367 = "mhlo.dot"(%1366, %cst_833) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1368 = chlo.broadcast_add %1367, %cst_834 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1369 = "mhlo.reshape"(%1368) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1370 = chlo.broadcast_add %1369, %1360 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1371 = chlo.broadcast_multiply %1370, %cst_835 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1372 = chlo.broadcast_add %1371, %cst_836 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1373 = "mhlo.reshape"(%1372) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1374 = "mhlo.dot"(%1373, %cst_831) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1375 = chlo.broadcast_add %1374, %cst_832 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1376 = "mhlo.reshape"(%1375) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1377 = chlo.broadcast_maximum %1376, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1378 = "mhlo.reshape"(%1377) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1379 = "mhlo.dot"(%1378, %cst_827) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1380 = chlo.broadcast_add %1379, %cst_828 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1381 = "mhlo.reshape"(%1380) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1382 = chlo.broadcast_add %1381, %1372 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1383 = chlo.broadcast_multiply %1382, %cst_829 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1384 = chlo.broadcast_add %1383, %cst_830 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1385 = "mhlo.reshape"(%1384) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1386 = "mhlo.dot"(%1385, %cst_825) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1387 = chlo.broadcast_add %1386, %cst_826 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1388 = "mhlo.reshape"(%1387) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1389 = chlo.broadcast_maximum %1388, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1390 = "mhlo.reshape"(%1389) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1391 = "mhlo.dot"(%1390, %cst_821) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1392 = chlo.broadcast_add %1391, %cst_822 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1393 = "mhlo.reshape"(%1392) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1394 = chlo.broadcast_add %1393, %1384 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1395 = chlo.broadcast_multiply %1394, %cst_823 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1396 = chlo.broadcast_add %1395, %cst_824 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1397 = "mhlo.reshape"(%1396) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1398 = "mhlo.dot"(%1397, %cst_819) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1399 = chlo.broadcast_add %1398, %cst_820 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1400 = "mhlo.reshape"(%1399) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1401 = chlo.broadcast_maximum %1400, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1402 = "mhlo.reshape"(%1401) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1403 = "mhlo.dot"(%1402, %cst_811) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1404 = chlo.broadcast_add %1403, %cst_812 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1405 = "mhlo.reshape"(%1404) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1406 = chlo.broadcast_add %1405, %1396 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1407 = chlo.broadcast_multiply %1406, %cst_817 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1408 = chlo.broadcast_add %1407, %cst_818 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1409 = "mhlo.reshape"(%1408) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1410 = "mhlo.dot"(%1409, %cst_813) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1411 = chlo.broadcast_add %1410, %cst_814 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1412 = "mhlo.reshape"(%1411) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1413 = chlo.broadcast_add %1412, %1316 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%1414 = chlo.broadcast_multiply %1413, %cst_815 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1415 = chlo.broadcast_add %1414, %cst_816 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1416 = "mhlo.reshape"(%1415) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1417 = "mhlo.dot"(%1416, %cst_801) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1418 = chlo.broadcast_add %1417, %cst_802 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1419 = "mhlo.reshape"(%1418) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1420 = "mhlo.transpose"(%1419) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1421 = "mhlo.dot"(%1416, %cst_797) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1422 = "mhlo.reshape"(%1421) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1423 = "mhlo.broadcast_in_dim"(%cst_798) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%1424 = mhlo.add %1422, %1423 : tensor<1x384x128xf32>
%1425 = chlo.broadcast_multiply %1424, %cst_799 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1426 = chlo.broadcast_add %1425, %cst_800 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1427 = "mhlo.reshape"(%1426) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1428 = "mhlo.dot"(%1427, %cst_805) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1429 = chlo.broadcast_add %1428, %cst_806 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1430 = "mhlo.reshape"(%1429) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1431 = "mhlo.transpose"(%1430) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1432 = "mhlo.dot"(%1427, %cst_803) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1433 = chlo.broadcast_add %1432, %cst_804 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1434 = "mhlo.reshape"(%1433) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1435 = "mhlo.transpose"(%1434) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1436 = "mhlo.dot_general"(%1435, %1431) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%1437 = chlo.broadcast_multiply %1436, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%1438 = chlo.broadcast_add %1437, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%1439 = "mhlo.reduce"(%1438, %2) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1440 = linalg.tensor_expand_shape %1439 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1441 = chlo.broadcast_subtract %1438, %1440 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1442 = "mhlo.exponential"(%1441) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%1443 = "mhlo.reduce"(%1442, %1) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1444 = linalg.tensor_expand_shape %1443 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1445 = chlo.broadcast_divide %1442, %1444 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1446 = "mhlo.dot_general"(%1445, %1420) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%1447 = "mhlo.transpose"(%1446) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%1448 = "mhlo.reshape"(%1447) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%1449 = "mhlo.dot"(%1448, %cst_807) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1450 = chlo.broadcast_add %1449, %cst_808 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1451 = "mhlo.reshape"(%1450) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1452 = "mhlo.dot"(%1416, %cst_794) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1453 = chlo.broadcast_add %1452, %cst_795 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1454 = "mhlo.reshape"(%1453) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1455 = chlo.broadcast_multiply %1454, %cst_796 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1456 = chlo.broadcast_add %1455, %cst_808 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1457 = chlo.broadcast_add %1451, %1456 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1458 = chlo.broadcast_multiply %1457, %cst_809 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1459 = chlo.broadcast_add %1458, %cst_810 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1460 = "mhlo.reshape"(%1459) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1461 = "mhlo.dot"(%1460, %cst_792) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1462 = chlo.broadcast_add %1461, %cst_793 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1463 = "mhlo.reshape"(%1462) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1464 = chlo.broadcast_maximum %1463, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1465 = "mhlo.reshape"(%1464) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1466 = "mhlo.dot"(%1465, %cst_788) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1467 = chlo.broadcast_add %1466, %cst_789 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1468 = "mhlo.reshape"(%1467) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1469 = chlo.broadcast_add %1468, %1459 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1470 = chlo.broadcast_multiply %1469, %cst_790 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1471 = chlo.broadcast_add %1470, %cst_791 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1472 = "mhlo.reshape"(%1471) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1473 = "mhlo.dot"(%1472, %cst_786) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1474 = chlo.broadcast_add %1473, %cst_787 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1475 = "mhlo.reshape"(%1474) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1476 = chlo.broadcast_maximum %1475, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1477 = "mhlo.reshape"(%1476) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1478 = "mhlo.dot"(%1477, %cst_782) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1479 = chlo.broadcast_add %1478, %cst_783 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1480 = "mhlo.reshape"(%1479) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1481 = chlo.broadcast_add %1480, %1471 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1482 = chlo.broadcast_multiply %1481, %cst_784 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1483 = chlo.broadcast_add %1482, %cst_785 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1484 = "mhlo.reshape"(%1483) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1485 = "mhlo.dot"(%1484, %cst_780) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1486 = chlo.broadcast_add %1485, %cst_781 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1487 = "mhlo.reshape"(%1486) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1488 = chlo.broadcast_maximum %1487, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1489 = "mhlo.reshape"(%1488) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1490 = "mhlo.dot"(%1489, %cst_776) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1491 = chlo.broadcast_add %1490, %cst_777 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1492 = "mhlo.reshape"(%1491) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1493 = chlo.broadcast_add %1492, %1483 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1494 = chlo.broadcast_multiply %1493, %cst_778 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1495 = chlo.broadcast_add %1494, %cst_779 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1496 = "mhlo.reshape"(%1495) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1497 = "mhlo.dot"(%1496, %cst_774) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1498 = chlo.broadcast_add %1497, %cst_775 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1499 = "mhlo.reshape"(%1498) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1500 = chlo.broadcast_maximum %1499, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1501 = "mhlo.reshape"(%1500) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1502 = "mhlo.dot"(%1501, %cst_766) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1503 = chlo.broadcast_add %1502, %cst_767 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1504 = "mhlo.reshape"(%1503) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1505 = chlo.broadcast_add %1504, %1495 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1506 = chlo.broadcast_multiply %1505, %cst_772 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1507 = chlo.broadcast_add %1506, %cst_773 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1508 = "mhlo.reshape"(%1507) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1509 = "mhlo.dot"(%1508, %cst_768) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1510 = chlo.broadcast_add %1509, %cst_769 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1511 = "mhlo.reshape"(%1510) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1512 = chlo.broadcast_add %1511, %1415 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%1513 = chlo.broadcast_multiply %1512, %cst_770 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1514 = chlo.broadcast_add %1513, %cst_771 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1515 = "mhlo.reshape"(%1514) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1516 = "mhlo.dot"(%1515, %cst_756) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1517 = chlo.broadcast_add %1516, %cst_757 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1518 = "mhlo.reshape"(%1517) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1519 = "mhlo.transpose"(%1518) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1520 = "mhlo.dot"(%1515, %cst_752) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1521 = "mhlo.reshape"(%1520) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1522 = "mhlo.broadcast_in_dim"(%cst_753) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%1523 = mhlo.add %1521, %1522 : tensor<1x384x128xf32>
%1524 = chlo.broadcast_multiply %1523, %cst_754 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1525 = chlo.broadcast_add %1524, %cst_755 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1526 = "mhlo.reshape"(%1525) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1527 = "mhlo.dot"(%1526, %cst_760) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1528 = chlo.broadcast_add %1527, %cst_761 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1529 = "mhlo.reshape"(%1528) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1530 = "mhlo.transpose"(%1529) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1531 = "mhlo.dot"(%1526, %cst_758) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1532 = chlo.broadcast_add %1531, %cst_759 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1533 = "mhlo.reshape"(%1532) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1534 = "mhlo.transpose"(%1533) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1535 = "mhlo.dot_general"(%1534, %1530) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%1536 = chlo.broadcast_multiply %1535, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%1537 = chlo.broadcast_add %1536, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%1538 = "mhlo.reduce"(%1537, %2) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1539 = linalg.tensor_expand_shape %1538 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1540 = chlo.broadcast_subtract %1537, %1539 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1541 = "mhlo.exponential"(%1540) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%1542 = "mhlo.reduce"(%1541, %1) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1543 = linalg.tensor_expand_shape %1542 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1544 = chlo.broadcast_divide %1541, %1543 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1545 = "mhlo.dot_general"(%1544, %1519) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%1546 = "mhlo.transpose"(%1545) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%1547 = "mhlo.reshape"(%1546) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%1548 = "mhlo.dot"(%1547, %cst_762) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1549 = chlo.broadcast_add %1548, %cst_763 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1550 = "mhlo.reshape"(%1549) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1551 = "mhlo.dot"(%1515, %cst_749) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1552 = chlo.broadcast_add %1551, %cst_750 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1553 = "mhlo.reshape"(%1552) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1554 = chlo.broadcast_multiply %1553, %cst_751 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1555 = chlo.broadcast_add %1554, %cst_763 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1556 = chlo.broadcast_add %1550, %1555 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1557 = chlo.broadcast_multiply %1556, %cst_764 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1558 = chlo.broadcast_add %1557, %cst_765 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1559 = "mhlo.reshape"(%1558) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1560 = "mhlo.dot"(%1559, %cst_747) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1561 = chlo.broadcast_add %1560, %cst_748 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1562 = "mhlo.reshape"(%1561) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1563 = chlo.broadcast_maximum %1562, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1564 = "mhlo.reshape"(%1563) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1565 = "mhlo.dot"(%1564, %cst_743) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1566 = chlo.broadcast_add %1565, %cst_744 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1567 = "mhlo.reshape"(%1566) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1568 = chlo.broadcast_add %1567, %1558 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1569 = chlo.broadcast_multiply %1568, %cst_745 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1570 = chlo.broadcast_add %1569, %cst_746 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1571 = "mhlo.reshape"(%1570) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1572 = "mhlo.dot"(%1571, %cst_741) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1573 = chlo.broadcast_add %1572, %cst_742 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1574 = "mhlo.reshape"(%1573) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1575 = chlo.broadcast_maximum %1574, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1576 = "mhlo.reshape"(%1575) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1577 = "mhlo.dot"(%1576, %cst_737) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1578 = chlo.broadcast_add %1577, %cst_738 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1579 = "mhlo.reshape"(%1578) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1580 = chlo.broadcast_add %1579, %1570 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1581 = chlo.broadcast_multiply %1580, %cst_739 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1582 = chlo.broadcast_add %1581, %cst_740 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1583 = "mhlo.reshape"(%1582) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1584 = "mhlo.dot"(%1583, %cst_735) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1585 = chlo.broadcast_add %1584, %cst_736 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1586 = "mhlo.reshape"(%1585) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1587 = chlo.broadcast_maximum %1586, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1588 = "mhlo.reshape"(%1587) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1589 = "mhlo.dot"(%1588, %cst_731) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1590 = chlo.broadcast_add %1589, %cst_732 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1591 = "mhlo.reshape"(%1590) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1592 = chlo.broadcast_add %1591, %1582 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1593 = chlo.broadcast_multiply %1592, %cst_733 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1594 = chlo.broadcast_add %1593, %cst_734 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1595 = "mhlo.reshape"(%1594) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1596 = "mhlo.dot"(%1595, %cst_729) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1597 = chlo.broadcast_add %1596, %cst_730 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1598 = "mhlo.reshape"(%1597) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1599 = chlo.broadcast_maximum %1598, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1600 = "mhlo.reshape"(%1599) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1601 = "mhlo.dot"(%1600, %cst_721) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1602 = chlo.broadcast_add %1601, %cst_722 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1603 = "mhlo.reshape"(%1602) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1604 = chlo.broadcast_add %1603, %1594 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1605 = chlo.broadcast_multiply %1604, %cst_727 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1606 = chlo.broadcast_add %1605, %cst_728 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1607 = "mhlo.reshape"(%1606) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1608 = "mhlo.dot"(%1607, %cst_723) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1609 = chlo.broadcast_add %1608, %cst_724 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1610 = "mhlo.reshape"(%1609) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1611 = chlo.broadcast_add %1610, %1514 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%1612 = chlo.broadcast_multiply %1611, %cst_725 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1613 = chlo.broadcast_add %1612, %cst_726 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1614 = "mhlo.reshape"(%1613) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1615 = "mhlo.dot"(%1614, %cst_711) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1616 = chlo.broadcast_add %1615, %cst_712 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1617 = "mhlo.reshape"(%1616) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1618 = "mhlo.transpose"(%1617) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1619 = "mhlo.dot"(%1614, %cst_707) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1620 = "mhlo.reshape"(%1619) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1621 = "mhlo.broadcast_in_dim"(%cst_708) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%1622 = mhlo.add %1620, %1621 : tensor<1x384x128xf32>
%1623 = chlo.broadcast_multiply %1622, %cst_709 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1624 = chlo.broadcast_add %1623, %cst_710 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1625 = "mhlo.reshape"(%1624) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1626 = "mhlo.dot"(%1625, %cst_715) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1627 = chlo.broadcast_add %1626, %cst_716 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1628 = "mhlo.reshape"(%1627) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1629 = "mhlo.transpose"(%1628) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1630 = "mhlo.dot"(%1625, %cst_713) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1631 = chlo.broadcast_add %1630, %cst_714 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1632 = "mhlo.reshape"(%1631) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1633 = "mhlo.transpose"(%1632) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1634 = "mhlo.dot_general"(%1633, %1629) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%1635 = chlo.broadcast_multiply %1634, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%1636 = chlo.broadcast_add %1635, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%1637 = "mhlo.reduce"(%1636, %2) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1638 = linalg.tensor_expand_shape %1637 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1639 = chlo.broadcast_subtract %1636, %1638 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1640 = "mhlo.exponential"(%1639) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%1641 = "mhlo.reduce"(%1640, %1) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1642 = linalg.tensor_expand_shape %1641 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1643 = chlo.broadcast_divide %1640, %1642 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1644 = "mhlo.dot_general"(%1643, %1618) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%1645 = "mhlo.transpose"(%1644) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%1646 = "mhlo.reshape"(%1645) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%1647 = "mhlo.dot"(%1646, %cst_717) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1648 = chlo.broadcast_add %1647, %cst_718 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1649 = "mhlo.reshape"(%1648) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1650 = "mhlo.dot"(%1614, %cst_704) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1651 = chlo.broadcast_add %1650, %cst_705 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1652 = "mhlo.reshape"(%1651) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1653 = chlo.broadcast_multiply %1652, %cst_706 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1654 = chlo.broadcast_add %1653, %cst_718 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1655 = chlo.broadcast_add %1649, %1654 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1656 = chlo.broadcast_multiply %1655, %cst_719 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1657 = chlo.broadcast_add %1656, %cst_720 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1658 = "mhlo.reshape"(%1657) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1659 = "mhlo.dot"(%1658, %cst_702) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1660 = chlo.broadcast_add %1659, %cst_703 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1661 = "mhlo.reshape"(%1660) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1662 = chlo.broadcast_maximum %1661, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1663 = "mhlo.reshape"(%1662) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1664 = "mhlo.dot"(%1663, %cst_698) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1665 = chlo.broadcast_add %1664, %cst_699 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1666 = "mhlo.reshape"(%1665) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1667 = chlo.broadcast_add %1666, %1657 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1668 = chlo.broadcast_multiply %1667, %cst_700 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1669 = chlo.broadcast_add %1668, %cst_701 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1670 = "mhlo.reshape"(%1669) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1671 = "mhlo.dot"(%1670, %cst_696) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1672 = chlo.broadcast_add %1671, %cst_697 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1673 = "mhlo.reshape"(%1672) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1674 = chlo.broadcast_maximum %1673, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1675 = "mhlo.reshape"(%1674) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1676 = "mhlo.dot"(%1675, %cst_692) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1677 = chlo.broadcast_add %1676, %cst_693 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1678 = "mhlo.reshape"(%1677) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1679 = chlo.broadcast_add %1678, %1669 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1680 = chlo.broadcast_multiply %1679, %cst_694 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1681 = chlo.broadcast_add %1680, %cst_695 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1682 = "mhlo.reshape"(%1681) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1683 = "mhlo.dot"(%1682, %cst_690) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1684 = chlo.broadcast_add %1683, %cst_691 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1685 = "mhlo.reshape"(%1684) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1686 = chlo.broadcast_maximum %1685, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1687 = "mhlo.reshape"(%1686) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1688 = "mhlo.dot"(%1687, %cst_686) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1689 = chlo.broadcast_add %1688, %cst_687 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1690 = "mhlo.reshape"(%1689) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1691 = chlo.broadcast_add %1690, %1681 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1692 = chlo.broadcast_multiply %1691, %cst_688 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1693 = chlo.broadcast_add %1692, %cst_689 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1694 = "mhlo.reshape"(%1693) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1695 = "mhlo.dot"(%1694, %cst_684) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1696 = chlo.broadcast_add %1695, %cst_685 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1697 = "mhlo.reshape"(%1696) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1698 = chlo.broadcast_maximum %1697, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1699 = "mhlo.reshape"(%1698) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1700 = "mhlo.dot"(%1699, %cst_676) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1701 = chlo.broadcast_add %1700, %cst_677 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1702 = "mhlo.reshape"(%1701) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1703 = chlo.broadcast_add %1702, %1693 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1704 = chlo.broadcast_multiply %1703, %cst_682 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1705 = chlo.broadcast_add %1704, %cst_683 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1706 = "mhlo.reshape"(%1705) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1707 = "mhlo.dot"(%1706, %cst_678) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1708 = chlo.broadcast_add %1707, %cst_679 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1709 = "mhlo.reshape"(%1708) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1710 = chlo.broadcast_add %1709, %1613 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%1711 = chlo.broadcast_multiply %1710, %cst_680 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1712 = chlo.broadcast_add %1711, %cst_681 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1713 = "mhlo.reshape"(%1712) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1714 = "mhlo.dot"(%1713, %cst_666) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1715 = chlo.broadcast_add %1714, %cst_667 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1716 = "mhlo.reshape"(%1715) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1717 = "mhlo.transpose"(%1716) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1718 = "mhlo.dot"(%1713, %cst_662) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1719 = "mhlo.reshape"(%1718) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1720 = "mhlo.broadcast_in_dim"(%cst_663) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%1721 = mhlo.add %1719, %1720 : tensor<1x384x128xf32>
%1722 = chlo.broadcast_multiply %1721, %cst_664 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1723 = chlo.broadcast_add %1722, %cst_665 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1724 = "mhlo.reshape"(%1723) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1725 = "mhlo.dot"(%1724, %cst_670) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1726 = chlo.broadcast_add %1725, %cst_671 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1727 = "mhlo.reshape"(%1726) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1728 = "mhlo.transpose"(%1727) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1729 = "mhlo.dot"(%1724, %cst_668) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1730 = chlo.broadcast_add %1729, %cst_669 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1731 = "mhlo.reshape"(%1730) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1732 = "mhlo.transpose"(%1731) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1733 = "mhlo.dot_general"(%1732, %1728) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%1734 = chlo.broadcast_multiply %1733, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%1735 = chlo.broadcast_add %1734, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%1736 = "mhlo.reduce"(%1735, %2) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1737 = linalg.tensor_expand_shape %1736 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1738 = chlo.broadcast_subtract %1735, %1737 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1739 = "mhlo.exponential"(%1738) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%1740 = "mhlo.reduce"(%1739, %1) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1741 = linalg.tensor_expand_shape %1740 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1742 = chlo.broadcast_divide %1739, %1741 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1743 = "mhlo.dot_general"(%1742, %1717) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%1744 = "mhlo.transpose"(%1743) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%1745 = "mhlo.reshape"(%1744) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%1746 = "mhlo.dot"(%1745, %cst_672) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1747 = chlo.broadcast_add %1746, %cst_673 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1748 = "mhlo.reshape"(%1747) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1749 = "mhlo.dot"(%1713, %cst_659) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1750 = chlo.broadcast_add %1749, %cst_660 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1751 = "mhlo.reshape"(%1750) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1752 = chlo.broadcast_multiply %1751, %cst_661 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1753 = chlo.broadcast_add %1752, %cst_673 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1754 = chlo.broadcast_add %1748, %1753 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1755 = chlo.broadcast_multiply %1754, %cst_674 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1756 = chlo.broadcast_add %1755, %cst_675 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1757 = "mhlo.reshape"(%1756) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1758 = "mhlo.dot"(%1757, %cst_657) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1759 = chlo.broadcast_add %1758, %cst_658 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1760 = "mhlo.reshape"(%1759) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1761 = chlo.broadcast_maximum %1760, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1762 = "mhlo.reshape"(%1761) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1763 = "mhlo.dot"(%1762, %cst_653) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1764 = chlo.broadcast_add %1763, %cst_654 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1765 = "mhlo.reshape"(%1764) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1766 = chlo.broadcast_add %1765, %1756 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1767 = chlo.broadcast_multiply %1766, %cst_655 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1768 = chlo.broadcast_add %1767, %cst_656 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1769 = "mhlo.reshape"(%1768) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1770 = "mhlo.dot"(%1769, %cst_651) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1771 = chlo.broadcast_add %1770, %cst_652 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1772 = "mhlo.reshape"(%1771) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1773 = chlo.broadcast_maximum %1772, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1774 = "mhlo.reshape"(%1773) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1775 = "mhlo.dot"(%1774, %cst_647) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1776 = chlo.broadcast_add %1775, %cst_648 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1777 = "mhlo.reshape"(%1776) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1778 = chlo.broadcast_add %1777, %1768 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1779 = chlo.broadcast_multiply %1778, %cst_649 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1780 = chlo.broadcast_add %1779, %cst_650 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1781 = "mhlo.reshape"(%1780) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1782 = "mhlo.dot"(%1781, %cst_645) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1783 = chlo.broadcast_add %1782, %cst_646 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1784 = "mhlo.reshape"(%1783) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1785 = chlo.broadcast_maximum %1784, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1786 = "mhlo.reshape"(%1785) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1787 = "mhlo.dot"(%1786, %cst_641) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1788 = chlo.broadcast_add %1787, %cst_642 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1789 = "mhlo.reshape"(%1788) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1790 = chlo.broadcast_add %1789, %1780 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1791 = chlo.broadcast_multiply %1790, %cst_643 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1792 = chlo.broadcast_add %1791, %cst_644 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1793 = "mhlo.reshape"(%1792) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1794 = "mhlo.dot"(%1793, %cst_639) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1795 = chlo.broadcast_add %1794, %cst_640 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1796 = "mhlo.reshape"(%1795) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1797 = chlo.broadcast_maximum %1796, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1798 = "mhlo.reshape"(%1797) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1799 = "mhlo.dot"(%1798, %cst_631) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1800 = chlo.broadcast_add %1799, %cst_632 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1801 = "mhlo.reshape"(%1800) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1802 = chlo.broadcast_add %1801, %1792 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1803 = chlo.broadcast_multiply %1802, %cst_637 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1804 = chlo.broadcast_add %1803, %cst_638 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1805 = "mhlo.reshape"(%1804) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1806 = "mhlo.dot"(%1805, %cst_633) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1807 = chlo.broadcast_add %1806, %cst_634 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1808 = "mhlo.reshape"(%1807) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1809 = chlo.broadcast_add %1808, %1712 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%1810 = chlo.broadcast_multiply %1809, %cst_635 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1811 = chlo.broadcast_add %1810, %cst_636 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1812 = "mhlo.reshape"(%1811) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1813 = "mhlo.dot"(%1812, %cst_621) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1814 = chlo.broadcast_add %1813, %cst_622 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1815 = "mhlo.reshape"(%1814) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1816 = "mhlo.transpose"(%1815) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1817 = "mhlo.dot"(%1812, %cst_617) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1818 = "mhlo.reshape"(%1817) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1819 = "mhlo.broadcast_in_dim"(%cst_618) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%1820 = mhlo.add %1818, %1819 : tensor<1x384x128xf32>
%1821 = chlo.broadcast_multiply %1820, %cst_619 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1822 = chlo.broadcast_add %1821, %cst_620 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1823 = "mhlo.reshape"(%1822) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1824 = "mhlo.dot"(%1823, %cst_625) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1825 = chlo.broadcast_add %1824, %cst_626 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1826 = "mhlo.reshape"(%1825) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1827 = "mhlo.transpose"(%1826) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1828 = "mhlo.dot"(%1823, %cst_623) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1829 = chlo.broadcast_add %1828, %cst_624 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1830 = "mhlo.reshape"(%1829) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1831 = "mhlo.transpose"(%1830) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1832 = "mhlo.dot_general"(%1831, %1827) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%1833 = chlo.broadcast_multiply %1832, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%1834 = chlo.broadcast_add %1833, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%1835 = "mhlo.reduce"(%1834, %2) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1836 = linalg.tensor_expand_shape %1835 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1837 = chlo.broadcast_subtract %1834, %1836 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1838 = "mhlo.exponential"(%1837) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%1839 = "mhlo.reduce"(%1838, %1) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1840 = linalg.tensor_expand_shape %1839 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1841 = chlo.broadcast_divide %1838, %1840 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1842 = "mhlo.dot_general"(%1841, %1816) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%1843 = "mhlo.transpose"(%1842) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%1844 = "mhlo.reshape"(%1843) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%1845 = "mhlo.dot"(%1844, %cst_627) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1846 = chlo.broadcast_add %1845, %cst_628 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1847 = "mhlo.reshape"(%1846) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1848 = "mhlo.dot"(%1812, %cst_614) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1849 = chlo.broadcast_add %1848, %cst_615 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1850 = "mhlo.reshape"(%1849) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1851 = chlo.broadcast_multiply %1850, %cst_616 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1852 = chlo.broadcast_add %1851, %cst_628 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1853 = chlo.broadcast_add %1847, %1852 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1854 = chlo.broadcast_multiply %1853, %cst_629 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1855 = chlo.broadcast_add %1854, %cst_630 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1856 = "mhlo.reshape"(%1855) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1857 = "mhlo.dot"(%1856, %cst_612) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1858 = chlo.broadcast_add %1857, %cst_613 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1859 = "mhlo.reshape"(%1858) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1860 = chlo.broadcast_maximum %1859, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1861 = "mhlo.reshape"(%1860) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1862 = "mhlo.dot"(%1861, %cst_608) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1863 = chlo.broadcast_add %1862, %cst_609 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1864 = "mhlo.reshape"(%1863) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1865 = chlo.broadcast_add %1864, %1855 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1866 = chlo.broadcast_multiply %1865, %cst_610 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1867 = chlo.broadcast_add %1866, %cst_611 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1868 = "mhlo.reshape"(%1867) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1869 = "mhlo.dot"(%1868, %cst_606) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1870 = chlo.broadcast_add %1869, %cst_607 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1871 = "mhlo.reshape"(%1870) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1872 = chlo.broadcast_maximum %1871, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1873 = "mhlo.reshape"(%1872) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1874 = "mhlo.dot"(%1873, %cst_602) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1875 = chlo.broadcast_add %1874, %cst_603 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1876 = "mhlo.reshape"(%1875) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1877 = chlo.broadcast_add %1876, %1867 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1878 = chlo.broadcast_multiply %1877, %cst_604 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1879 = chlo.broadcast_add %1878, %cst_605 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1880 = "mhlo.reshape"(%1879) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1881 = "mhlo.dot"(%1880, %cst_600) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1882 = chlo.broadcast_add %1881, %cst_601 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1883 = "mhlo.reshape"(%1882) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1884 = chlo.broadcast_maximum %1883, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1885 = "mhlo.reshape"(%1884) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1886 = "mhlo.dot"(%1885, %cst_596) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1887 = chlo.broadcast_add %1886, %cst_597 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1888 = "mhlo.reshape"(%1887) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1889 = chlo.broadcast_add %1888, %1879 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1890 = chlo.broadcast_multiply %1889, %cst_598 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1891 = chlo.broadcast_add %1890, %cst_599 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1892 = "mhlo.reshape"(%1891) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1893 = "mhlo.dot"(%1892, %cst_594) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1894 = chlo.broadcast_add %1893, %cst_595 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1895 = "mhlo.reshape"(%1894) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1896 = chlo.broadcast_maximum %1895, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1897 = "mhlo.reshape"(%1896) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1898 = "mhlo.dot"(%1897, %cst_586) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1899 = chlo.broadcast_add %1898, %cst_587 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1900 = "mhlo.reshape"(%1899) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1901 = chlo.broadcast_add %1900, %1891 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1902 = chlo.broadcast_multiply %1901, %cst_592 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1903 = chlo.broadcast_add %1902, %cst_593 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1904 = "mhlo.reshape"(%1903) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1905 = "mhlo.dot"(%1904, %cst_588) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1906 = chlo.broadcast_add %1905, %cst_589 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1907 = "mhlo.reshape"(%1906) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1908 = chlo.broadcast_add %1907, %1811 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%1909 = chlo.broadcast_multiply %1908, %cst_590 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1910 = chlo.broadcast_add %1909, %cst_591 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1911 = "mhlo.reshape"(%1910) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1912 = "mhlo.dot"(%1911, %cst_576) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1913 = chlo.broadcast_add %1912, %cst_577 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1914 = "mhlo.reshape"(%1913) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1915 = "mhlo.transpose"(%1914) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1916 = "mhlo.dot"(%1911, %cst_572) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1917 = "mhlo.reshape"(%1916) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1918 = "mhlo.broadcast_in_dim"(%cst_573) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%1919 = mhlo.add %1917, %1918 : tensor<1x384x128xf32>
%1920 = chlo.broadcast_multiply %1919, %cst_574 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1921 = chlo.broadcast_add %1920, %cst_575 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1922 = "mhlo.reshape"(%1921) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1923 = "mhlo.dot"(%1922, %cst_580) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1924 = chlo.broadcast_add %1923, %cst_581 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1925 = "mhlo.reshape"(%1924) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1926 = "mhlo.transpose"(%1925) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1927 = "mhlo.dot"(%1922, %cst_578) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1928 = chlo.broadcast_add %1927, %cst_579 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1929 = "mhlo.reshape"(%1928) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1930 = "mhlo.transpose"(%1929) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1931 = "mhlo.dot_general"(%1930, %1926) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%1932 = chlo.broadcast_multiply %1931, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%1933 = chlo.broadcast_add %1932, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%1934 = "mhlo.reduce"(%1933, %2) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1935 = linalg.tensor_expand_shape %1934 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1936 = chlo.broadcast_subtract %1933, %1935 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1937 = "mhlo.exponential"(%1936) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%1938 = "mhlo.reduce"(%1937, %1) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1939 = linalg.tensor_expand_shape %1938 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1940 = chlo.broadcast_divide %1937, %1939 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1941 = "mhlo.dot_general"(%1940, %1915) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%1942 = "mhlo.transpose"(%1941) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%1943 = "mhlo.reshape"(%1942) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%1944 = "mhlo.dot"(%1943, %cst_582) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1945 = chlo.broadcast_add %1944, %cst_583 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1946 = "mhlo.reshape"(%1945) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1947 = "mhlo.dot"(%1911, %cst_569) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1948 = chlo.broadcast_add %1947, %cst_570 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1949 = "mhlo.reshape"(%1948) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1950 = chlo.broadcast_multiply %1949, %cst_571 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1951 = chlo.broadcast_add %1950, %cst_583 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1952 = chlo.broadcast_add %1946, %1951 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1953 = chlo.broadcast_multiply %1952, %cst_584 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1954 = chlo.broadcast_add %1953, %cst_585 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1955 = "mhlo.reshape"(%1954) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1956 = "mhlo.dot"(%1955, %cst_567) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1957 = chlo.broadcast_add %1956, %cst_568 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1958 = "mhlo.reshape"(%1957) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1959 = chlo.broadcast_maximum %1958, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1960 = "mhlo.reshape"(%1959) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1961 = "mhlo.dot"(%1960, %cst_563) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1962 = chlo.broadcast_add %1961, %cst_564 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1963 = "mhlo.reshape"(%1962) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1964 = chlo.broadcast_add %1963, %1954 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1965 = chlo.broadcast_multiply %1964, %cst_565 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1966 = chlo.broadcast_add %1965, %cst_566 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1967 = "mhlo.reshape"(%1966) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1968 = "mhlo.dot"(%1967, %cst_561) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1969 = chlo.broadcast_add %1968, %cst_562 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1970 = "mhlo.reshape"(%1969) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1971 = chlo.broadcast_maximum %1970, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1972 = "mhlo.reshape"(%1971) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1973 = "mhlo.dot"(%1972, %cst_557) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1974 = chlo.broadcast_add %1973, %cst_558 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1975 = "mhlo.reshape"(%1974) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1976 = chlo.broadcast_add %1975, %1966 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1977 = chlo.broadcast_multiply %1976, %cst_559 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1978 = chlo.broadcast_add %1977, %cst_560 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1979 = "mhlo.reshape"(%1978) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1980 = "mhlo.dot"(%1979, %cst_555) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1981 = chlo.broadcast_add %1980, %cst_556 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1982 = "mhlo.reshape"(%1981) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1983 = chlo.broadcast_maximum %1982, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1984 = "mhlo.reshape"(%1983) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1985 = "mhlo.dot"(%1984, %cst_551) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1986 = chlo.broadcast_add %1985, %cst_552 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1987 = "mhlo.reshape"(%1986) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1988 = chlo.broadcast_add %1987, %1978 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1989 = chlo.broadcast_multiply %1988, %cst_553 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1990 = chlo.broadcast_add %1989, %cst_554 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1991 = "mhlo.reshape"(%1990) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1992 = "mhlo.dot"(%1991, %cst_549) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1993 = chlo.broadcast_add %1992, %cst_550 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1994 = "mhlo.reshape"(%1993) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1995 = chlo.broadcast_maximum %1994, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1996 = "mhlo.reshape"(%1995) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1997 = "mhlo.dot"(%1996, %cst_541) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1998 = chlo.broadcast_add %1997, %cst_542 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1999 = "mhlo.reshape"(%1998) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2000 = chlo.broadcast_add %1999, %1990 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%2001 = chlo.broadcast_multiply %2000, %cst_547 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2002 = chlo.broadcast_add %2001, %cst_548 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2003 = "mhlo.reshape"(%2002) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2004 = "mhlo.dot"(%2003, %cst_543) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%2005 = chlo.broadcast_add %2004, %cst_544 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%2006 = "mhlo.reshape"(%2005) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%2007 = chlo.broadcast_add %2006, %1910 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%2008 = chlo.broadcast_multiply %2007, %cst_545 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%2009 = chlo.broadcast_add %2008, %cst_546 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%2010 = "mhlo.reshape"(%2009) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%2011 = "mhlo.dot"(%2010, %cst_486) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2012 = chlo.broadcast_add %2011, %cst_487 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2013 = "mhlo.reshape"(%2012) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%2014 = "mhlo.transpose"(%2013) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%2015 = "mhlo.dot"(%2010, %cst_482) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2016 = "mhlo.reshape"(%2015) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2017 = "mhlo.broadcast_in_dim"(%cst_483) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%2018 = mhlo.add %2016, %2017 : tensor<1x384x128xf32>
%2019 = chlo.broadcast_multiply %2018, %cst_484 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2020 = chlo.broadcast_add %2019, %cst_485 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2021 = "mhlo.reshape"(%2020) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2022 = "mhlo.dot"(%2021, %cst_490) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%2023 = chlo.broadcast_add %2022, %cst_491 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2024 = "mhlo.reshape"(%2023) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%2025 = "mhlo.transpose"(%2024) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%2026 = "mhlo.dot"(%2021, %cst_488) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%2027 = chlo.broadcast_add %2026, %cst_489 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2028 = "mhlo.reshape"(%2027) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%2029 = "mhlo.transpose"(%2028) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%2030 = "mhlo.dot_general"(%2029, %2025) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%2031 = chlo.broadcast_multiply %2030, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%2032 = chlo.broadcast_add %2031, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%2033 = "mhlo.reduce"(%2032, %2) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%2034 = linalg.tensor_expand_shape %2033 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%2035 = chlo.broadcast_subtract %2032, %2034 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%2036 = "mhlo.exponential"(%2035) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%2037 = "mhlo.reduce"(%2036, %1) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%2038 = linalg.tensor_expand_shape %2037 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%2039 = chlo.broadcast_divide %2036, %2038 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%2040 = "mhlo.dot_general"(%2039, %2014) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%2041 = "mhlo.transpose"(%2040) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%2042 = "mhlo.reshape"(%2041) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%2043 = "mhlo.dot"(%2042, %cst_492) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%2044 = chlo.broadcast_add %2043, %cst_493 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2045 = "mhlo.reshape"(%2044) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2046 = "mhlo.dot"(%2010, %cst_479) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2047 = chlo.broadcast_add %2046, %cst_480 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2048 = "mhlo.reshape"(%2047) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2049 = chlo.broadcast_multiply %2048, %cst_481 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2050 = chlo.broadcast_add %2049, %cst_493 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2051 = chlo.broadcast_add %2045, %2050 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%2052 = chlo.broadcast_multiply %2051, %cst_494 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2053 = chlo.broadcast_add %2052, %cst_495 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2054 = "mhlo.reshape"(%2053) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2055 = "mhlo.dot"(%2054, %cst_477) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%2056 = chlo.broadcast_add %2055, %cst_478 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%2057 = "mhlo.reshape"(%2056) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%2058 = chlo.broadcast_maximum %2057, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%2059 = "mhlo.reshape"(%2058) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%2060 = "mhlo.dot"(%2059, %cst_473) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2061 = chlo.broadcast_add %2060, %cst_474 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2062 = "mhlo.reshape"(%2061) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2063 = chlo.broadcast_add %2062, %2053 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%2064 = chlo.broadcast_multiply %2063, %cst_475 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2065 = chlo.broadcast_add %2064, %cst_476 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2066 = "mhlo.reshape"(%2065) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2067 = "mhlo.dot"(%2066, %cst_471) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%2068 = chlo.broadcast_add %2067, %cst_472 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%2069 = "mhlo.reshape"(%2068) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%2070 = chlo.broadcast_maximum %2069, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%2071 = "mhlo.reshape"(%2070) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%2072 = "mhlo.dot"(%2071, %cst_467) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2073 = chlo.broadcast_add %2072, %cst_468 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2074 = "mhlo.reshape"(%2073) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2075 = chlo.broadcast_add %2074, %2065 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%2076 = chlo.broadcast_multiply %2075, %cst_469 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2077 = chlo.broadcast_add %2076, %cst_470 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2078 = "mhlo.reshape"(%2077) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2079 = "mhlo.dot"(%2078, %cst_465) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%2080 = chlo.broadcast_add %2079, %cst_466 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%2081 = "mhlo.reshape"(%2080) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%2082 = chlo.broadcast_maximum %2081, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%2083 = "mhlo.reshape"(%2082) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%2084 = "mhlo.dot"(%2083, %cst_461) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2085 = chlo.broadcast_add %2084, %cst_462 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2086 = "mhlo.reshape"(%2085) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2087 = chlo.broadcast_add %2086, %2077 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%2088 = chlo.broadcast_multiply %2087, %cst_463 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2089 = chlo.broadcast_add %2088, %cst_464 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2090 = "mhlo.reshape"(%2089) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2091 = "mhlo.dot"(%2090, %cst_459) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%2092 = chlo.broadcast_add %2091, %cst_460 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%2093 = "mhlo.reshape"(%2092) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%2094 = chlo.broadcast_maximum %2093, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%2095 = "mhlo.reshape"(%2094) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%2096 = "mhlo.dot"(%2095, %cst_451) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2097 = chlo.broadcast_add %2096, %cst_452 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2098 = "mhlo.reshape"(%2097) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2099 = chlo.broadcast_add %2098, %2089 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%2100 = chlo.broadcast_multiply %2099, %cst_457 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2101 = chlo.broadcast_add %2100, %cst_458 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2102 = "mhlo.reshape"(%2101) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2103 = "mhlo.dot"(%2102, %cst_453) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%2104 = chlo.broadcast_add %2103, %cst_454 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%2105 = "mhlo.reshape"(%2104) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%2106 = chlo.broadcast_add %2105, %2009 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%2107 = chlo.broadcast_multiply %2106, %cst_455 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%2108 = chlo.broadcast_add %2107, %cst_456 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%2109 = "mhlo.reshape"(%2108) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%2110 = "mhlo.dot"(%2109, %cst_441) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2111 = chlo.broadcast_add %2110, %cst_442 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2112 = "mhlo.reshape"(%2111) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%2113 = "mhlo.transpose"(%2112) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%2114 = "mhlo.dot"(%2109, %cst_437) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2115 = "mhlo.reshape"(%2114) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2116 = "mhlo.broadcast_in_dim"(%cst_438) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%2117 = mhlo.add %2115, %2116 : tensor<1x384x128xf32>
%2118 = chlo.broadcast_multiply %2117, %cst_439 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2119 = chlo.broadcast_add %2118, %cst_440 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2120 = "mhlo.reshape"(%2119) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2121 = "mhlo.dot"(%2120, %cst_445) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%2122 = chlo.broadcast_add %2121, %cst_446 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2123 = "mhlo.reshape"(%2122) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%2124 = "mhlo.transpose"(%2123) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%2125 = "mhlo.dot"(%2120, %cst_443) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%2126 = chlo.broadcast_add %2125, %cst_444 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2127 = "mhlo.reshape"(%2126) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%2128 = "mhlo.transpose"(%2127) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%2129 = "mhlo.dot_general"(%2128, %2124) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%2130 = chlo.broadcast_multiply %2129, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%2131 = chlo.broadcast_add %2130, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%2132 = "mhlo.reduce"(%2131, %2) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%2133 = linalg.tensor_expand_shape %2132 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%2134 = chlo.broadcast_subtract %2131, %2133 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%2135 = "mhlo.exponential"(%2134) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%2136 = "mhlo.reduce"(%2135, %1) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%2137 = linalg.tensor_expand_shape %2136 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%2138 = chlo.broadcast_divide %2135, %2137 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%2139 = "mhlo.dot_general"(%2138, %2113) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%2140 = "mhlo.transpose"(%2139) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%2141 = "mhlo.reshape"(%2140) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%2142 = "mhlo.dot"(%2141, %cst_447) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%2143 = chlo.broadcast_add %2142, %cst_448 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2144 = "mhlo.reshape"(%2143) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2145 = "mhlo.dot"(%2109, %cst_434) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2146 = chlo.broadcast_add %2145, %cst_435 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2147 = "mhlo.reshape"(%2146) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2148 = chlo.broadcast_multiply %2147, %cst_436 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2149 = chlo.broadcast_add %2148, %cst_448 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2150 = chlo.broadcast_add %2144, %2149 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%2151 = chlo.broadcast_multiply %2150, %cst_449 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2152 = chlo.broadcast_add %2151, %cst_450 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2153 = "mhlo.reshape"(%2152) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2154 = "mhlo.dot"(%2153, %cst_432) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%2155 = chlo.broadcast_add %2154, %cst_433 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%2156 = "mhlo.reshape"(%2155) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%2157 = chlo.broadcast_maximum %2156, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%2158 = "mhlo.reshape"(%2157) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%2159 = "mhlo.dot"(%2158, %cst_428) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2160 = chlo.broadcast_add %2159, %cst_429 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2161 = "mhlo.reshape"(%2160) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2162 = chlo.broadcast_add %2161, %2152 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%2163 = chlo.broadcast_multiply %2162, %cst_430 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2164 = chlo.broadcast_add %2163, %cst_431 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2165 = "mhlo.reshape"(%2164) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2166 = "mhlo.dot"(%2165, %cst_426) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%2167 = chlo.broadcast_add %2166, %cst_427 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%2168 = "mhlo.reshape"(%2167) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%2169 = chlo.broadcast_maximum %2168, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%2170 = "mhlo.reshape"(%2169) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%2171 = "mhlo.dot"(%2170, %cst_422) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2172 = chlo.broadcast_add %2171, %cst_423 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2173 = "mhlo.reshape"(%2172) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2174 = chlo.broadcast_add %2173, %2164 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%2175 = chlo.broadcast_multiply %2174, %cst_424 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2176 = chlo.broadcast_add %2175, %cst_425 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2177 = "mhlo.reshape"(%2176) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2178 = "mhlo.dot"(%2177, %cst_420) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%2179 = chlo.broadcast_add %2178, %cst_421 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%2180 = "mhlo.reshape"(%2179) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%2181 = chlo.broadcast_maximum %2180, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%2182 = "mhlo.reshape"(%2181) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%2183 = "mhlo.dot"(%2182, %cst_416) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2184 = chlo.broadcast_add %2183, %cst_417 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2185 = "mhlo.reshape"(%2184) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2186 = chlo.broadcast_add %2185, %2176 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%2187 = chlo.broadcast_multiply %2186, %cst_418 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2188 = chlo.broadcast_add %2187, %cst_419 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2189 = "mhlo.reshape"(%2188) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2190 = "mhlo.dot"(%2189, %cst_414) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%2191 = chlo.broadcast_add %2190, %cst_415 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%2192 = "mhlo.reshape"(%2191) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%2193 = chlo.broadcast_maximum %2192, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%2194 = "mhlo.reshape"(%2193) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%2195 = "mhlo.dot"(%2194, %cst_406) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2196 = chlo.broadcast_add %2195, %cst_407 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2197 = "mhlo.reshape"(%2196) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2198 = chlo.broadcast_add %2197, %2188 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%2199 = chlo.broadcast_multiply %2198, %cst_412 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2200 = chlo.broadcast_add %2199, %cst_413 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2201 = "mhlo.reshape"(%2200) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2202 = "mhlo.dot"(%2201, %cst_408) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%2203 = chlo.broadcast_add %2202, %cst_409 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%2204 = "mhlo.reshape"(%2203) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%2205 = chlo.broadcast_add %2204, %2108 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%2206 = chlo.broadcast_multiply %2205, %cst_410 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%2207 = chlo.broadcast_add %2206, %cst_411 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%2208 = "mhlo.reshape"(%2207) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%2209 = "mhlo.dot"(%2208, %cst_396) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2210 = chlo.broadcast_add %2209, %cst_397 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2211 = "mhlo.reshape"(%2210) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%2212 = "mhlo.transpose"(%2211) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%2213 = "mhlo.dot"(%2208, %cst_392) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2214 = "mhlo.reshape"(%2213) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2215 = "mhlo.broadcast_in_dim"(%cst_393) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%2216 = mhlo.add %2214, %2215 : tensor<1x384x128xf32>
%2217 = chlo.broadcast_multiply %2216, %cst_394 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2218 = chlo.broadcast_add %2217, %cst_395 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2219 = "mhlo.reshape"(%2218) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2220 = "mhlo.dot"(%2219, %cst_400) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%2221 = chlo.broadcast_add %2220, %cst_401 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2222 = "mhlo.reshape"(%2221) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%2223 = "mhlo.transpose"(%2222) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%2224 = "mhlo.dot"(%2219, %cst_398) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%2225 = chlo.broadcast_add %2224, %cst_399 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2226 = "mhlo.reshape"(%2225) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%2227 = "mhlo.transpose"(%2226) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%2228 = "mhlo.dot_general"(%2227, %2223) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%2229 = chlo.broadcast_multiply %2228, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%2230 = chlo.broadcast_add %2229, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%2231 = "mhlo.reduce"(%2230, %2) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%2232 = linalg.tensor_expand_shape %2231 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%2233 = chlo.broadcast_subtract %2230, %2232 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%2234 = "mhlo.exponential"(%2233) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%2235 = "mhlo.reduce"(%2234, %1) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%2236 = linalg.tensor_expand_shape %2235 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%2237 = chlo.broadcast_divide %2234, %2236 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%2238 = "mhlo.dot_general"(%2237, %2212) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%2239 = "mhlo.transpose"(%2238) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%2240 = "mhlo.reshape"(%2239) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%2241 = "mhlo.dot"(%2240, %cst_402) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%2242 = chlo.broadcast_add %2241, %cst_403 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2243 = "mhlo.reshape"(%2242) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2244 = "mhlo.dot"(%2208, %cst_389) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2245 = chlo.broadcast_add %2244, %cst_390 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2246 = "mhlo.reshape"(%2245) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2247 = chlo.broadcast_multiply %2246, %cst_391 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2248 = chlo.broadcast_add %2247, %cst_403 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2249 = chlo.broadcast_add %2243, %2248 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%2250 = chlo.broadcast_multiply %2249, %cst_404 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2251 = chlo.broadcast_add %2250, %cst_405 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2252 = "mhlo.reshape"(%2251) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2253 = "mhlo.dot"(%2252, %cst_387) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%2254 = chlo.broadcast_add %2253, %cst_388 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%2255 = "mhlo.reshape"(%2254) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%2256 = chlo.broadcast_maximum %2255, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%2257 = "mhlo.reshape"(%2256) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%2258 = "mhlo.dot"(%2257, %cst_383) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2259 = chlo.broadcast_add %2258, %cst_384 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2260 = "mhlo.reshape"(%2259) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2261 = chlo.broadcast_add %2260, %2251 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%2262 = chlo.broadcast_multiply %2261, %cst_385 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2263 = chlo.broadcast_add %2262, %cst_386 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2264 = "mhlo.reshape"(%2263) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2265 = "mhlo.dot"(%2264, %cst_381) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%2266 = chlo.broadcast_add %2265, %cst_382 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%2267 = "mhlo.reshape"(%2266) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%2268 = chlo.broadcast_maximum %2267, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%2269 = "mhlo.reshape"(%2268) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%2270 = "mhlo.dot"(%2269, %cst_377) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2271 = chlo.broadcast_add %2270, %cst_378 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2272 = "mhlo.reshape"(%2271) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2273 = chlo.broadcast_add %2272, %2263 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%2274 = chlo.broadcast_multiply %2273, %cst_379 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2275 = chlo.broadcast_add %2274, %cst_380 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2276 = "mhlo.reshape"(%2275) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2277 = "mhlo.dot"(%2276, %cst_375) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%2278 = chlo.broadcast_add %2277, %cst_376 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%2279 = "mhlo.reshape"(%2278) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%2280 = chlo.broadcast_maximum %2279, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%2281 = "mhlo.reshape"(%2280) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%2282 = "mhlo.dot"(%2281, %cst_371) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2283 = chlo.broadcast_add %2282, %cst_372 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2284 = "mhlo.reshape"(%2283) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2285 = chlo.broadcast_add %2284, %2275 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%2286 = chlo.broadcast_multiply %2285, %cst_373 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2287 = chlo.broadcast_add %2286, %cst_374 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2288 = "mhlo.reshape"(%2287) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2289 = "mhlo.dot"(%2288, %cst_369) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%2290 = chlo.broadcast_add %2289, %cst_370 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%2291 = "mhlo.reshape"(%2290) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%2292 = chlo.broadcast_maximum %2291, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%2293 = "mhlo.reshape"(%2292) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%2294 = "mhlo.dot"(%2293, %cst_361) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2295 = chlo.broadcast_add %2294, %cst_362 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2296 = "mhlo.reshape"(%2295) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2297 = chlo.broadcast_add %2296, %2287 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%2298 = chlo.broadcast_multiply %2297, %cst_367 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2299 = chlo.broadcast_add %2298, %cst_368 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2300 = "mhlo.reshape"(%2299) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2301 = "mhlo.dot"(%2300, %cst_363) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%2302 = chlo.broadcast_add %2301, %cst_364 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%2303 = "mhlo.reshape"(%2302) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%2304 = chlo.broadcast_add %2303, %2207 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%2305 = chlo.broadcast_multiply %2304, %cst_365 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%2306 = chlo.broadcast_add %2305, %cst_366 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%2307 = "mhlo.reshape"(%2306) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%2308 = "mhlo.dot"(%2307, %cst_351) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2309 = chlo.broadcast_add %2308, %cst_352 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2310 = "mhlo.reshape"(%2309) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%2311 = "mhlo.transpose"(%2310) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%2312 = "mhlo.dot"(%2307, %cst_347) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2313 = "mhlo.reshape"(%2312) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2314 = "mhlo.broadcast_in_dim"(%cst_348) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%2315 = mhlo.add %2313, %2314 : tensor<1x384x128xf32>
%2316 = chlo.broadcast_multiply %2315, %cst_349 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2317 = chlo.broadcast_add %2316, %cst_350 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2318 = "mhlo.reshape"(%2317) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2319 = "mhlo.dot"(%2318, %cst_355) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%2320 = chlo.broadcast_add %2319, %cst_356 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2321 = "mhlo.reshape"(%2320) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%2322 = "mhlo.transpose"(%2321) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%2323 = "mhlo.dot"(%2318, %cst_353) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%2324 = chlo.broadcast_add %2323, %cst_354 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2325 = "mhlo.reshape"(%2324) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%2326 = "mhlo.transpose"(%2325) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%2327 = "mhlo.dot_general"(%2326, %2322) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%2328 = chlo.broadcast_multiply %2327, %4 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%2329 = chlo.broadcast_add %2328, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%2330 = "mhlo.reduce"(%2329, %2) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%2331 = linalg.tensor_expand_shape %2330 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%2332 = chlo.broadcast_subtract %2329, %2331 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%2333 = "mhlo.exponential"(%2332) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%2334 = "mhlo.reduce"(%2333, %1) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%2335 = linalg.tensor_expand_shape %2334 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%2336 = chlo.broadcast_divide %2333, %2335 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%2337 = "mhlo.dot_general"(%2336, %2311) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%2338 = "mhlo.transpose"(%2337) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%2339 = "mhlo.reshape"(%2338) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%2340 = "mhlo.dot"(%2339, %cst_357) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%2341 = chlo.broadcast_add %2340, %cst_358 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2342 = "mhlo.reshape"(%2341) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2343 = "mhlo.dot"(%2307, %cst_344) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2344 = chlo.broadcast_add %2343, %cst_345 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2345 = "mhlo.reshape"(%2344) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2346 = chlo.broadcast_multiply %2345, %cst_346 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2347 = chlo.broadcast_add %2346, %cst_358 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2348 = chlo.broadcast_add %2342, %2347 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%2349 = chlo.broadcast_multiply %2348, %cst_359 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2350 = chlo.broadcast_add %2349, %cst_360 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2351 = "mhlo.reshape"(%2350) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2352 = "mhlo.dot"(%2351, %cst_342) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%2353 = chlo.broadcast_add %2352, %cst_343 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%2354 = "mhlo.reshape"(%2353) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%2355 = chlo.broadcast_maximum %2354, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%2356 = "mhlo.reshape"(%2355) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%2357 = "mhlo.dot"(%2356, %cst_338) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2358 = chlo.broadcast_add %2357, %cst_339 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2359 = "mhlo.reshape"(%2358) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2360 = chlo.broadcast_add %2359, %2350 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%2361 = chlo.broadcast_multiply %2360, %cst_340 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2362 = chlo.broadcast_add %2361, %cst_341 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2363 = "mhlo.reshape"(%2362) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2364 = "mhlo.dot"(%2363, %cst_336) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%2365 = chlo.broadcast_add %2364, %cst_337 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%2366 = "mhlo.reshape"(%2365) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%2367 = chlo.broadcast_maximum %2366, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%2368 = "mhlo.reshape"(%2367) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%2369 = "mhlo.dot"(%2368, %cst_332) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2370 = chlo.broadcast_add %2369, %cst_333 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2371 = "mhlo.reshape"(%2370) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2372 = chlo.broadcast_add %2371, %2362 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%2373 = chlo.broadcast_multiply %2372, %cst_334 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2374 = chlo.broadcast_add %2373, %cst_335 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2375 = "mhlo.reshape"(%2374) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2376 = "mhlo.dot"(%2375, %cst_330) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%2377 = chlo.broadcast_add %2376, %cst_331 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%2378 = "mhlo.reshape"(%2377) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%2379 = chlo.broadcast_maximum %2378, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%2380 = "mhlo.reshape"(%2379) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%2381 = "mhlo.dot"(%2380, %cst_326) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2382 = chlo.broadcast_add %2381, %cst_327 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2383 = "mhlo.reshape"(%2382) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2384 = chlo.broadcast_add %2383, %2374 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%2385 = chlo.broadcast_multiply %2384, %cst_328 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2386 = chlo.broadcast_add %2385, %cst_329 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2387 = "mhlo.reshape"(%2386) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2388 = "mhlo.dot"(%2387, %cst_324) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%2389 = chlo.broadcast_add %2388, %cst_325 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%2390 = "mhlo.reshape"(%2389) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%2391 = chlo.broadcast_maximum %2390, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%2392 = "mhlo.reshape"(%2391) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%2393 = "mhlo.dot"(%2392, %cst_316) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2394 = chlo.broadcast_add %2393, %cst_317 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2395 = "mhlo.reshape"(%2394) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2396 = chlo.broadcast_add %2395, %2386 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%2397 = chlo.broadcast_multiply %2396, %cst_322 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2398 = chlo.broadcast_add %2397, %cst_323 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2399 = "mhlo.reshape"(%2398) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2400 = "mhlo.dot"(%2399, %cst_318) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%2401 = chlo.broadcast_add %2400, %cst_319 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%2402 = "mhlo.reshape"(%2401) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%2403 = chlo.broadcast_add %2402, %2306 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%2404 = chlo.broadcast_multiply %2403, %cst_320 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%2405 = chlo.broadcast_add %2404, %cst_321 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%2406 = "mhlo.reshape"(%2405) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%2407 = "mhlo.transpose"(%cst) {permutation = dense<[1, 0]> : tensor<2xi64>} : (tensor<2x512xf32>) -> tensor<512x2xf32>
%2408 = "mhlo.dot"(%2406, %2407) : (tensor<384x512xf32>, tensor<512x2xf32>) -> tensor<384x2xf32>
%2409 = "mhlo.broadcast_in_dim"(%cst_0) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<2xf32>) -> tensor<384x2xf32>
%2410 = mhlo.add %2408, %2409 : tensor<384x2xf32>
%2411 = "mhlo.reshape"(%2410) : (tensor<384x2xf32>) -> tensor<1x384x2xf32>
%2412 = "mhlo.transpose"(%2411) {permutation = dense<[2, 0, 1]> : tensor<3xi64>} : (tensor<1x384x2xf32>) -> tensor<2x1x384xf32>
%2413 = "mhlo.slice"(%2412) {limit_indices = dense<[1, 1, 384]> : tensor<3xi64>, start_indices = dense<0> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} : (tensor<2x1x384xf32>) -> tensor<1x1x384xf32>
%2414 = linalg.tensor_collapse_shape %2413 [[0], [1, 2]] : tensor<1x1x384xf32> into tensor<1x384xf32>
%2415 = "mhlo.slice"(%2412) {limit_indices = dense<[2, 1, 384]> : tensor<3xi64>, start_indices = dense<[1, 0, 0]> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} : (tensor<2x1x384xf32>) -> tensor<1x1x384xf32>
%2416 = linalg.tensor_collapse_shape %2415 [[0], [1, 2]] : tensor<1x1x384xf32> into tensor<1x384xf32>
return %2416, %2414 : tensor<1x384xf32>, tensor<1x384xf32>
}
}
// -----// IR Dump After Canonicalizer //----- //
builtin.func private @serving_default__ireesm(%arg0: tensor<1x384xi32>, %arg1: tensor<1x384xi32>, %arg2: tensor<1x384xi32>) -> (tensor<1x384xf32>, tensor<1x384xf32>) attributes {tf.entry_function = {control_outputs = "", inputs = "segment_ids:0,input_mask:0,input_ids:0", outputs = "end_logits:0,start_logits:0"}} {
%0 = mhlo.constant dense<1.000000e+00> : tensor<1x384x1xf32>
%1 = mhlo.constant dense<1.000000e+04> : tensor<f32>
%2 = mhlo.constant dense<0.176776692> : tensor<f32>
%3 = mhlo.constant dense<-1.000000e+04> : tensor<f32>
%4 = mhlo.constant dense<0xFF800000> : tensor<f32>
%5 = mhlo.constant dense<0.000000e+00> : tensor<f32>
%cst = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_0 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_2 = constant opaque<"_", "0xDEADBEEF"> : tensor<384x512xf32>
%6 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<1x384x512xf32>
%cst_3 = constant opaque<"_", "0xDEADBEEF"> : tensor<2x512xf32>
%cst_4 = constant opaque<"_", "0xDEADBEEF"> : tensor<30522x128xf32>
%cst_5 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_6 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_7 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_8 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_9 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_10 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_11 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_12 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_13 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_14 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_15 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_16 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_17 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_18 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_19 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_20 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_21 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_22 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_23 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_24 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_25 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_26 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_27 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_28 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_29 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_30 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_31 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_32 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_33 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_34 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_35 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_36 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_37 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_38 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_39 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_40 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_41 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_42 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_43 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_44 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_45 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_46 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_47 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_48 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_49 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_50 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_51 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_52 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_53 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_54 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_55 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_56 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_57 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_58 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_59 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_60 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_61 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_62 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_63 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_64 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_65 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_66 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_67 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_68 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_69 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_70 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_71 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_72 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_73 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_74 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_75 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_76 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_77 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_78 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_79 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_80 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_81 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_82 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_83 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_84 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_85 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_86 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_87 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_88 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_89 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_90 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_91 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_92 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_93 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_94 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_95 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_96 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_97 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_98 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_99 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_100 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_101 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_102 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_103 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_104 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_105 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_106 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_107 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_108 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_109 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_110 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_111 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_112 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_113 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_114 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_115 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_116 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_117 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_118 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_119 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_120 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_121 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_122 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_123 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_124 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_125 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_126 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_127 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_128 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_129 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_130 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_131 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_132 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_133 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_134 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_135 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_136 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_137 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_138 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_139 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_140 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_141 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_142 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_143 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_144 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_145 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_146 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_147 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_148 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_149 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_150 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_151 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_152 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_153 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_154 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_155 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_156 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_157 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_158 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_159 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_160 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_161 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_162 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_163 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_164 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_165 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_166 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_167 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_168 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_169 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_170 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_171 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_172 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_173 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_174 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_175 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_176 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_177 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_178 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_179 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_180 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_181 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_182 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_183 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_184 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_185 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_186 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_187 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_188 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_189 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_190 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_191 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_192 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_193 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_194 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_195 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_196 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_197 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_198 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_199 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_200 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_201 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_202 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_203 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_204 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_205 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_206 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_207 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_208 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_209 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_210 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_211 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_212 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_213 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_214 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_215 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_216 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_217 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_218 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_219 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_220 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_221 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_222 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_223 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_224 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_225 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_226 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_227 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_228 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_229 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_230 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_231 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_232 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_233 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_234 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_235 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_236 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_237 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_238 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_239 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_240 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_241 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_242 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_243 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_244 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_245 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_246 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_247 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_248 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_249 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_250 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_251 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_252 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_253 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_254 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_255 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_256 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_257 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_258 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_259 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_260 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_261 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_262 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_263 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_264 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_265 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_266 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_267 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_268 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_269 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_270 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_271 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_272 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_273 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_274 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_275 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_276 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_277 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_278 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_279 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_280 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_281 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_282 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_283 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_284 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_285 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_286 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_287 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_288 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_289 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_290 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_291 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_292 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_293 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_294 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_295 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_296 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_297 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_298 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_299 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_300 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_301 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_302 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_303 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_304 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_305 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_306 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_307 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_308 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_309 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_310 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_311 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_312 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_313 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_314 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_315 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_316 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_317 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_318 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_319 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_320 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_321 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_322 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_323 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_324 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_325 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_326 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_327 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_328 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_329 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_330 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_331 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_332 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_333 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_334 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_335 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_336 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_337 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_338 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_339 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_340 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_341 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_342 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_343 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_344 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_345 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_346 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_347 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_348 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_349 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_350 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_351 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_352 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_353 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_354 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_355 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_356 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_357 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_358 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_359 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_360 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_361 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_362 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_363 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_364 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_365 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_366 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_367 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_368 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_369 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_370 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_371 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_372 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_373 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_374 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_375 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_376 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_377 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_378 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_379 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_380 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_381 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_382 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_383 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_384 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_385 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_386 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_387 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_388 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_389 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_390 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_391 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_392 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_393 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_394 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_395 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_396 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_397 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_398 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_399 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_400 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_401 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_402 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_403 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_404 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_405 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_406 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_407 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_408 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_409 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_410 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_411 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_412 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_413 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_414 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_415 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_416 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_417 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_418 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_419 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_420 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_421 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_422 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_423 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_424 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_425 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_426 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_427 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_428 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_429 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_430 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_431 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_432 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_433 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_434 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_435 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_436 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_437 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_438 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_439 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_440 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_441 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_442 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_443 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_444 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_445 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_446 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_447 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_448 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_449 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_450 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_451 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_452 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_453 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_454 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_455 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_456 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_457 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_458 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_459 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_460 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_461 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_462 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_463 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_464 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_465 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_466 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_467 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_468 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_469 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_470 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_471 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_472 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_473 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_474 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_475 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_476 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_477 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_478 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_479 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_480 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_481 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_482 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_483 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_484 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_485 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_486 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_487 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_488 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_489 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_490 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_491 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_492 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_493 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_494 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_495 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_496 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_497 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_498 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_499 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_500 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_501 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_502 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_503 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_504 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_505 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_506 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_507 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_508 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_509 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_510 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_511 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_512 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_513 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_514 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_515 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_516 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_517 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_518 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_519 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_520 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_521 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_522 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_523 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_524 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_525 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_526 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_527 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_528 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_529 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_530 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_531 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_532 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_533 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_534 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_535 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_536 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_537 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_538 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_539 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_540 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_541 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_542 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_543 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_544 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_545 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_546 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_547 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_548 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_549 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_550 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_551 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_552 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_553 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_554 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_555 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_556 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_557 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_558 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_559 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_560 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_561 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_562 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_563 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_564 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_565 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_566 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_567 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_568 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_569 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_570 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_571 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_572 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_573 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_574 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_575 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_576 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_577 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_578 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_579 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_580 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_581 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_582 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_583 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_584 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_585 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_586 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_587 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_588 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_589 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_590 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_591 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_592 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_593 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_594 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_595 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_596 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_597 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_598 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_599 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_600 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_601 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_602 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_603 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_604 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_605 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_606 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_607 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_608 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_609 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_610 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_611 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_612 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_613 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_614 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_615 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_616 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_617 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_618 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_619 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_620 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_621 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_622 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_623 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_624 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_625 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_626 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_627 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_628 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_629 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_630 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_631 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_632 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_633 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_634 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_635 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_636 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_637 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_638 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_639 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_640 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_641 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_642 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_643 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_644 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_645 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_646 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_647 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_648 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_649 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_650 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_651 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_652 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_653 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_654 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_655 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_656 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_657 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_658 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_659 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_660 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_661 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_662 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_663 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_664 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_665 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_666 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_667 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_668 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_669 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_670 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_671 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_672 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_673 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_674 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_675 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_676 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_677 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_678 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_679 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_680 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_681 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_682 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_683 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_684 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_685 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_686 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_687 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_688 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_689 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_690 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_691 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_692 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_693 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_694 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_695 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_696 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_697 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_698 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_699 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_700 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_701 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_702 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_703 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_704 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_705 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_706 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_707 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_708 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_709 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_710 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_711 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_712 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_713 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_714 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_715 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_716 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_717 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_718 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_719 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_720 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_721 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_722 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_723 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_724 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_725 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_726 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_727 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_728 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_729 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_730 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_731 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_732 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_733 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_734 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_735 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_736 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_737 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_738 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_739 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_740 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_741 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_742 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_743 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_744 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_745 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_746 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_747 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_748 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_749 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_750 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_751 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_752 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_753 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_754 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_755 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_756 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_757 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_758 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_759 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_760 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_761 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_762 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_763 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_764 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_765 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_766 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_767 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_768 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_769 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_770 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_771 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_772 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_773 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_774 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_775 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_776 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_777 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_778 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_779 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_780 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_781 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_782 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_783 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_784 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_785 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_786 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_787 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_788 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_789 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_790 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_791 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_792 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_793 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_794 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_795 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_796 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_797 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_798 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_799 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_800 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_801 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_802 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_803 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_804 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_805 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_806 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_807 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_808 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_809 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_810 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_811 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_812 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_813 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_814 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_815 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_816 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_817 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_818 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_819 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_820 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_821 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_822 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_823 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_824 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_825 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_826 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_827 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_828 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_829 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_830 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_831 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_832 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_833 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_834 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_835 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_836 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_837 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_838 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_839 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_840 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_841 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_842 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_843 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_844 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_845 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_846 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_847 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_848 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_849 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_850 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_851 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_852 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_853 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_854 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_855 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_856 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_857 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_858 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_859 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_860 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_861 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_862 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_863 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_864 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_865 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_866 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_867 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_868 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_869 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_870 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_871 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_872 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_873 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_874 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_875 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_876 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_877 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_878 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_879 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_880 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_881 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_882 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_883 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_884 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_885 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_886 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_887 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_888 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_889 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_890 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_891 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_892 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_893 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_894 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_895 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_896 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_897 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_898 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_899 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_900 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_901 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_902 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_903 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_904 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_905 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_906 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_907 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_908 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_909 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_910 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_911 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_912 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_913 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_914 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_915 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_916 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_917 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_918 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_919 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_920 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_921 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_922 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_923 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_924 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_925 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_926 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_927 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_928 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_929 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_930 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_931 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_932 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_933 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_934 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_935 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_936 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_937 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_938 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_939 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_940 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_941 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_942 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_943 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_944 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_945 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_946 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_947 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_948 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_949 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_950 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_951 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_952 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_953 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_954 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_955 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_956 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_957 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_958 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_959 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_960 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_961 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_962 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_963 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_964 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_965 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_966 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_967 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_968 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_969 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_970 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_971 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_972 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_973 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_974 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_975 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_976 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_977 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_978 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_979 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_980 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_981 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_982 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_983 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_984 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_985 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_986 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_987 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_988 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_989 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_990 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_991 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_992 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_993 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_994 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_995 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_996 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_997 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_998 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_999 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1000 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_1001 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1002 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_1003 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1004 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1005 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1006 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1007 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1008 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1009 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1010 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1011 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1012 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1013 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_1014 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1015 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1016 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1017 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1018 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1019 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_1020 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1021 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1022 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1023 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1024 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1025 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_1026 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1027 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1028 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1029 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1030 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1031 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_1032 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1033 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1034 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1035 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1036 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1037 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_1038 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1039 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1040 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1041 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1042 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1043 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_1044 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1045 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_1046 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1047 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x128xf32>
%cst_1048 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1049 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1050 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1051 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1052 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1053 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1054 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1055 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1056 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1057 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1058 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_1059 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1060 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1061 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1062 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1063 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1064 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_1065 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1066 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1067 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1068 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1069 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1070 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_1071 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1072 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1073 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1074 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1075 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1076 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_1077 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1078 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1079 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1080 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1081 = constant opaque<"_", "0xDEADBEEF"> : tensor<512xf32>
%cst_1082 = constant opaque<"_", "0xDEADBEEF"> : tensor<128x512xf32>
%cst_1083 = constant opaque<"_", "0xDEADBEEF"> : tensor<128xf32>
%cst_1084 = constant opaque<"_", "0xDEADBEEF"> : tensor<512x128xf32>
%cst_1085 = constant dense<[0.0287729427, 0.0297581609]> : tensor<2xf32>
%cst_1086 = constant opaque<"_", "0xDEADBEEF"> : tensor<2x512xf32>
%7 = linalg.tensor_expand_shape %arg2 [[0], [1, 2]] : tensor<1x384xi32> into tensor<1x384x1xi32>
%8 = "mhlo.torch_index_select"(%cst_4, %7) {batch_dims = 0 : i64, dim = 0 : i64} : (tensor<30522x128xf32>, tensor<1x384x1xi32>) -> tensor<1x384x1x128xf32>
%9 = "mhlo.reshape"(%8) : (tensor<1x384x1x128xf32>) -> tensor<1x384x128xf32>
%10 = "mhlo.slice"(%9) {limit_indices = dense<[1, 384, 128]> : tensor<3xi64>, start_indices = dense<[0, 1, 0]> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} : (tensor<1x384x128xf32>) -> tensor<1x383x128xf32>
%11 = "mhlo.pad"(%10, %5) {edge_padding_high = dense<[0, 1, 0]> : tensor<3xi64>, edge_padding_low = dense<0> : tensor<3xi64>, interior_padding = dense<0> : tensor<3xi64>} : (tensor<1x383x128xf32>, tensor<f32>) -> tensor<1x384x128xf32>
%12 = "mhlo.slice"(%9) {limit_indices = dense<[1, 383, 128]> : tensor<3xi64>, start_indices = dense<0> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} : (tensor<1x384x128xf32>) -> tensor<1x383x128xf32>
%13 = "mhlo.pad"(%12, %5) {edge_padding_high = dense<0> : tensor<3xi64>, edge_padding_low = dense<[0, 1, 0]> : tensor<3xi64>, interior_padding = dense<0> : tensor<3xi64>} : (tensor<1x383x128xf32>, tensor<f32>) -> tensor<1x384x128xf32>
%14 = "mhlo.concatenate"(%11, %9, %13) {dimension = 2 : i64} : (tensor<1x384x128xf32>, tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x384xf32>
%15 = "mhlo.reshape"(%14) : (tensor<1x384x384xf32>) -> tensor<384x384xf32>
%16 = "mhlo.dot"(%15, %cst_2) : (tensor<384x384xf32>, tensor<384x512xf32>) -> tensor<384x512xf32>
%17 = chlo.broadcast_add %16, %cst_1 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%18 = "mhlo.reshape"(%17) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%19 = "mhlo.convert"(%arg1) : (tensor<1x384xi32>) -> tensor<1x384xf32>
%20 = "mhlo.reshape"(%19) : (tensor<1x384xf32>) -> tensor<1x1x384xf32>
%21 = chlo.broadcast_multiply %20, %0 : (tensor<1x1x384xf32>, tensor<1x384x1xf32>) -> tensor<1x384x384xf32>
%22 = linalg.tensor_expand_shape %21 [[0], [1, 2], [3]] : tensor<1x384x384xf32> into tensor<1x1x384x384xf32>
%23 = chlo.broadcast_multiply %22, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x1x384x384xf32>, tensor<f32>) -> tensor<1x1x384x384xf32>
%24 = chlo.broadcast_add %23, %3 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x1x384x384xf32>, tensor<f32>) -> tensor<1x1x384x384xf32>
%25 = "mhlo.torch_index_select"(%cst_3, %arg0) {batch_dims = 0 : i64, dim = 0 : i64} : (tensor<2x512xf32>, tensor<1x384xi32>) -> tensor<1x384x512xf32>
%26 = chlo.broadcast_add %18, %25 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%27 = chlo.broadcast_add %26, %6 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%28 = chlo.broadcast_multiply %27, %cst_0 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%29 = chlo.broadcast_add %28, %cst {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%30 = "mhlo.reshape"(%29) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%31 = "mhlo.dot"(%30, %cst_14) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%32 = chlo.broadcast_add %31, %cst_13 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%33 = "mhlo.reshape"(%32) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%34 = "mhlo.transpose"(%33) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%35 = "mhlo.dot"(%30, %cst_18) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%36 = "mhlo.reshape"(%35) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%37 = "mhlo.broadcast_in_dim"(%cst_17) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%38 = mhlo.add %36, %37 : tensor<1x384x128xf32>
%39 = chlo.broadcast_multiply %38, %cst_16 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%40 = chlo.broadcast_add %39, %cst_15 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%41 = "mhlo.reshape"(%40) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%42 = "mhlo.dot"(%41, %cst_10) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%43 = chlo.broadcast_add %42, %cst_9 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%44 = "mhlo.reshape"(%43) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%45 = "mhlo.transpose"(%44) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%46 = "mhlo.dot"(%41, %cst_12) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%47 = chlo.broadcast_add %46, %cst_11 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%48 = "mhlo.reshape"(%47) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%49 = "mhlo.transpose"(%48) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%50 = "mhlo.dot_general"(%49, %45) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%51 = chlo.broadcast_multiply %50, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%52 = chlo.broadcast_add %51, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%53 = "mhlo.reduce"(%52, %4) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%54 = linalg.tensor_expand_shape %53 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%55 = chlo.broadcast_subtract %52, %54 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%56 = "mhlo.exponential"(%55) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%57 = "mhlo.reduce"(%56, %5) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%58 = linalg.tensor_expand_shape %57 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%59 = chlo.broadcast_divide %56, %58 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%60 = "mhlo.dot_general"(%59, %34) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%61 = "mhlo.transpose"(%60) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%62 = "mhlo.reshape"(%61) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%63 = "mhlo.dot"(%62, %cst_8) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%64 = chlo.broadcast_add %63, %cst_7 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%65 = "mhlo.reshape"(%64) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%66 = "mhlo.dot"(%30, %cst_21) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%67 = chlo.broadcast_add %66, %cst_20 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%68 = "mhlo.reshape"(%67) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%69 = chlo.broadcast_multiply %68, %cst_19 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%70 = chlo.broadcast_add %69, %cst_7 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%71 = chlo.broadcast_add %65, %70 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%72 = chlo.broadcast_multiply %71, %cst_6 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%73 = chlo.broadcast_add %72, %cst_5 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%74 = "mhlo.reshape"(%73) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%75 = "mhlo.dot"(%74, %cst_23) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%76 = chlo.broadcast_add %75, %cst_22 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%77 = "mhlo.reshape"(%76) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%78 = chlo.broadcast_maximum %77, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%79 = "mhlo.reshape"(%78) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%80 = "mhlo.dot"(%79, %cst_27) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%81 = chlo.broadcast_add %80, %cst_26 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%82 = "mhlo.reshape"(%81) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%83 = chlo.broadcast_add %82, %73 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%84 = chlo.broadcast_multiply %83, %cst_25 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%85 = chlo.broadcast_add %84, %cst_24 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%86 = "mhlo.reshape"(%85) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%87 = "mhlo.dot"(%86, %cst_29) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%88 = chlo.broadcast_add %87, %cst_28 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%89 = "mhlo.reshape"(%88) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%90 = chlo.broadcast_maximum %89, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%91 = "mhlo.reshape"(%90) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%92 = "mhlo.dot"(%91, %cst_33) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%93 = chlo.broadcast_add %92, %cst_32 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%94 = "mhlo.reshape"(%93) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%95 = chlo.broadcast_add %94, %85 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%96 = chlo.broadcast_multiply %95, %cst_31 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%97 = chlo.broadcast_add %96, %cst_30 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%98 = "mhlo.reshape"(%97) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%99 = "mhlo.dot"(%98, %cst_35) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%100 = chlo.broadcast_add %99, %cst_34 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%101 = "mhlo.reshape"(%100) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%102 = chlo.broadcast_maximum %101, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%103 = "mhlo.reshape"(%102) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%104 = "mhlo.dot"(%103, %cst_39) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%105 = chlo.broadcast_add %104, %cst_38 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%106 = "mhlo.reshape"(%105) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%107 = chlo.broadcast_add %106, %97 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%108 = chlo.broadcast_multiply %107, %cst_37 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%109 = chlo.broadcast_add %108, %cst_36 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%110 = "mhlo.reshape"(%109) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%111 = "mhlo.dot"(%110, %cst_41) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%112 = chlo.broadcast_add %111, %cst_40 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%113 = "mhlo.reshape"(%112) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%114 = chlo.broadcast_maximum %113, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%115 = "mhlo.reshape"(%114) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%116 = "mhlo.dot"(%115, %cst_49) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%117 = chlo.broadcast_add %116, %cst_48 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%118 = "mhlo.reshape"(%117) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%119 = chlo.broadcast_add %118, %109 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%120 = chlo.broadcast_multiply %119, %cst_43 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%121 = chlo.broadcast_add %120, %cst_42 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%122 = "mhlo.reshape"(%121) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%123 = "mhlo.dot"(%122, %cst_47) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%124 = chlo.broadcast_add %123, %cst_46 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%125 = "mhlo.reshape"(%124) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%126 = chlo.broadcast_add %125, %29 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%127 = chlo.broadcast_multiply %126, %cst_45 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%128 = chlo.broadcast_add %127, %cst_44 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%129 = "mhlo.reshape"(%128) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%130 = "mhlo.dot"(%129, %cst_59) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%131 = chlo.broadcast_add %130, %cst_58 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%132 = "mhlo.reshape"(%131) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%133 = "mhlo.transpose"(%132) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%134 = "mhlo.dot"(%129, %cst_63) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%135 = "mhlo.reshape"(%134) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%136 = "mhlo.broadcast_in_dim"(%cst_62) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%137 = mhlo.add %135, %136 : tensor<1x384x128xf32>
%138 = chlo.broadcast_multiply %137, %cst_61 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%139 = chlo.broadcast_add %138, %cst_60 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%140 = "mhlo.reshape"(%139) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%141 = "mhlo.dot"(%140, %cst_55) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%142 = chlo.broadcast_add %141, %cst_54 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%143 = "mhlo.reshape"(%142) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%144 = "mhlo.transpose"(%143) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%145 = "mhlo.dot"(%140, %cst_57) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%146 = chlo.broadcast_add %145, %cst_56 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%147 = "mhlo.reshape"(%146) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%148 = "mhlo.transpose"(%147) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%149 = "mhlo.dot_general"(%148, %144) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%150 = chlo.broadcast_multiply %149, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%151 = chlo.broadcast_add %150, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%152 = "mhlo.reduce"(%151, %4) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%153 = linalg.tensor_expand_shape %152 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%154 = chlo.broadcast_subtract %151, %153 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%155 = "mhlo.exponential"(%154) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%156 = "mhlo.reduce"(%155, %5) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%157 = linalg.tensor_expand_shape %156 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%158 = chlo.broadcast_divide %155, %157 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%159 = "mhlo.dot_general"(%158, %133) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%160 = "mhlo.transpose"(%159) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%161 = "mhlo.reshape"(%160) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%162 = "mhlo.dot"(%161, %cst_53) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%163 = chlo.broadcast_add %162, %cst_52 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%164 = "mhlo.reshape"(%163) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%165 = "mhlo.dot"(%129, %cst_66) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%166 = chlo.broadcast_add %165, %cst_65 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%167 = "mhlo.reshape"(%166) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%168 = chlo.broadcast_multiply %167, %cst_64 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%169 = chlo.broadcast_add %168, %cst_52 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%170 = chlo.broadcast_add %164, %169 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%171 = chlo.broadcast_multiply %170, %cst_51 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%172 = chlo.broadcast_add %171, %cst_50 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%173 = "mhlo.reshape"(%172) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%174 = "mhlo.dot"(%173, %cst_68) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%175 = chlo.broadcast_add %174, %cst_67 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%176 = "mhlo.reshape"(%175) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%177 = chlo.broadcast_maximum %176, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%178 = "mhlo.reshape"(%177) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%179 = "mhlo.dot"(%178, %cst_72) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%180 = chlo.broadcast_add %179, %cst_71 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%181 = "mhlo.reshape"(%180) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%182 = chlo.broadcast_add %181, %172 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%183 = chlo.broadcast_multiply %182, %cst_70 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%184 = chlo.broadcast_add %183, %cst_69 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%185 = "mhlo.reshape"(%184) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%186 = "mhlo.dot"(%185, %cst_74) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%187 = chlo.broadcast_add %186, %cst_73 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%188 = "mhlo.reshape"(%187) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%189 = chlo.broadcast_maximum %188, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%190 = "mhlo.reshape"(%189) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%191 = "mhlo.dot"(%190, %cst_78) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%192 = chlo.broadcast_add %191, %cst_77 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%193 = "mhlo.reshape"(%192) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%194 = chlo.broadcast_add %193, %184 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%195 = chlo.broadcast_multiply %194, %cst_76 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%196 = chlo.broadcast_add %195, %cst_75 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%197 = "mhlo.reshape"(%196) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%198 = "mhlo.dot"(%197, %cst_80) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%199 = chlo.broadcast_add %198, %cst_79 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%200 = "mhlo.reshape"(%199) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%201 = chlo.broadcast_maximum %200, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%202 = "mhlo.reshape"(%201) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%203 = "mhlo.dot"(%202, %cst_84) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%204 = chlo.broadcast_add %203, %cst_83 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%205 = "mhlo.reshape"(%204) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%206 = chlo.broadcast_add %205, %196 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%207 = chlo.broadcast_multiply %206, %cst_82 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%208 = chlo.broadcast_add %207, %cst_81 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%209 = "mhlo.reshape"(%208) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%210 = "mhlo.dot"(%209, %cst_86) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%211 = chlo.broadcast_add %210, %cst_85 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%212 = "mhlo.reshape"(%211) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%213 = chlo.broadcast_maximum %212, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%214 = "mhlo.reshape"(%213) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%215 = "mhlo.dot"(%214, %cst_94) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%216 = chlo.broadcast_add %215, %cst_93 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%217 = "mhlo.reshape"(%216) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%218 = chlo.broadcast_add %217, %208 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%219 = chlo.broadcast_multiply %218, %cst_88 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%220 = chlo.broadcast_add %219, %cst_87 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%221 = "mhlo.reshape"(%220) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%222 = "mhlo.dot"(%221, %cst_92) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%223 = chlo.broadcast_add %222, %cst_91 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%224 = "mhlo.reshape"(%223) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%225 = chlo.broadcast_add %224, %128 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%226 = chlo.broadcast_multiply %225, %cst_90 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%227 = chlo.broadcast_add %226, %cst_89 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%228 = "mhlo.reshape"(%227) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%229 = "mhlo.dot"(%228, %cst_554) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%230 = chlo.broadcast_add %229, %cst_553 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%231 = "mhlo.reshape"(%230) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%232 = "mhlo.transpose"(%231) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%233 = "mhlo.dot"(%228, %cst_558) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%234 = "mhlo.reshape"(%233) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%235 = "mhlo.broadcast_in_dim"(%cst_557) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%236 = mhlo.add %234, %235 : tensor<1x384x128xf32>
%237 = chlo.broadcast_multiply %236, %cst_556 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%238 = chlo.broadcast_add %237, %cst_555 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%239 = "mhlo.reshape"(%238) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%240 = "mhlo.dot"(%239, %cst_550) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%241 = chlo.broadcast_add %240, %cst_549 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%242 = "mhlo.reshape"(%241) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%243 = "mhlo.transpose"(%242) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%244 = "mhlo.dot"(%239, %cst_552) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%245 = chlo.broadcast_add %244, %cst_551 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%246 = "mhlo.reshape"(%245) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%247 = "mhlo.transpose"(%246) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%248 = "mhlo.dot_general"(%247, %243) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%249 = chlo.broadcast_multiply %248, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%250 = chlo.broadcast_add %249, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%251 = "mhlo.reduce"(%250, %4) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%252 = linalg.tensor_expand_shape %251 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%253 = chlo.broadcast_subtract %250, %252 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%254 = "mhlo.exponential"(%253) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%255 = "mhlo.reduce"(%254, %5) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%256 = linalg.tensor_expand_shape %255 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%257 = chlo.broadcast_divide %254, %256 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%258 = "mhlo.dot_general"(%257, %232) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%259 = "mhlo.transpose"(%258) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%260 = "mhlo.reshape"(%259) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%261 = "mhlo.dot"(%260, %cst_548) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%262 = chlo.broadcast_add %261, %cst_547 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%263 = "mhlo.reshape"(%262) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%264 = "mhlo.dot"(%228, %cst_561) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%265 = chlo.broadcast_add %264, %cst_560 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%266 = "mhlo.reshape"(%265) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%267 = chlo.broadcast_multiply %266, %cst_559 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%268 = chlo.broadcast_add %267, %cst_547 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%269 = chlo.broadcast_add %263, %268 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%270 = chlo.broadcast_multiply %269, %cst_546 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%271 = chlo.broadcast_add %270, %cst_545 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%272 = "mhlo.reshape"(%271) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%273 = "mhlo.dot"(%272, %cst_563) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%274 = chlo.broadcast_add %273, %cst_562 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%275 = "mhlo.reshape"(%274) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%276 = chlo.broadcast_maximum %275, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%277 = "mhlo.reshape"(%276) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%278 = "mhlo.dot"(%277, %cst_567) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%279 = chlo.broadcast_add %278, %cst_566 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%280 = "mhlo.reshape"(%279) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%281 = chlo.broadcast_add %280, %271 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%282 = chlo.broadcast_multiply %281, %cst_565 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%283 = chlo.broadcast_add %282, %cst_564 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%284 = "mhlo.reshape"(%283) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%285 = "mhlo.dot"(%284, %cst_569) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%286 = chlo.broadcast_add %285, %cst_568 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%287 = "mhlo.reshape"(%286) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%288 = chlo.broadcast_maximum %287, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%289 = "mhlo.reshape"(%288) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%290 = "mhlo.dot"(%289, %cst_573) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%291 = chlo.broadcast_add %290, %cst_572 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%292 = "mhlo.reshape"(%291) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%293 = chlo.broadcast_add %292, %283 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%294 = chlo.broadcast_multiply %293, %cst_571 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%295 = chlo.broadcast_add %294, %cst_570 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%296 = "mhlo.reshape"(%295) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%297 = "mhlo.dot"(%296, %cst_575) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%298 = chlo.broadcast_add %297, %cst_574 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%299 = "mhlo.reshape"(%298) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%300 = chlo.broadcast_maximum %299, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%301 = "mhlo.reshape"(%300) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%302 = "mhlo.dot"(%301, %cst_579) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%303 = chlo.broadcast_add %302, %cst_578 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%304 = "mhlo.reshape"(%303) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%305 = chlo.broadcast_add %304, %295 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%306 = chlo.broadcast_multiply %305, %cst_577 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%307 = chlo.broadcast_add %306, %cst_576 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%308 = "mhlo.reshape"(%307) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%309 = "mhlo.dot"(%308, %cst_581) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%310 = chlo.broadcast_add %309, %cst_580 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%311 = "mhlo.reshape"(%310) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%312 = chlo.broadcast_maximum %311, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%313 = "mhlo.reshape"(%312) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%314 = "mhlo.dot"(%313, %cst_589) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%315 = chlo.broadcast_add %314, %cst_588 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%316 = "mhlo.reshape"(%315) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%317 = chlo.broadcast_add %316, %307 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%318 = chlo.broadcast_multiply %317, %cst_583 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%319 = chlo.broadcast_add %318, %cst_582 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%320 = "mhlo.reshape"(%319) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%321 = "mhlo.dot"(%320, %cst_587) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%322 = chlo.broadcast_add %321, %cst_586 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%323 = "mhlo.reshape"(%322) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%324 = chlo.broadcast_add %323, %227 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%325 = chlo.broadcast_multiply %324, %cst_585 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%326 = chlo.broadcast_add %325, %cst_584 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%327 = "mhlo.reshape"(%326) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%328 = "mhlo.dot"(%327, %cst_779) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%329 = chlo.broadcast_add %328, %cst_778 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%330 = "mhlo.reshape"(%329) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%331 = "mhlo.transpose"(%330) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%332 = "mhlo.dot"(%327, %cst_783) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%333 = "mhlo.reshape"(%332) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%334 = "mhlo.broadcast_in_dim"(%cst_782) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%335 = mhlo.add %333, %334 : tensor<1x384x128xf32>
%336 = chlo.broadcast_multiply %335, %cst_781 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%337 = chlo.broadcast_add %336, %cst_780 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%338 = "mhlo.reshape"(%337) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%339 = "mhlo.dot"(%338, %cst_775) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%340 = chlo.broadcast_add %339, %cst_774 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%341 = "mhlo.reshape"(%340) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%342 = "mhlo.transpose"(%341) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%343 = "mhlo.dot"(%338, %cst_777) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%344 = chlo.broadcast_add %343, %cst_776 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%345 = "mhlo.reshape"(%344) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%346 = "mhlo.transpose"(%345) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%347 = "mhlo.dot_general"(%346, %342) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
// Scale %347, add %24, then apply the max-subtracted exp / sum(exp) normalization
// along dimension 3. This has the shape of a numerically stable softmax, provided
// %4 and %5 (both defined outside this excerpt) are the identity elements for
// max and add respectively -- TODO confirm.
// Multiply every element of %347 by the rank-0 tensor %2 (defined outside this
// excerpt; presumably the attention score scale -- verify).
%348 = chlo.broadcast_multiply %347, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
// Add %24 (1x1x384x384); its size-1 dimension 1 is broadcast across the 4 entries of
// dimension 1 of %348. NOTE(review): %24 looks like an additive mask -- confirm at its definition.
%349 = chlo.broadcast_add %348, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
// Max-reduce along dimension 3 (the reducer region takes mhlo.maximum of its two
// scalar arguments), giving one maximum per row as a 1x4x384 tensor. %4 is the initial value.
%350 = "mhlo.reduce"(%349, %4) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
// Re-append a trailing unit dimension (1x4x384 -> 1x4x384x1) for the broadcast below.
%351 = linalg.tensor_expand_shape %350 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
// Subtract each row's maximum from that row, then exponentiate element-wise.
%352 = chlo.broadcast_subtract %349, %351 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%353 = "mhlo.exponential"(%352) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
// Sum-reduce the exponentials along dimension 3 (the reducer region is mhlo.add);
// %5 is the initial value.
%354 = "mhlo.reduce"(%353, %5) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
// Re-append the trailing unit dimension and divide each exponential by its row sum.
%355 = linalg.tensor_expand_shape %354 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%356 = chlo.broadcast_divide %353, %355 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%357 = "mhlo.dot_general"(%356, %331) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%358 = "mhlo.transpose"(%357) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%359 = "mhlo.reshape"(%358) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%360 = "mhlo.dot"(%359, %cst_773) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%361 = chlo.broadcast_add %360, %cst_772 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%362 = "mhlo.reshape"(%361) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%363 = "mhlo.dot"(%327, %cst_786) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%364 = chlo.broadcast_add %363, %cst_785 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%365 = "mhlo.reshape"(%364) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%366 = chlo.broadcast_multiply %365, %cst_784 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%367 = chlo.broadcast_add %366, %cst_772 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%368 = chlo.broadcast_add %362, %367 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%369 = chlo.broadcast_multiply %368, %cst_771 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%370 = chlo.broadcast_add %369, %cst_770 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%371 = "mhlo.reshape"(%370) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%372 = "mhlo.dot"(%371, %cst_788) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%373 = chlo.broadcast_add %372, %cst_787 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%374 = "mhlo.reshape"(%373) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%375 = chlo.broadcast_maximum %374, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%376 = "mhlo.reshape"(%375) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%377 = "mhlo.dot"(%376, %cst_792) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%378 = chlo.broadcast_add %377, %cst_791 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%379 = "mhlo.reshape"(%378) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%380 = chlo.broadcast_add %379, %370 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%381 = chlo.broadcast_multiply %380, %cst_790 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%382 = chlo.broadcast_add %381, %cst_789 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%383 = "mhlo.reshape"(%382) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%384 = "mhlo.dot"(%383, %cst_794) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%385 = chlo.broadcast_add %384, %cst_793 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%386 = "mhlo.reshape"(%385) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%387 = chlo.broadcast_maximum %386, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%388 = "mhlo.reshape"(%387) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%389 = "mhlo.dot"(%388, %cst_798) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%390 = chlo.broadcast_add %389, %cst_797 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%391 = "mhlo.reshape"(%390) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%392 = chlo.broadcast_add %391, %382 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%393 = chlo.broadcast_multiply %392, %cst_796 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%394 = chlo.broadcast_add %393, %cst_795 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%395 = "mhlo.reshape"(%394) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%396 = "mhlo.dot"(%395, %cst_800) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%397 = chlo.broadcast_add %396, %cst_799 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%398 = "mhlo.reshape"(%397) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%399 = chlo.broadcast_maximum %398, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%400 = "mhlo.reshape"(%399) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%401 = "mhlo.dot"(%400, %cst_804) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%402 = chlo.broadcast_add %401, %cst_803 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%403 = "mhlo.reshape"(%402) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%404 = chlo.broadcast_add %403, %394 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%405 = chlo.broadcast_multiply %404, %cst_802 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%406 = chlo.broadcast_add %405, %cst_801 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%407 = "mhlo.reshape"(%406) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%408 = "mhlo.dot"(%407, %cst_806) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%409 = chlo.broadcast_add %408, %cst_805 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%410 = "mhlo.reshape"(%409) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%411 = chlo.broadcast_maximum %410, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%412 = "mhlo.reshape"(%411) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%413 = "mhlo.dot"(%412, %cst_814) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%414 = chlo.broadcast_add %413, %cst_813 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%415 = "mhlo.reshape"(%414) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%416 = chlo.broadcast_add %415, %406 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%417 = chlo.broadcast_multiply %416, %cst_808 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%418 = chlo.broadcast_add %417, %cst_807 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%419 = "mhlo.reshape"(%418) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%420 = "mhlo.dot"(%419, %cst_812) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%421 = chlo.broadcast_add %420, %cst_811 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%422 = "mhlo.reshape"(%421) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%423 = chlo.broadcast_add %422, %326 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%424 = chlo.broadcast_multiply %423, %cst_810 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%425 = chlo.broadcast_add %424, %cst_809 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%426 = "mhlo.reshape"(%425) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%427 = "mhlo.dot"(%426, %cst_824) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%428 = chlo.broadcast_add %427, %cst_823 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%429 = "mhlo.reshape"(%428) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%430 = "mhlo.transpose"(%429) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%431 = "mhlo.dot"(%426, %cst_828) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%432 = "mhlo.reshape"(%431) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%433 = "mhlo.broadcast_in_dim"(%cst_827) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%434 = mhlo.add %432, %433 : tensor<1x384x128xf32>
%435 = chlo.broadcast_multiply %434, %cst_826 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%436 = chlo.broadcast_add %435, %cst_825 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%437 = "mhlo.reshape"(%436) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%438 = "mhlo.dot"(%437, %cst_820) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%439 = chlo.broadcast_add %438, %cst_819 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%440 = "mhlo.reshape"(%439) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%441 = "mhlo.transpose"(%440) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%442 = "mhlo.dot"(%437, %cst_822) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%443 = chlo.broadcast_add %442, %cst_821 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%444 = "mhlo.reshape"(%443) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%445 = "mhlo.transpose"(%444) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%446 = "mhlo.dot_general"(%445, %441) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
// Scale %446, add %24, then apply the max-subtracted exp / sum(exp) normalization
// along dimension 3. This has the shape of a numerically stable softmax, provided
// %4 and %5 (both defined outside this excerpt) are the identity elements for
// max and add respectively -- TODO confirm.
// Multiply every element of %446 by the rank-0 tensor %2 (defined outside this
// excerpt; presumably the attention score scale -- verify).
%447 = chlo.broadcast_multiply %446, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
// Add %24 (1x1x384x384); its size-1 dimension 1 is broadcast across the 4 entries of
// dimension 1 of %447. NOTE(review): %24 looks like an additive mask -- confirm at its definition.
%448 = chlo.broadcast_add %447, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
// Max-reduce along dimension 3 (the reducer region takes mhlo.maximum of its two
// scalar arguments), giving one maximum per row as a 1x4x384 tensor. %4 is the initial value.
%449 = "mhlo.reduce"(%448, %4) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
// Re-append a trailing unit dimension (1x4x384 -> 1x4x384x1) for the broadcast below.
%450 = linalg.tensor_expand_shape %449 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
// Subtract each row's maximum from that row, then exponentiate element-wise.
%451 = chlo.broadcast_subtract %448, %450 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%452 = "mhlo.exponential"(%451) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
// Sum-reduce the exponentials along dimension 3 (the reducer region is mhlo.add);
// %5 is the initial value.
%453 = "mhlo.reduce"(%452, %5) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
// Re-append the trailing unit dimension and divide each exponential by its row sum.
%454 = linalg.tensor_expand_shape %453 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%455 = chlo.broadcast_divide %452, %454 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%456 = "mhlo.dot_general"(%455, %430) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%457 = "mhlo.transpose"(%456) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%458 = "mhlo.reshape"(%457) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%459 = "mhlo.dot"(%458, %cst_818) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%460 = chlo.broadcast_add %459, %cst_817 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%461 = "mhlo.reshape"(%460) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%462 = "mhlo.dot"(%426, %cst_831) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%463 = chlo.broadcast_add %462, %cst_830 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%464 = "mhlo.reshape"(%463) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%465 = chlo.broadcast_multiply %464, %cst_829 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%466 = chlo.broadcast_add %465, %cst_817 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%467 = chlo.broadcast_add %461, %466 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%468 = chlo.broadcast_multiply %467, %cst_816 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%469 = chlo.broadcast_add %468, %cst_815 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%470 = "mhlo.reshape"(%469) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%471 = "mhlo.dot"(%470, %cst_833) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%472 = chlo.broadcast_add %471, %cst_832 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%473 = "mhlo.reshape"(%472) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%474 = chlo.broadcast_maximum %473, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%475 = "mhlo.reshape"(%474) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%476 = "mhlo.dot"(%475, %cst_837) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%477 = chlo.broadcast_add %476, %cst_836 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%478 = "mhlo.reshape"(%477) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%479 = chlo.broadcast_add %478, %469 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%480 = chlo.broadcast_multiply %479, %cst_835 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%481 = chlo.broadcast_add %480, %cst_834 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%482 = "mhlo.reshape"(%481) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%483 = "mhlo.dot"(%482, %cst_839) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%484 = chlo.broadcast_add %483, %cst_838 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%485 = "mhlo.reshape"(%484) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%486 = chlo.broadcast_maximum %485, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%487 = "mhlo.reshape"(%486) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%488 = "mhlo.dot"(%487, %cst_843) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%489 = chlo.broadcast_add %488, %cst_842 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%490 = "mhlo.reshape"(%489) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%491 = chlo.broadcast_add %490, %481 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%492 = chlo.broadcast_multiply %491, %cst_841 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%493 = chlo.broadcast_add %492, %cst_840 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%494 = "mhlo.reshape"(%493) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%495 = "mhlo.dot"(%494, %cst_845) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%496 = chlo.broadcast_add %495, %cst_844 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%497 = "mhlo.reshape"(%496) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%498 = chlo.broadcast_maximum %497, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%499 = "mhlo.reshape"(%498) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%500 = "mhlo.dot"(%499, %cst_849) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%501 = chlo.broadcast_add %500, %cst_848 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%502 = "mhlo.reshape"(%501) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%503 = chlo.broadcast_add %502, %493 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%504 = chlo.broadcast_multiply %503, %cst_847 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%505 = chlo.broadcast_add %504, %cst_846 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%506 = "mhlo.reshape"(%505) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%507 = "mhlo.dot"(%506, %cst_851) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%508 = chlo.broadcast_add %507, %cst_850 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%509 = "mhlo.reshape"(%508) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%510 = chlo.broadcast_maximum %509, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%511 = "mhlo.reshape"(%510) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%512 = "mhlo.dot"(%511, %cst_859) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%513 = chlo.broadcast_add %512, %cst_858 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%514 = "mhlo.reshape"(%513) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%515 = chlo.broadcast_add %514, %505 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%516 = chlo.broadcast_multiply %515, %cst_853 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%517 = chlo.broadcast_add %516, %cst_852 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%518 = "mhlo.reshape"(%517) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%519 = "mhlo.dot"(%518, %cst_857) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%520 = chlo.broadcast_add %519, %cst_856 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%521 = "mhlo.reshape"(%520) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%522 = chlo.broadcast_add %521, %425 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%523 = chlo.broadcast_multiply %522, %cst_855 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%524 = chlo.broadcast_add %523, %cst_854 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%525 = "mhlo.reshape"(%524) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%526 = "mhlo.dot"(%525, %cst_869) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%527 = chlo.broadcast_add %526, %cst_868 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%528 = "mhlo.reshape"(%527) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%529 = "mhlo.transpose"(%528) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%530 = "mhlo.dot"(%525, %cst_873) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%531 = "mhlo.reshape"(%530) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%532 = "mhlo.broadcast_in_dim"(%cst_872) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%533 = mhlo.add %531, %532 : tensor<1x384x128xf32>
%534 = chlo.broadcast_multiply %533, %cst_871 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%535 = chlo.broadcast_add %534, %cst_870 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%536 = "mhlo.reshape"(%535) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%537 = "mhlo.dot"(%536, %cst_865) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%538 = chlo.broadcast_add %537, %cst_864 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%539 = "mhlo.reshape"(%538) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%540 = "mhlo.transpose"(%539) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%541 = "mhlo.dot"(%536, %cst_867) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%542 = chlo.broadcast_add %541, %cst_866 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%543 = "mhlo.reshape"(%542) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%544 = "mhlo.transpose"(%543) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%545 = "mhlo.dot_general"(%544, %540) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%546 = chlo.broadcast_multiply %545, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%547 = chlo.broadcast_add %546, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
// Normalization of %547 along dimension 3, computed as
// exp(x - max(x)) / sum(exp(x - max(x))), i.e. a max-subtracted softmax over the last axis.
// Step 1: reduce %547 with mhlo.maximum over dimension 3 (1x4x384x384 -> 1x4x384).
// NOTE(review): the init value %4 is defined outside this view; presumably the lowest f32 / -inf — confirm.
%548 = "mhlo.reduce"(%547, %4) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
// Step 2: re-add a trailing unit dimension (1x4x384 -> 1x4x384x1) so the per-row max broadcasts against %547.
%549 = linalg.tensor_expand_shape %548 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
// Step 3: subtract the per-row max from every element, then exponentiate.
%550 = chlo.broadcast_subtract %547, %549 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%551 = "mhlo.exponential"(%550) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
// Step 4: reduce the exponentials with mhlo.add over dimension 3 (1x4x384x384 -> 1x4x384).
// NOTE(review): the init value %5 is defined outside this view; presumably 0.0 — confirm.
%552 = "mhlo.reduce"(%551, %5) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
// Step 5: expand the per-row sum to 1x4x384x1 and divide each exponential by it.
%553 = linalg.tensor_expand_shape %552 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%554 = chlo.broadcast_divide %551, %553 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%555 = "mhlo.dot_general"(%554, %529) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%556 = "mhlo.transpose"(%555) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%557 = "mhlo.reshape"(%556) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%558 = "mhlo.dot"(%557, %cst_863) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%559 = chlo.broadcast_add %558, %cst_862 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%560 = "mhlo.reshape"(%559) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%561 = "mhlo.dot"(%525, %cst_876) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%562 = chlo.broadcast_add %561, %cst_875 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%563 = "mhlo.reshape"(%562) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%564 = chlo.broadcast_multiply %563, %cst_874 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%565 = chlo.broadcast_add %564, %cst_862 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%566 = chlo.broadcast_add %560, %565 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%567 = chlo.broadcast_multiply %566, %cst_861 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%568 = chlo.broadcast_add %567, %cst_860 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%569 = "mhlo.reshape"(%568) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%570 = "mhlo.dot"(%569, %cst_878) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%571 = chlo.broadcast_add %570, %cst_877 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%572 = "mhlo.reshape"(%571) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%573 = chlo.broadcast_maximum %572, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%574 = "mhlo.reshape"(%573) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%575 = "mhlo.dot"(%574, %cst_882) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%576 = chlo.broadcast_add %575, %cst_881 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%577 = "mhlo.reshape"(%576) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%578 = chlo.broadcast_add %577, %568 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%579 = chlo.broadcast_multiply %578, %cst_880 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%580 = chlo.broadcast_add %579, %cst_879 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%581 = "mhlo.reshape"(%580) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%582 = "mhlo.dot"(%581, %cst_884) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%583 = chlo.broadcast_add %582, %cst_883 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%584 = "mhlo.reshape"(%583) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%585 = chlo.broadcast_maximum %584, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%586 = "mhlo.reshape"(%585) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%587 = "mhlo.dot"(%586, %cst_888) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%588 = chlo.broadcast_add %587, %cst_887 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%589 = "mhlo.reshape"(%588) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%590 = chlo.broadcast_add %589, %580 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%591 = chlo.broadcast_multiply %590, %cst_886 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%592 = chlo.broadcast_add %591, %cst_885 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%593 = "mhlo.reshape"(%592) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%594 = "mhlo.dot"(%593, %cst_890) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%595 = chlo.broadcast_add %594, %cst_889 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%596 = "mhlo.reshape"(%595) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%597 = chlo.broadcast_maximum %596, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%598 = "mhlo.reshape"(%597) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%599 = "mhlo.dot"(%598, %cst_894) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%600 = chlo.broadcast_add %599, %cst_893 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%601 = "mhlo.reshape"(%600) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%602 = chlo.broadcast_add %601, %592 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%603 = chlo.broadcast_multiply %602, %cst_892 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%604 = chlo.broadcast_add %603, %cst_891 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%605 = "mhlo.reshape"(%604) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%606 = "mhlo.dot"(%605, %cst_896) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%607 = chlo.broadcast_add %606, %cst_895 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%608 = "mhlo.reshape"(%607) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%609 = chlo.broadcast_maximum %608, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%610 = "mhlo.reshape"(%609) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%611 = "mhlo.dot"(%610, %cst_904) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%612 = chlo.broadcast_add %611, %cst_903 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%613 = "mhlo.reshape"(%612) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%614 = chlo.broadcast_add %613, %604 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%615 = chlo.broadcast_multiply %614, %cst_898 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%616 = chlo.broadcast_add %615, %cst_897 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%617 = "mhlo.reshape"(%616) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%618 = "mhlo.dot"(%617, %cst_902) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%619 = chlo.broadcast_add %618, %cst_901 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%620 = "mhlo.reshape"(%619) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%621 = chlo.broadcast_add %620, %524 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%622 = chlo.broadcast_multiply %621, %cst_900 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%623 = chlo.broadcast_add %622, %cst_899 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%624 = "mhlo.reshape"(%623) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%625 = "mhlo.dot"(%624, %cst_914) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%626 = chlo.broadcast_add %625, %cst_913 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%627 = "mhlo.reshape"(%626) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%628 = "mhlo.transpose"(%627) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%629 = "mhlo.dot"(%624, %cst_918) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%630 = "mhlo.reshape"(%629) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%631 = "mhlo.broadcast_in_dim"(%cst_917) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%632 = mhlo.add %630, %631 : tensor<1x384x128xf32>
%633 = chlo.broadcast_multiply %632, %cst_916 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%634 = chlo.broadcast_add %633, %cst_915 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%635 = "mhlo.reshape"(%634) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%636 = "mhlo.dot"(%635, %cst_910) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%637 = chlo.broadcast_add %636, %cst_909 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%638 = "mhlo.reshape"(%637) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%639 = "mhlo.transpose"(%638) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%640 = "mhlo.dot"(%635, %cst_912) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%641 = chlo.broadcast_add %640, %cst_911 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%642 = "mhlo.reshape"(%641) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%643 = "mhlo.transpose"(%642) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%644 = "mhlo.dot_general"(%643, %639) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%645 = chlo.broadcast_multiply %644, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%646 = chlo.broadcast_add %645, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
// Normalization of %646 along dimension 3, computed as
// exp(x - max(x)) / sum(exp(x - max(x))), i.e. a max-subtracted softmax over the last axis.
// Step 1: reduce %646 with mhlo.maximum over dimension 3 (1x4x384x384 -> 1x4x384).
// NOTE(review): the init value %4 is defined outside this view; presumably the lowest f32 / -inf — confirm.
%647 = "mhlo.reduce"(%646, %4) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
// Step 2: re-add a trailing unit dimension (1x4x384 -> 1x4x384x1) so the per-row max broadcasts against %646.
%648 = linalg.tensor_expand_shape %647 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
// Step 3: subtract the per-row max from every element, then exponentiate.
%649 = chlo.broadcast_subtract %646, %648 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%650 = "mhlo.exponential"(%649) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
// Step 4: reduce the exponentials with mhlo.add over dimension 3 (1x4x384x384 -> 1x4x384).
// NOTE(review): the init value %5 is defined outside this view; presumably 0.0 — confirm.
%651 = "mhlo.reduce"(%650, %5) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
// Step 5: expand the per-row sum to 1x4x384x1 and divide each exponential by it.
%652 = linalg.tensor_expand_shape %651 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%653 = chlo.broadcast_divide %650, %652 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%654 = "mhlo.dot_general"(%653, %628) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%655 = "mhlo.transpose"(%654) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%656 = "mhlo.reshape"(%655) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%657 = "mhlo.dot"(%656, %cst_908) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%658 = chlo.broadcast_add %657, %cst_907 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%659 = "mhlo.reshape"(%658) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%660 = "mhlo.dot"(%624, %cst_921) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%661 = chlo.broadcast_add %660, %cst_920 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%662 = "mhlo.reshape"(%661) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%663 = chlo.broadcast_multiply %662, %cst_919 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%664 = chlo.broadcast_add %663, %cst_907 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%665 = chlo.broadcast_add %659, %664 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%666 = chlo.broadcast_multiply %665, %cst_906 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%667 = chlo.broadcast_add %666, %cst_905 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%668 = "mhlo.reshape"(%667) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%669 = "mhlo.dot"(%668, %cst_923) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%670 = chlo.broadcast_add %669, %cst_922 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%671 = "mhlo.reshape"(%670) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%672 = chlo.broadcast_maximum %671, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%673 = "mhlo.reshape"(%672) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%674 = "mhlo.dot"(%673, %cst_927) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%675 = chlo.broadcast_add %674, %cst_926 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%676 = "mhlo.reshape"(%675) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%677 = chlo.broadcast_add %676, %667 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%678 = chlo.broadcast_multiply %677, %cst_925 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%679 = chlo.broadcast_add %678, %cst_924 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%680 = "mhlo.reshape"(%679) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%681 = "mhlo.dot"(%680, %cst_929) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%682 = chlo.broadcast_add %681, %cst_928 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%683 = "mhlo.reshape"(%682) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%684 = chlo.broadcast_maximum %683, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%685 = "mhlo.reshape"(%684) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%686 = "mhlo.dot"(%685, %cst_933) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%687 = chlo.broadcast_add %686, %cst_932 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%688 = "mhlo.reshape"(%687) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%689 = chlo.broadcast_add %688, %679 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%690 = chlo.broadcast_multiply %689, %cst_931 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%691 = chlo.broadcast_add %690, %cst_930 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%692 = "mhlo.reshape"(%691) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%693 = "mhlo.dot"(%692, %cst_935) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%694 = chlo.broadcast_add %693, %cst_934 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%695 = "mhlo.reshape"(%694) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%696 = chlo.broadcast_maximum %695, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%697 = "mhlo.reshape"(%696) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%698 = "mhlo.dot"(%697, %cst_939) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%699 = chlo.broadcast_add %698, %cst_938 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%700 = "mhlo.reshape"(%699) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%701 = chlo.broadcast_add %700, %691 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%702 = chlo.broadcast_multiply %701, %cst_937 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%703 = chlo.broadcast_add %702, %cst_936 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%704 = "mhlo.reshape"(%703) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%705 = "mhlo.dot"(%704, %cst_941) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%706 = chlo.broadcast_add %705, %cst_940 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%707 = "mhlo.reshape"(%706) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%708 = chlo.broadcast_maximum %707, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%709 = "mhlo.reshape"(%708) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%710 = "mhlo.dot"(%709, %cst_949) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%711 = chlo.broadcast_add %710, %cst_948 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%712 = "mhlo.reshape"(%711) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%713 = chlo.broadcast_add %712, %703 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%714 = chlo.broadcast_multiply %713, %cst_943 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%715 = chlo.broadcast_add %714, %cst_942 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%716 = "mhlo.reshape"(%715) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%717 = "mhlo.dot"(%716, %cst_947) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%718 = chlo.broadcast_add %717, %cst_946 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%719 = "mhlo.reshape"(%718) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%720 = chlo.broadcast_add %719, %623 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%721 = chlo.broadcast_multiply %720, %cst_945 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%722 = chlo.broadcast_add %721, %cst_944 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%723 = "mhlo.reshape"(%722) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%724 = "mhlo.dot"(%723, %cst_959) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%725 = chlo.broadcast_add %724, %cst_958 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%726 = "mhlo.reshape"(%725) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%727 = "mhlo.transpose"(%726) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%728 = "mhlo.dot"(%723, %cst_963) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%729 = "mhlo.reshape"(%728) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%730 = "mhlo.broadcast_in_dim"(%cst_962) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%731 = mhlo.add %729, %730 : tensor<1x384x128xf32>
%732 = chlo.broadcast_multiply %731, %cst_961 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%733 = chlo.broadcast_add %732, %cst_960 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%734 = "mhlo.reshape"(%733) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%735 = "mhlo.dot"(%734, %cst_955) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%736 = chlo.broadcast_add %735, %cst_954 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%737 = "mhlo.reshape"(%736) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%738 = "mhlo.transpose"(%737) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%739 = "mhlo.dot"(%734, %cst_957) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%740 = chlo.broadcast_add %739, %cst_956 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%741 = "mhlo.reshape"(%740) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%742 = "mhlo.transpose"(%741) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%743 = "mhlo.dot_general"(%742, %738) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%744 = chlo.broadcast_multiply %743, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%745 = chlo.broadcast_add %744, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
// Normalization of %745 along dimension 3, computed as
// exp(x - max(x)) / sum(exp(x - max(x))), i.e. a max-subtracted softmax over the last axis.
// Step 1: reduce %745 with mhlo.maximum over dimension 3 (1x4x384x384 -> 1x4x384).
// NOTE(review): the init value %4 is defined outside this view; presumably the lowest f32 / -inf — confirm.
%746 = "mhlo.reduce"(%745, %4) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
// Step 2: re-add a trailing unit dimension (1x4x384 -> 1x4x384x1) so the per-row max broadcasts against %745.
%747 = linalg.tensor_expand_shape %746 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
// Step 3: subtract the per-row max from every element, then exponentiate.
%748 = chlo.broadcast_subtract %745, %747 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%749 = "mhlo.exponential"(%748) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
// Step 4: reduce the exponentials with mhlo.add over dimension 3 (1x4x384x384 -> 1x4x384).
// NOTE(review): the init value %5 is defined outside this view; presumably 0.0 — confirm.
%750 = "mhlo.reduce"(%749, %5) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
// Step 5: expand the per-row sum to 1x4x384x1 and divide each exponential by it.
%751 = linalg.tensor_expand_shape %750 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%752 = chlo.broadcast_divide %749, %751 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%753 = "mhlo.dot_general"(%752, %727) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%754 = "mhlo.transpose"(%753) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%755 = "mhlo.reshape"(%754) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%756 = "mhlo.dot"(%755, %cst_953) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%757 = chlo.broadcast_add %756, %cst_952 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%758 = "mhlo.reshape"(%757) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%759 = "mhlo.dot"(%723, %cst_966) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%760 = chlo.broadcast_add %759, %cst_965 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%761 = "mhlo.reshape"(%760) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%762 = chlo.broadcast_multiply %761, %cst_964 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%763 = chlo.broadcast_add %762, %cst_952 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%764 = chlo.broadcast_add %758, %763 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%765 = chlo.broadcast_multiply %764, %cst_951 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%766 = chlo.broadcast_add %765, %cst_950 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%767 = "mhlo.reshape"(%766) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%768 = "mhlo.dot"(%767, %cst_968) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%769 = chlo.broadcast_add %768, %cst_967 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%770 = "mhlo.reshape"(%769) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%771 = chlo.broadcast_maximum %770, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%772 = "mhlo.reshape"(%771) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%773 = "mhlo.dot"(%772, %cst_972) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%774 = chlo.broadcast_add %773, %cst_971 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%775 = "mhlo.reshape"(%774) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%776 = chlo.broadcast_add %775, %766 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%777 = chlo.broadcast_multiply %776, %cst_970 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%778 = chlo.broadcast_add %777, %cst_969 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%779 = "mhlo.reshape"(%778) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%780 = "mhlo.dot"(%779, %cst_974) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%781 = chlo.broadcast_add %780, %cst_973 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%782 = "mhlo.reshape"(%781) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%783 = chlo.broadcast_maximum %782, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%784 = "mhlo.reshape"(%783) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%785 = "mhlo.dot"(%784, %cst_978) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%786 = chlo.broadcast_add %785, %cst_977 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%787 = "mhlo.reshape"(%786) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%788 = chlo.broadcast_add %787, %778 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%789 = chlo.broadcast_multiply %788, %cst_976 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%790 = chlo.broadcast_add %789, %cst_975 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%791 = "mhlo.reshape"(%790) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%792 = "mhlo.dot"(%791, %cst_980) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%793 = chlo.broadcast_add %792, %cst_979 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%794 = "mhlo.reshape"(%793) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%795 = chlo.broadcast_maximum %794, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%796 = "mhlo.reshape"(%795) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%797 = "mhlo.dot"(%796, %cst_984) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%798 = chlo.broadcast_add %797, %cst_983 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%799 = "mhlo.reshape"(%798) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%800 = chlo.broadcast_add %799, %790 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%801 = chlo.broadcast_multiply %800, %cst_982 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%802 = chlo.broadcast_add %801, %cst_981 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%803 = "mhlo.reshape"(%802) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%804 = "mhlo.dot"(%803, %cst_986) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%805 = chlo.broadcast_add %804, %cst_985 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%806 = "mhlo.reshape"(%805) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%807 = chlo.broadcast_maximum %806, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%808 = "mhlo.reshape"(%807) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%809 = "mhlo.dot"(%808, %cst_994) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%810 = chlo.broadcast_add %809, %cst_993 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%811 = "mhlo.reshape"(%810) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%812 = chlo.broadcast_add %811, %802 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%813 = chlo.broadcast_multiply %812, %cst_988 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%814 = chlo.broadcast_add %813, %cst_987 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%815 = "mhlo.reshape"(%814) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%816 = "mhlo.dot"(%815, %cst_992) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%817 = chlo.broadcast_add %816, %cst_991 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%818 = "mhlo.reshape"(%817) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%819 = chlo.broadcast_add %818, %722 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%820 = chlo.broadcast_multiply %819, %cst_990 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%821 = chlo.broadcast_add %820, %cst_989 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%822 = "mhlo.reshape"(%821) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%823 = "mhlo.dot"(%822, %cst_1004) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%824 = chlo.broadcast_add %823, %cst_1003 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%825 = "mhlo.reshape"(%824) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%826 = "mhlo.transpose"(%825) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%827 = "mhlo.dot"(%822, %cst_1008) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%828 = "mhlo.reshape"(%827) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%829 = "mhlo.broadcast_in_dim"(%cst_1007) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%830 = mhlo.add %828, %829 : tensor<1x384x128xf32>
%831 = chlo.broadcast_multiply %830, %cst_1006 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%832 = chlo.broadcast_add %831, %cst_1005 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%833 = "mhlo.reshape"(%832) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%834 = "mhlo.dot"(%833, %cst_1000) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%835 = chlo.broadcast_add %834, %cst_999 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%836 = "mhlo.reshape"(%835) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%837 = "mhlo.transpose"(%836) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%838 = "mhlo.dot"(%833, %cst_1002) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%839 = chlo.broadcast_add %838, %cst_1001 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%840 = "mhlo.reshape"(%839) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%841 = "mhlo.transpose"(%840) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
// Score / normalise / apply sequence over 1x4x384x* tensors (%842 .. %852).
// NOTE(review): the overall shape looks like scaled dot-product attention with a
// max-subtracted softmax, but %2, %4, %5 and %24 are defined outside this excerpt,
// so their values cannot be confirmed here.
//
// Pairwise scores: batch over dims [0, 1] and contract dim 3 (size 32) of %841 with
// dim 3 of %837, producing a 384x384 matrix per batch entry.
%842 = "mhlo.dot_general"(%841, %837) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
// Multiply every score by the scalar %2 (presumably the 1/sqrt(d) scale; confirm at its definition).
%843 = chlo.broadcast_multiply %842, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
// Add %24, whose size-1 dim 1 is broadcast across the 4 entries of dim 1
// (presumably an additive mask; confirm at its definition).
%844 = chlo.broadcast_add %843, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
// Row-wise maximum: fold mhlo.maximum over dim 3 of %844, seeded with %4; result is 1x4x384.
%845 = "mhlo.reduce"(%844, %4) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
// Re-insert the reduced axis as a trailing size-1 dim so the maxima broadcast against %844.
%846 = linalg.tensor_expand_shape %845 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
// Shift each row by its maximum, then exponentiate element-wise.
%847 = chlo.broadcast_subtract %844, %846 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%848 = "mhlo.exponential"(%847) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
// Row-wise sum of the exponentials: fold mhlo.add over dim 3, seeded with %5; result is 1x4x384.
%849 = "mhlo.reduce"(%848, %5) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
// Re-insert the reduced axis as size 1, then divide each row by its sum.
%850 = linalg.tensor_expand_shape %849 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%851 = chlo.broadcast_divide %848, %850 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
// Apply the normalised rows: batch over dims [0, 1] and contract dim 3 of %851 (size 384)
// with dim 2 of %826, yielding 1x4x384x32.
%852 = "mhlo.dot_general"(%851, %826) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%853 = "mhlo.transpose"(%852) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%854 = "mhlo.reshape"(%853) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%855 = "mhlo.dot"(%854, %cst_998) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%856 = chlo.broadcast_add %855, %cst_997 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%857 = "mhlo.reshape"(%856) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%858 = "mhlo.dot"(%822, %cst_1011) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%859 = chlo.broadcast_add %858, %cst_1010 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%860 = "mhlo.reshape"(%859) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%861 = chlo.broadcast_multiply %860, %cst_1009 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%862 = chlo.broadcast_add %861, %cst_997 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%863 = chlo.broadcast_add %857, %862 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%864 = chlo.broadcast_multiply %863, %cst_996 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%865 = chlo.broadcast_add %864, %cst_995 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%866 = "mhlo.reshape"(%865) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%867 = "mhlo.dot"(%866, %cst_1013) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%868 = chlo.broadcast_add %867, %cst_1012 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%869 = "mhlo.reshape"(%868) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%870 = chlo.broadcast_maximum %869, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%871 = "mhlo.reshape"(%870) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%872 = "mhlo.dot"(%871, %cst_1017) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%873 = chlo.broadcast_add %872, %cst_1016 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%874 = "mhlo.reshape"(%873) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%875 = chlo.broadcast_add %874, %865 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%876 = chlo.broadcast_multiply %875, %cst_1015 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%877 = chlo.broadcast_add %876, %cst_1014 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%878 = "mhlo.reshape"(%877) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%879 = "mhlo.dot"(%878, %cst_1019) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%880 = chlo.broadcast_add %879, %cst_1018 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%881 = "mhlo.reshape"(%880) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%882 = chlo.broadcast_maximum %881, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%883 = "mhlo.reshape"(%882) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%884 = "mhlo.dot"(%883, %cst_1023) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%885 = chlo.broadcast_add %884, %cst_1022 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%886 = "mhlo.reshape"(%885) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%887 = chlo.broadcast_add %886, %877 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%888 = chlo.broadcast_multiply %887, %cst_1021 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%889 = chlo.broadcast_add %888, %cst_1020 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%890 = "mhlo.reshape"(%889) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%891 = "mhlo.dot"(%890, %cst_1025) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%892 = chlo.broadcast_add %891, %cst_1024 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%893 = "mhlo.reshape"(%892) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%894 = chlo.broadcast_maximum %893, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%895 = "mhlo.reshape"(%894) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%896 = "mhlo.dot"(%895, %cst_1029) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%897 = chlo.broadcast_add %896, %cst_1028 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%898 = "mhlo.reshape"(%897) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%899 = chlo.broadcast_add %898, %889 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%900 = chlo.broadcast_multiply %899, %cst_1027 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%901 = chlo.broadcast_add %900, %cst_1026 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%902 = "mhlo.reshape"(%901) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%903 = "mhlo.dot"(%902, %cst_1031) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%904 = chlo.broadcast_add %903, %cst_1030 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%905 = "mhlo.reshape"(%904) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%906 = chlo.broadcast_maximum %905, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%907 = "mhlo.reshape"(%906) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%908 = "mhlo.dot"(%907, %cst_1039) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%909 = chlo.broadcast_add %908, %cst_1038 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%910 = "mhlo.reshape"(%909) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%911 = chlo.broadcast_add %910, %901 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%912 = chlo.broadcast_multiply %911, %cst_1033 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%913 = chlo.broadcast_add %912, %cst_1032 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%914 = "mhlo.reshape"(%913) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%915 = "mhlo.dot"(%914, %cst_1037) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%916 = chlo.broadcast_add %915, %cst_1036 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%917 = "mhlo.reshape"(%916) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%918 = chlo.broadcast_add %917, %821 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%919 = chlo.broadcast_multiply %918, %cst_1035 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%920 = chlo.broadcast_add %919, %cst_1034 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%921 = "mhlo.reshape"(%920) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%922 = "mhlo.dot"(%921, %cst_1049) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%923 = chlo.broadcast_add %922, %cst_1048 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%924 = "mhlo.reshape"(%923) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%925 = "mhlo.transpose"(%924) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%926 = "mhlo.dot"(%921, %cst_1053) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%927 = "mhlo.reshape"(%926) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%928 = "mhlo.broadcast_in_dim"(%cst_1052) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%929 = mhlo.add %927, %928 : tensor<1x384x128xf32>
%930 = chlo.broadcast_multiply %929, %cst_1051 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%931 = chlo.broadcast_add %930, %cst_1050 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%932 = "mhlo.reshape"(%931) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%933 = "mhlo.dot"(%932, %cst_1045) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%934 = chlo.broadcast_add %933, %cst_1044 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%935 = "mhlo.reshape"(%934) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%936 = "mhlo.transpose"(%935) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%937 = "mhlo.dot"(%932, %cst_1047) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%938 = chlo.broadcast_add %937, %cst_1046 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%939 = "mhlo.reshape"(%938) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%940 = "mhlo.transpose"(%939) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
// Score / normalise / apply sequence over 1x4x384x* tensors (%941 .. %951).
// NOTE(review): the overall shape looks like scaled dot-product attention with a
// max-subtracted softmax, but %2, %4, %5 and %24 are defined outside this excerpt,
// so their values cannot be confirmed here.
//
// Pairwise scores: batch over dims [0, 1] and contract dim 3 (size 32) of %940 with
// dim 3 of %936, producing a 384x384 matrix per batch entry.
%941 = "mhlo.dot_general"(%940, %936) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
// Multiply every score by the scalar %2 (presumably the 1/sqrt(d) scale; confirm at its definition).
%942 = chlo.broadcast_multiply %941, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
// Add %24, whose size-1 dim 1 is broadcast across the 4 entries of dim 1
// (presumably an additive mask; confirm at its definition).
%943 = chlo.broadcast_add %942, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
// Row-wise maximum: fold mhlo.maximum over dim 3 of %943, seeded with %4; result is 1x4x384.
%944 = "mhlo.reduce"(%943, %4) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
// Re-insert the reduced axis as a trailing size-1 dim so the maxima broadcast against %943.
%945 = linalg.tensor_expand_shape %944 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
// Shift each row by its maximum, then exponentiate element-wise.
%946 = chlo.broadcast_subtract %943, %945 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%947 = "mhlo.exponential"(%946) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
// Row-wise sum of the exponentials: fold mhlo.add over dim 3, seeded with %5; result is 1x4x384.
%948 = "mhlo.reduce"(%947, %5) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
// Re-insert the reduced axis as size 1, then divide each row by its sum.
%949 = linalg.tensor_expand_shape %948 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%950 = chlo.broadcast_divide %947, %949 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
// Apply the normalised rows: batch over dims [0, 1] and contract dim 3 of %950 (size 384)
// with dim 2 of %925, yielding 1x4x384x32.
%951 = "mhlo.dot_general"(%950, %925) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%952 = "mhlo.transpose"(%951) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%953 = "mhlo.reshape"(%952) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%954 = "mhlo.dot"(%953, %cst_1043) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%955 = chlo.broadcast_add %954, %cst_1042 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%956 = "mhlo.reshape"(%955) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%957 = "mhlo.dot"(%921, %cst_1056) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%958 = chlo.broadcast_add %957, %cst_1055 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%959 = "mhlo.reshape"(%958) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%960 = chlo.broadcast_multiply %959, %cst_1054 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%961 = chlo.broadcast_add %960, %cst_1042 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%962 = chlo.broadcast_add %956, %961 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%963 = chlo.broadcast_multiply %962, %cst_1041 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%964 = chlo.broadcast_add %963, %cst_1040 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%965 = "mhlo.reshape"(%964) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%966 = "mhlo.dot"(%965, %cst_1058) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%967 = chlo.broadcast_add %966, %cst_1057 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%968 = "mhlo.reshape"(%967) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%969 = chlo.broadcast_maximum %968, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%970 = "mhlo.reshape"(%969) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%971 = "mhlo.dot"(%970, %cst_1062) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%972 = chlo.broadcast_add %971, %cst_1061 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%973 = "mhlo.reshape"(%972) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%974 = chlo.broadcast_add %973, %964 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%975 = chlo.broadcast_multiply %974, %cst_1060 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%976 = chlo.broadcast_add %975, %cst_1059 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%977 = "mhlo.reshape"(%976) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%978 = "mhlo.dot"(%977, %cst_1064) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%979 = chlo.broadcast_add %978, %cst_1063 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%980 = "mhlo.reshape"(%979) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%981 = chlo.broadcast_maximum %980, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%982 = "mhlo.reshape"(%981) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%983 = "mhlo.dot"(%982, %cst_1068) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%984 = chlo.broadcast_add %983, %cst_1067 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%985 = "mhlo.reshape"(%984) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%986 = chlo.broadcast_add %985, %976 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%987 = chlo.broadcast_multiply %986, %cst_1066 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%988 = chlo.broadcast_add %987, %cst_1065 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%989 = "mhlo.reshape"(%988) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%990 = "mhlo.dot"(%989, %cst_1070) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%991 = chlo.broadcast_add %990, %cst_1069 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%992 = "mhlo.reshape"(%991) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%993 = chlo.broadcast_maximum %992, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%994 = "mhlo.reshape"(%993) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%995 = "mhlo.dot"(%994, %cst_1074) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%996 = chlo.broadcast_add %995, %cst_1073 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%997 = "mhlo.reshape"(%996) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%998 = chlo.broadcast_add %997, %988 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%999 = chlo.broadcast_multiply %998, %cst_1072 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1000 = chlo.broadcast_add %999, %cst_1071 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1001 = "mhlo.reshape"(%1000) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1002 = "mhlo.dot"(%1001, %cst_1076) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1003 = chlo.broadcast_add %1002, %cst_1075 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1004 = "mhlo.reshape"(%1003) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1005 = chlo.broadcast_maximum %1004, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1006 = "mhlo.reshape"(%1005) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1007 = "mhlo.dot"(%1006, %cst_1084) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1008 = chlo.broadcast_add %1007, %cst_1083 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1009 = "mhlo.reshape"(%1008) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1010 = chlo.broadcast_add %1009, %1000 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1011 = chlo.broadcast_multiply %1010, %cst_1078 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1012 = chlo.broadcast_add %1011, %cst_1077 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1013 = "mhlo.reshape"(%1012) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1014 = "mhlo.dot"(%1013, %cst_1082) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1015 = chlo.broadcast_add %1014, %cst_1081 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1016 = "mhlo.reshape"(%1015) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1017 = chlo.broadcast_add %1016, %920 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%1018 = chlo.broadcast_multiply %1017, %cst_1080 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1019 = chlo.broadcast_add %1018, %cst_1079 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1020 = "mhlo.reshape"(%1019) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1021 = "mhlo.dot"(%1020, %cst_104) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1022 = chlo.broadcast_add %1021, %cst_103 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1023 = "mhlo.reshape"(%1022) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1024 = "mhlo.transpose"(%1023) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1025 = "mhlo.dot"(%1020, %cst_108) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1026 = "mhlo.reshape"(%1025) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1027 = "mhlo.broadcast_in_dim"(%cst_107) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%1028 = mhlo.add %1026, %1027 : tensor<1x384x128xf32>
%1029 = chlo.broadcast_multiply %1028, %cst_106 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1030 = chlo.broadcast_add %1029, %cst_105 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1031 = "mhlo.reshape"(%1030) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1032 = "mhlo.dot"(%1031, %cst_100) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1033 = chlo.broadcast_add %1032, %cst_99 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1034 = "mhlo.reshape"(%1033) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1035 = "mhlo.transpose"(%1034) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1036 = "mhlo.dot"(%1031, %cst_102) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1037 = chlo.broadcast_add %1036, %cst_101 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1038 = "mhlo.reshape"(%1037) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1039 = "mhlo.transpose"(%1038) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1040 = "mhlo.dot_general"(%1039, %1035) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%1041 = chlo.broadcast_multiply %1040, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%1042 = chlo.broadcast_add %1041, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%1043 = "mhlo.reduce"(%1042, %4) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1044 = linalg.tensor_expand_shape %1043 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1045 = chlo.broadcast_subtract %1042, %1044 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1046 = "mhlo.exponential"(%1045) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%1047 = "mhlo.reduce"(%1046, %5) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1048 = linalg.tensor_expand_shape %1047 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1049 = chlo.broadcast_divide %1046, %1048 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1050 = "mhlo.dot_general"(%1049, %1024) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%1051 = "mhlo.transpose"(%1050) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%1052 = "mhlo.reshape"(%1051) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%1053 = "mhlo.dot"(%1052, %cst_98) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1054 = chlo.broadcast_add %1053, %cst_97 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1055 = "mhlo.reshape"(%1054) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1056 = "mhlo.dot"(%1020, %cst_111) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1057 = chlo.broadcast_add %1056, %cst_110 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1058 = "mhlo.reshape"(%1057) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1059 = chlo.broadcast_multiply %1058, %cst_109 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1060 = chlo.broadcast_add %1059, %cst_97 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1061 = chlo.broadcast_add %1055, %1060 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1062 = chlo.broadcast_multiply %1061, %cst_96 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1063 = chlo.broadcast_add %1062, %cst_95 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1064 = "mhlo.reshape"(%1063) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1065 = "mhlo.dot"(%1064, %cst_113) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1066 = chlo.broadcast_add %1065, %cst_112 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1067 = "mhlo.reshape"(%1066) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1068 = chlo.broadcast_maximum %1067, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1069 = "mhlo.reshape"(%1068) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1070 = "mhlo.dot"(%1069, %cst_117) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1071 = chlo.broadcast_add %1070, %cst_116 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1072 = "mhlo.reshape"(%1071) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1073 = chlo.broadcast_add %1072, %1063 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1074 = chlo.broadcast_multiply %1073, %cst_115 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1075 = chlo.broadcast_add %1074, %cst_114 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1076 = "mhlo.reshape"(%1075) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1077 = "mhlo.dot"(%1076, %cst_119) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1078 = chlo.broadcast_add %1077, %cst_118 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1079 = "mhlo.reshape"(%1078) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1080 = chlo.broadcast_maximum %1079, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1081 = "mhlo.reshape"(%1080) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1082 = "mhlo.dot"(%1081, %cst_123) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1083 = chlo.broadcast_add %1082, %cst_122 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1084 = "mhlo.reshape"(%1083) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1085 = chlo.broadcast_add %1084, %1075 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1086 = chlo.broadcast_multiply %1085, %cst_121 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1087 = chlo.broadcast_add %1086, %cst_120 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1088 = "mhlo.reshape"(%1087) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1089 = "mhlo.dot"(%1088, %cst_125) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1090 = chlo.broadcast_add %1089, %cst_124 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1091 = "mhlo.reshape"(%1090) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1092 = chlo.broadcast_maximum %1091, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1093 = "mhlo.reshape"(%1092) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1094 = "mhlo.dot"(%1093, %cst_129) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1095 = chlo.broadcast_add %1094, %cst_128 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1096 = "mhlo.reshape"(%1095) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1097 = chlo.broadcast_add %1096, %1087 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1098 = chlo.broadcast_multiply %1097, %cst_127 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1099 = chlo.broadcast_add %1098, %cst_126 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1100 = "mhlo.reshape"(%1099) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1101 = "mhlo.dot"(%1100, %cst_131) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1102 = chlo.broadcast_add %1101, %cst_130 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1103 = "mhlo.reshape"(%1102) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1104 = chlo.broadcast_maximum %1103, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1105 = "mhlo.reshape"(%1104) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1106 = "mhlo.dot"(%1105, %cst_139) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1107 = chlo.broadcast_add %1106, %cst_138 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1108 = "mhlo.reshape"(%1107) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1109 = chlo.broadcast_add %1108, %1099 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1110 = chlo.broadcast_multiply %1109, %cst_133 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1111 = chlo.broadcast_add %1110, %cst_132 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1112 = "mhlo.reshape"(%1111) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1113 = "mhlo.dot"(%1112, %cst_137) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1114 = chlo.broadcast_add %1113, %cst_136 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1115 = "mhlo.reshape"(%1114) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1116 = chlo.broadcast_add %1115, %1019 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%1117 = chlo.broadcast_multiply %1116, %cst_135 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1118 = chlo.broadcast_add %1117, %cst_134 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1119 = "mhlo.reshape"(%1118) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1120 = "mhlo.dot"(%1119, %cst_149) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1121 = chlo.broadcast_add %1120, %cst_148 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1122 = "mhlo.reshape"(%1121) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1123 = "mhlo.transpose"(%1122) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1124 = "mhlo.dot"(%1119, %cst_153) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1125 = "mhlo.reshape"(%1124) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1126 = "mhlo.broadcast_in_dim"(%cst_152) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%1127 = mhlo.add %1125, %1126 : tensor<1x384x128xf32>
%1128 = chlo.broadcast_multiply %1127, %cst_151 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1129 = chlo.broadcast_add %1128, %cst_150 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1130 = "mhlo.reshape"(%1129) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1131 = "mhlo.dot"(%1130, %cst_145) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1132 = chlo.broadcast_add %1131, %cst_144 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1133 = "mhlo.reshape"(%1132) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1134 = "mhlo.transpose"(%1133) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1135 = "mhlo.dot"(%1130, %cst_147) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1136 = chlo.broadcast_add %1135, %cst_146 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1137 = "mhlo.reshape"(%1136) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1138 = "mhlo.transpose"(%1137) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1139 = "mhlo.dot_general"(%1138, %1134) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%1140 = chlo.broadcast_multiply %1139, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%1141 = chlo.broadcast_add %1140, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%1142 = "mhlo.reduce"(%1141, %4) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1143 = linalg.tensor_expand_shape %1142 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1144 = chlo.broadcast_subtract %1141, %1143 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1145 = "mhlo.exponential"(%1144) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%1146 = "mhlo.reduce"(%1145, %5) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1147 = linalg.tensor_expand_shape %1146 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1148 = chlo.broadcast_divide %1145, %1147 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1149 = "mhlo.dot_general"(%1148, %1123) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%1150 = "mhlo.transpose"(%1149) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%1151 = "mhlo.reshape"(%1150) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%1152 = "mhlo.dot"(%1151, %cst_143) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1153 = chlo.broadcast_add %1152, %cst_142 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1154 = "mhlo.reshape"(%1153) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1155 = "mhlo.dot"(%1119, %cst_156) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1156 = chlo.broadcast_add %1155, %cst_155 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1157 = "mhlo.reshape"(%1156) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1158 = chlo.broadcast_multiply %1157, %cst_154 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1159 = chlo.broadcast_add %1158, %cst_142 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1160 = chlo.broadcast_add %1154, %1159 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1161 = chlo.broadcast_multiply %1160, %cst_141 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1162 = chlo.broadcast_add %1161, %cst_140 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1163 = "mhlo.reshape"(%1162) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1164 = "mhlo.dot"(%1163, %cst_158) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1165 = chlo.broadcast_add %1164, %cst_157 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1166 = "mhlo.reshape"(%1165) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1167 = chlo.broadcast_maximum %1166, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1168 = "mhlo.reshape"(%1167) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1169 = "mhlo.dot"(%1168, %cst_162) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1170 = chlo.broadcast_add %1169, %cst_161 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1171 = "mhlo.reshape"(%1170) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1172 = chlo.broadcast_add %1171, %1162 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1173 = chlo.broadcast_multiply %1172, %cst_160 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1174 = chlo.broadcast_add %1173, %cst_159 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1175 = "mhlo.reshape"(%1174) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1176 = "mhlo.dot"(%1175, %cst_164) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1177 = chlo.broadcast_add %1176, %cst_163 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1178 = "mhlo.reshape"(%1177) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1179 = chlo.broadcast_maximum %1178, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1180 = "mhlo.reshape"(%1179) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1181 = "mhlo.dot"(%1180, %cst_168) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1182 = chlo.broadcast_add %1181, %cst_167 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1183 = "mhlo.reshape"(%1182) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1184 = chlo.broadcast_add %1183, %1174 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1185 = chlo.broadcast_multiply %1184, %cst_166 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1186 = chlo.broadcast_add %1185, %cst_165 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1187 = "mhlo.reshape"(%1186) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1188 = "mhlo.dot"(%1187, %cst_170) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1189 = chlo.broadcast_add %1188, %cst_169 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1190 = "mhlo.reshape"(%1189) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1191 = chlo.broadcast_maximum %1190, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1192 = "mhlo.reshape"(%1191) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1193 = "mhlo.dot"(%1192, %cst_174) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1194 = chlo.broadcast_add %1193, %cst_173 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1195 = "mhlo.reshape"(%1194) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1196 = chlo.broadcast_add %1195, %1186 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1197 = chlo.broadcast_multiply %1196, %cst_172 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1198 = chlo.broadcast_add %1197, %cst_171 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1199 = "mhlo.reshape"(%1198) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1200 = "mhlo.dot"(%1199, %cst_176) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1201 = chlo.broadcast_add %1200, %cst_175 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1202 = "mhlo.reshape"(%1201) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1203 = chlo.broadcast_maximum %1202, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1204 = "mhlo.reshape"(%1203) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1205 = "mhlo.dot"(%1204, %cst_184) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1206 = chlo.broadcast_add %1205, %cst_183 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1207 = "mhlo.reshape"(%1206) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1208 = chlo.broadcast_add %1207, %1198 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1209 = chlo.broadcast_multiply %1208, %cst_178 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1210 = chlo.broadcast_add %1209, %cst_177 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1211 = "mhlo.reshape"(%1210) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1212 = "mhlo.dot"(%1211, %cst_182) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1213 = chlo.broadcast_add %1212, %cst_181 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1214 = "mhlo.reshape"(%1213) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1215 = chlo.broadcast_add %1214, %1118 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%1216 = chlo.broadcast_multiply %1215, %cst_180 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1217 = chlo.broadcast_add %1216, %cst_179 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1218 = "mhlo.reshape"(%1217) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1219 = "mhlo.dot"(%1218, %cst_194) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1220 = chlo.broadcast_add %1219, %cst_193 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1221 = "mhlo.reshape"(%1220) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1222 = "mhlo.transpose"(%1221) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1223 = "mhlo.dot"(%1218, %cst_198) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1224 = "mhlo.reshape"(%1223) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1225 = "mhlo.broadcast_in_dim"(%cst_197) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%1226 = mhlo.add %1224, %1225 : tensor<1x384x128xf32>
%1227 = chlo.broadcast_multiply %1226, %cst_196 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1228 = chlo.broadcast_add %1227, %cst_195 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1229 = "mhlo.reshape"(%1228) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1230 = "mhlo.dot"(%1229, %cst_190) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1231 = chlo.broadcast_add %1230, %cst_189 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1232 = "mhlo.reshape"(%1231) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1233 = "mhlo.transpose"(%1232) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1234 = "mhlo.dot"(%1229, %cst_192) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1235 = chlo.broadcast_add %1234, %cst_191 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1236 = "mhlo.reshape"(%1235) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1237 = "mhlo.transpose"(%1236) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1238 = "mhlo.dot_general"(%1237, %1233) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%1239 = chlo.broadcast_multiply %1238, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%1240 = chlo.broadcast_add %1239, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%1241 = "mhlo.reduce"(%1240, %4) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1242 = linalg.tensor_expand_shape %1241 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1243 = chlo.broadcast_subtract %1240, %1242 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1244 = "mhlo.exponential"(%1243) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%1245 = "mhlo.reduce"(%1244, %5) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1246 = linalg.tensor_expand_shape %1245 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1247 = chlo.broadcast_divide %1244, %1246 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1248 = "mhlo.dot_general"(%1247, %1222) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%1249 = "mhlo.transpose"(%1248) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%1250 = "mhlo.reshape"(%1249) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%1251 = "mhlo.dot"(%1250, %cst_188) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1252 = chlo.broadcast_add %1251, %cst_187 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1253 = "mhlo.reshape"(%1252) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1254 = "mhlo.dot"(%1218, %cst_201) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1255 = chlo.broadcast_add %1254, %cst_200 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1256 = "mhlo.reshape"(%1255) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1257 = chlo.broadcast_multiply %1256, %cst_199 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1258 = chlo.broadcast_add %1257, %cst_187 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1259 = chlo.broadcast_add %1253, %1258 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1260 = chlo.broadcast_multiply %1259, %cst_186 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1261 = chlo.broadcast_add %1260, %cst_185 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1262 = "mhlo.reshape"(%1261) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1263 = "mhlo.dot"(%1262, %cst_203) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1264 = chlo.broadcast_add %1263, %cst_202 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1265 = "mhlo.reshape"(%1264) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1266 = chlo.broadcast_maximum %1265, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1267 = "mhlo.reshape"(%1266) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1268 = "mhlo.dot"(%1267, %cst_207) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1269 = chlo.broadcast_add %1268, %cst_206 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1270 = "mhlo.reshape"(%1269) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1271 = chlo.broadcast_add %1270, %1261 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1272 = chlo.broadcast_multiply %1271, %cst_205 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1273 = chlo.broadcast_add %1272, %cst_204 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1274 = "mhlo.reshape"(%1273) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1275 = "mhlo.dot"(%1274, %cst_209) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1276 = chlo.broadcast_add %1275, %cst_208 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1277 = "mhlo.reshape"(%1276) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1278 = chlo.broadcast_maximum %1277, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1279 = "mhlo.reshape"(%1278) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1280 = "mhlo.dot"(%1279, %cst_213) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1281 = chlo.broadcast_add %1280, %cst_212 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1282 = "mhlo.reshape"(%1281) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1283 = chlo.broadcast_add %1282, %1273 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1284 = chlo.broadcast_multiply %1283, %cst_211 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1285 = chlo.broadcast_add %1284, %cst_210 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1286 = "mhlo.reshape"(%1285) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1287 = "mhlo.dot"(%1286, %cst_215) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1288 = chlo.broadcast_add %1287, %cst_214 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1289 = "mhlo.reshape"(%1288) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1290 = chlo.broadcast_maximum %1289, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1291 = "mhlo.reshape"(%1290) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1292 = "mhlo.dot"(%1291, %cst_219) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1293 = chlo.broadcast_add %1292, %cst_218 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1294 = "mhlo.reshape"(%1293) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1295 = chlo.broadcast_add %1294, %1285 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1296 = chlo.broadcast_multiply %1295, %cst_217 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1297 = chlo.broadcast_add %1296, %cst_216 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1298 = "mhlo.reshape"(%1297) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1299 = "mhlo.dot"(%1298, %cst_221) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1300 = chlo.broadcast_add %1299, %cst_220 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1301 = "mhlo.reshape"(%1300) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1302 = chlo.broadcast_maximum %1301, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1303 = "mhlo.reshape"(%1302) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1304 = "mhlo.dot"(%1303, %cst_229) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1305 = chlo.broadcast_add %1304, %cst_228 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1306 = "mhlo.reshape"(%1305) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1307 = chlo.broadcast_add %1306, %1297 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1308 = chlo.broadcast_multiply %1307, %cst_223 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1309 = chlo.broadcast_add %1308, %cst_222 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1310 = "mhlo.reshape"(%1309) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1311 = "mhlo.dot"(%1310, %cst_227) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1312 = chlo.broadcast_add %1311, %cst_226 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1313 = "mhlo.reshape"(%1312) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1314 = chlo.broadcast_add %1313, %1217 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%1315 = chlo.broadcast_multiply %1314, %cst_225 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1316 = chlo.broadcast_add %1315, %cst_224 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1317 = "mhlo.reshape"(%1316) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1318 = "mhlo.dot"(%1317, %cst_239) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1319 = chlo.broadcast_add %1318, %cst_238 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1320 = "mhlo.reshape"(%1319) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1321 = "mhlo.transpose"(%1320) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1322 = "mhlo.dot"(%1317, %cst_243) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1323 = "mhlo.reshape"(%1322) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1324 = "mhlo.broadcast_in_dim"(%cst_242) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%1325 = mhlo.add %1323, %1324 : tensor<1x384x128xf32>
%1326 = chlo.broadcast_multiply %1325, %cst_241 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1327 = chlo.broadcast_add %1326, %cst_240 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1328 = "mhlo.reshape"(%1327) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1329 = "mhlo.dot"(%1328, %cst_235) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1330 = chlo.broadcast_add %1329, %cst_234 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1331 = "mhlo.reshape"(%1330) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1332 = "mhlo.transpose"(%1331) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1333 = "mhlo.dot"(%1328, %cst_237) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1334 = chlo.broadcast_add %1333, %cst_236 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1335 = "mhlo.reshape"(%1334) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1336 = "mhlo.transpose"(%1335) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1337 = "mhlo.dot_general"(%1336, %1332) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%1338 = chlo.broadcast_multiply %1337, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%1339 = chlo.broadcast_add %1338, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%1340 = "mhlo.reduce"(%1339, %4) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1341 = linalg.tensor_expand_shape %1340 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1342 = chlo.broadcast_subtract %1339, %1341 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1343 = "mhlo.exponential"(%1342) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%1344 = "mhlo.reduce"(%1343, %5) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1345 = linalg.tensor_expand_shape %1344 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1346 = chlo.broadcast_divide %1343, %1345 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1347 = "mhlo.dot_general"(%1346, %1321) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%1348 = "mhlo.transpose"(%1347) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%1349 = "mhlo.reshape"(%1348) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%1350 = "mhlo.dot"(%1349, %cst_233) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1351 = chlo.broadcast_add %1350, %cst_232 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1352 = "mhlo.reshape"(%1351) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1353 = "mhlo.dot"(%1317, %cst_246) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1354 = chlo.broadcast_add %1353, %cst_245 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1355 = "mhlo.reshape"(%1354) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1356 = chlo.broadcast_multiply %1355, %cst_244 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1357 = chlo.broadcast_add %1356, %cst_232 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1358 = chlo.broadcast_add %1352, %1357 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1359 = chlo.broadcast_multiply %1358, %cst_231 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1360 = chlo.broadcast_add %1359, %cst_230 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1361 = "mhlo.reshape"(%1360) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1362 = "mhlo.dot"(%1361, %cst_248) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1363 = chlo.broadcast_add %1362, %cst_247 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1364 = "mhlo.reshape"(%1363) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1365 = chlo.broadcast_maximum %1364, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1366 = "mhlo.reshape"(%1365) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1367 = "mhlo.dot"(%1366, %cst_252) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1368 = chlo.broadcast_add %1367, %cst_251 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1369 = "mhlo.reshape"(%1368) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1370 = chlo.broadcast_add %1369, %1360 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1371 = chlo.broadcast_multiply %1370, %cst_250 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1372 = chlo.broadcast_add %1371, %cst_249 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1373 = "mhlo.reshape"(%1372) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1374 = "mhlo.dot"(%1373, %cst_254) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1375 = chlo.broadcast_add %1374, %cst_253 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1376 = "mhlo.reshape"(%1375) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1377 = chlo.broadcast_maximum %1376, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1378 = "mhlo.reshape"(%1377) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1379 = "mhlo.dot"(%1378, %cst_258) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1380 = chlo.broadcast_add %1379, %cst_257 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1381 = "mhlo.reshape"(%1380) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1382 = chlo.broadcast_add %1381, %1372 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1383 = chlo.broadcast_multiply %1382, %cst_256 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1384 = chlo.broadcast_add %1383, %cst_255 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1385 = "mhlo.reshape"(%1384) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1386 = "mhlo.dot"(%1385, %cst_260) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1387 = chlo.broadcast_add %1386, %cst_259 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1388 = "mhlo.reshape"(%1387) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1389 = chlo.broadcast_maximum %1388, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1390 = "mhlo.reshape"(%1389) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1391 = "mhlo.dot"(%1390, %cst_264) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1392 = chlo.broadcast_add %1391, %cst_263 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1393 = "mhlo.reshape"(%1392) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1394 = chlo.broadcast_add %1393, %1384 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1395 = chlo.broadcast_multiply %1394, %cst_262 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1396 = chlo.broadcast_add %1395, %cst_261 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1397 = "mhlo.reshape"(%1396) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1398 = "mhlo.dot"(%1397, %cst_266) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1399 = chlo.broadcast_add %1398, %cst_265 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1400 = "mhlo.reshape"(%1399) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1401 = chlo.broadcast_maximum %1400, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1402 = "mhlo.reshape"(%1401) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1403 = "mhlo.dot"(%1402, %cst_274) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1404 = chlo.broadcast_add %1403, %cst_273 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1405 = "mhlo.reshape"(%1404) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1406 = chlo.broadcast_add %1405, %1396 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1407 = chlo.broadcast_multiply %1406, %cst_268 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1408 = chlo.broadcast_add %1407, %cst_267 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1409 = "mhlo.reshape"(%1408) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1410 = "mhlo.dot"(%1409, %cst_272) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1411 = chlo.broadcast_add %1410, %cst_271 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1412 = "mhlo.reshape"(%1411) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1413 = chlo.broadcast_add %1412, %1316 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%1414 = chlo.broadcast_multiply %1413, %cst_270 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1415 = chlo.broadcast_add %1414, %cst_269 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1416 = "mhlo.reshape"(%1415) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1417 = "mhlo.dot"(%1416, %cst_284) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1418 = chlo.broadcast_add %1417, %cst_283 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1419 = "mhlo.reshape"(%1418) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1420 = "mhlo.transpose"(%1419) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1421 = "mhlo.dot"(%1416, %cst_288) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1422 = "mhlo.reshape"(%1421) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1423 = "mhlo.broadcast_in_dim"(%cst_287) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%1424 = mhlo.add %1422, %1423 : tensor<1x384x128xf32>
%1425 = chlo.broadcast_multiply %1424, %cst_286 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1426 = chlo.broadcast_add %1425, %cst_285 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1427 = "mhlo.reshape"(%1426) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1428 = "mhlo.dot"(%1427, %cst_280) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1429 = chlo.broadcast_add %1428, %cst_279 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1430 = "mhlo.reshape"(%1429) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1431 = "mhlo.transpose"(%1430) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1432 = "mhlo.dot"(%1427, %cst_282) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1433 = chlo.broadcast_add %1432, %cst_281 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1434 = "mhlo.reshape"(%1433) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1435 = "mhlo.transpose"(%1434) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1436 = "mhlo.dot_general"(%1435, %1431) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%1437 = chlo.broadcast_multiply %1436, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%1438 = chlo.broadcast_add %1437, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%1439 = "mhlo.reduce"(%1438, %4) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1440 = linalg.tensor_expand_shape %1439 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1441 = chlo.broadcast_subtract %1438, %1440 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1442 = "mhlo.exponential"(%1441) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%1443 = "mhlo.reduce"(%1442, %5) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1444 = linalg.tensor_expand_shape %1443 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1445 = chlo.broadcast_divide %1442, %1444 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1446 = "mhlo.dot_general"(%1445, %1420) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%1447 = "mhlo.transpose"(%1446) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%1448 = "mhlo.reshape"(%1447) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%1449 = "mhlo.dot"(%1448, %cst_278) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1450 = chlo.broadcast_add %1449, %cst_277 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1451 = "mhlo.reshape"(%1450) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1452 = "mhlo.dot"(%1416, %cst_291) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1453 = chlo.broadcast_add %1452, %cst_290 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1454 = "mhlo.reshape"(%1453) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1455 = chlo.broadcast_multiply %1454, %cst_289 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1456 = chlo.broadcast_add %1455, %cst_277 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1457 = chlo.broadcast_add %1451, %1456 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1458 = chlo.broadcast_multiply %1457, %cst_276 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1459 = chlo.broadcast_add %1458, %cst_275 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1460 = "mhlo.reshape"(%1459) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1461 = "mhlo.dot"(%1460, %cst_293) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1462 = chlo.broadcast_add %1461, %cst_292 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1463 = "mhlo.reshape"(%1462) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1464 = chlo.broadcast_maximum %1463, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1465 = "mhlo.reshape"(%1464) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1466 = "mhlo.dot"(%1465, %cst_297) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1467 = chlo.broadcast_add %1466, %cst_296 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1468 = "mhlo.reshape"(%1467) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1469 = chlo.broadcast_add %1468, %1459 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1470 = chlo.broadcast_multiply %1469, %cst_295 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1471 = chlo.broadcast_add %1470, %cst_294 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1472 = "mhlo.reshape"(%1471) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1473 = "mhlo.dot"(%1472, %cst_299) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1474 = chlo.broadcast_add %1473, %cst_298 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1475 = "mhlo.reshape"(%1474) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1476 = chlo.broadcast_maximum %1475, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1477 = "mhlo.reshape"(%1476) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1478 = "mhlo.dot"(%1477, %cst_303) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1479 = chlo.broadcast_add %1478, %cst_302 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1480 = "mhlo.reshape"(%1479) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1481 = chlo.broadcast_add %1480, %1471 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1482 = chlo.broadcast_multiply %1481, %cst_301 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1483 = chlo.broadcast_add %1482, %cst_300 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1484 = "mhlo.reshape"(%1483) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1485 = "mhlo.dot"(%1484, %cst_305) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1486 = chlo.broadcast_add %1485, %cst_304 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1487 = "mhlo.reshape"(%1486) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1488 = chlo.broadcast_maximum %1487, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1489 = "mhlo.reshape"(%1488) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1490 = "mhlo.dot"(%1489, %cst_309) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1491 = chlo.broadcast_add %1490, %cst_308 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1492 = "mhlo.reshape"(%1491) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1493 = chlo.broadcast_add %1492, %1483 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1494 = chlo.broadcast_multiply %1493, %cst_307 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1495 = chlo.broadcast_add %1494, %cst_306 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1496 = "mhlo.reshape"(%1495) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1497 = "mhlo.dot"(%1496, %cst_311) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1498 = chlo.broadcast_add %1497, %cst_310 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1499 = "mhlo.reshape"(%1498) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1500 = chlo.broadcast_maximum %1499, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1501 = "mhlo.reshape"(%1500) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1502 = "mhlo.dot"(%1501, %cst_319) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1503 = chlo.broadcast_add %1502, %cst_318 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1504 = "mhlo.reshape"(%1503) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1505 = chlo.broadcast_add %1504, %1495 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1506 = chlo.broadcast_multiply %1505, %cst_313 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1507 = chlo.broadcast_add %1506, %cst_312 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1508 = "mhlo.reshape"(%1507) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1509 = "mhlo.dot"(%1508, %cst_317) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1510 = chlo.broadcast_add %1509, %cst_316 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1511 = "mhlo.reshape"(%1510) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1512 = chlo.broadcast_add %1511, %1415 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%1513 = chlo.broadcast_multiply %1512, %cst_315 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1514 = chlo.broadcast_add %1513, %cst_314 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1515 = "mhlo.reshape"(%1514) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1516 = "mhlo.dot"(%1515, %cst_329) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1517 = chlo.broadcast_add %1516, %cst_328 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1518 = "mhlo.reshape"(%1517) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1519 = "mhlo.transpose"(%1518) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1520 = "mhlo.dot"(%1515, %cst_333) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1521 = "mhlo.reshape"(%1520) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1522 = "mhlo.broadcast_in_dim"(%cst_332) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%1523 = mhlo.add %1521, %1522 : tensor<1x384x128xf32>
%1524 = chlo.broadcast_multiply %1523, %cst_331 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1525 = chlo.broadcast_add %1524, %cst_330 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1526 = "mhlo.reshape"(%1525) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1527 = "mhlo.dot"(%1526, %cst_325) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1528 = chlo.broadcast_add %1527, %cst_324 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1529 = "mhlo.reshape"(%1528) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1530 = "mhlo.transpose"(%1529) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1531 = "mhlo.dot"(%1526, %cst_327) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1532 = chlo.broadcast_add %1531, %cst_326 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1533 = "mhlo.reshape"(%1532) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1534 = "mhlo.transpose"(%1533) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1535 = "mhlo.dot_general"(%1534, %1530) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%1536 = chlo.broadcast_multiply %1535, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%1537 = chlo.broadcast_add %1536, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%1538 = "mhlo.reduce"(%1537, %4) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1539 = linalg.tensor_expand_shape %1538 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1540 = chlo.broadcast_subtract %1537, %1539 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1541 = "mhlo.exponential"(%1540) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%1542 = "mhlo.reduce"(%1541, %5) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1543 = linalg.tensor_expand_shape %1542 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1544 = chlo.broadcast_divide %1541, %1543 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1545 = "mhlo.dot_general"(%1544, %1519) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%1546 = "mhlo.transpose"(%1545) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%1547 = "mhlo.reshape"(%1546) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%1548 = "mhlo.dot"(%1547, %cst_323) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1549 = chlo.broadcast_add %1548, %cst_322 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1550 = "mhlo.reshape"(%1549) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1551 = "mhlo.dot"(%1515, %cst_336) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1552 = chlo.broadcast_add %1551, %cst_335 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1553 = "mhlo.reshape"(%1552) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1554 = chlo.broadcast_multiply %1553, %cst_334 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1555 = chlo.broadcast_add %1554, %cst_322 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1556 = chlo.broadcast_add %1550, %1555 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1557 = chlo.broadcast_multiply %1556, %cst_321 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1558 = chlo.broadcast_add %1557, %cst_320 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1559 = "mhlo.reshape"(%1558) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1560 = "mhlo.dot"(%1559, %cst_338) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1561 = chlo.broadcast_add %1560, %cst_337 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1562 = "mhlo.reshape"(%1561) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1563 = chlo.broadcast_maximum %1562, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1564 = "mhlo.reshape"(%1563) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1565 = "mhlo.dot"(%1564, %cst_342) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1566 = chlo.broadcast_add %1565, %cst_341 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1567 = "mhlo.reshape"(%1566) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1568 = chlo.broadcast_add %1567, %1558 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1569 = chlo.broadcast_multiply %1568, %cst_340 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1570 = chlo.broadcast_add %1569, %cst_339 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1571 = "mhlo.reshape"(%1570) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1572 = "mhlo.dot"(%1571, %cst_344) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1573 = chlo.broadcast_add %1572, %cst_343 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1574 = "mhlo.reshape"(%1573) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1575 = chlo.broadcast_maximum %1574, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1576 = "mhlo.reshape"(%1575) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1577 = "mhlo.dot"(%1576, %cst_348) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1578 = chlo.broadcast_add %1577, %cst_347 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1579 = "mhlo.reshape"(%1578) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1580 = chlo.broadcast_add %1579, %1570 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1581 = chlo.broadcast_multiply %1580, %cst_346 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1582 = chlo.broadcast_add %1581, %cst_345 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1583 = "mhlo.reshape"(%1582) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1584 = "mhlo.dot"(%1583, %cst_350) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1585 = chlo.broadcast_add %1584, %cst_349 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1586 = "mhlo.reshape"(%1585) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1587 = chlo.broadcast_maximum %1586, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1588 = "mhlo.reshape"(%1587) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1589 = "mhlo.dot"(%1588, %cst_354) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1590 = chlo.broadcast_add %1589, %cst_353 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1591 = "mhlo.reshape"(%1590) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1592 = chlo.broadcast_add %1591, %1582 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1593 = chlo.broadcast_multiply %1592, %cst_352 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1594 = chlo.broadcast_add %1593, %cst_351 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1595 = "mhlo.reshape"(%1594) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1596 = "mhlo.dot"(%1595, %cst_356) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1597 = chlo.broadcast_add %1596, %cst_355 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1598 = "mhlo.reshape"(%1597) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1599 = chlo.broadcast_maximum %1598, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1600 = "mhlo.reshape"(%1599) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1601 = "mhlo.dot"(%1600, %cst_364) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1602 = chlo.broadcast_add %1601, %cst_363 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1603 = "mhlo.reshape"(%1602) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1604 = chlo.broadcast_add %1603, %1594 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1605 = chlo.broadcast_multiply %1604, %cst_358 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1606 = chlo.broadcast_add %1605, %cst_357 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1607 = "mhlo.reshape"(%1606) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1608 = "mhlo.dot"(%1607, %cst_362) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1609 = chlo.broadcast_add %1608, %cst_361 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1610 = "mhlo.reshape"(%1609) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1611 = chlo.broadcast_add %1610, %1514 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%1612 = chlo.broadcast_multiply %1611, %cst_360 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1613 = chlo.broadcast_add %1612, %cst_359 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1614 = "mhlo.reshape"(%1613) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1615 = "mhlo.dot"(%1614, %cst_374) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1616 = chlo.broadcast_add %1615, %cst_373 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1617 = "mhlo.reshape"(%1616) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1618 = "mhlo.transpose"(%1617) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1619 = "mhlo.dot"(%1614, %cst_378) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1620 = "mhlo.reshape"(%1619) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1621 = "mhlo.broadcast_in_dim"(%cst_377) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%1622 = mhlo.add %1620, %1621 : tensor<1x384x128xf32>
%1623 = chlo.broadcast_multiply %1622, %cst_376 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1624 = chlo.broadcast_add %1623, %cst_375 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1625 = "mhlo.reshape"(%1624) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1626 = "mhlo.dot"(%1625, %cst_370) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1627 = chlo.broadcast_add %1626, %cst_369 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1628 = "mhlo.reshape"(%1627) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1629 = "mhlo.transpose"(%1628) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1630 = "mhlo.dot"(%1625, %cst_372) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1631 = chlo.broadcast_add %1630, %cst_371 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1632 = "mhlo.reshape"(%1631) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1633 = "mhlo.transpose"(%1632) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1634 = "mhlo.dot_general"(%1633, %1629) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%1635 = chlo.broadcast_multiply %1634, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%1636 = chlo.broadcast_add %1635, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%1637 = "mhlo.reduce"(%1636, %4) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1638 = linalg.tensor_expand_shape %1637 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1639 = chlo.broadcast_subtract %1636, %1638 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1640 = "mhlo.exponential"(%1639) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%1641 = "mhlo.reduce"(%1640, %5) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1642 = linalg.tensor_expand_shape %1641 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1643 = chlo.broadcast_divide %1640, %1642 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1644 = "mhlo.dot_general"(%1643, %1618) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%1645 = "mhlo.transpose"(%1644) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%1646 = "mhlo.reshape"(%1645) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%1647 = "mhlo.dot"(%1646, %cst_368) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1648 = chlo.broadcast_add %1647, %cst_367 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1649 = "mhlo.reshape"(%1648) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1650 = "mhlo.dot"(%1614, %cst_381) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1651 = chlo.broadcast_add %1650, %cst_380 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1652 = "mhlo.reshape"(%1651) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1653 = chlo.broadcast_multiply %1652, %cst_379 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1654 = chlo.broadcast_add %1653, %cst_367 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1655 = chlo.broadcast_add %1649, %1654 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1656 = chlo.broadcast_multiply %1655, %cst_366 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1657 = chlo.broadcast_add %1656, %cst_365 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1658 = "mhlo.reshape"(%1657) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1659 = "mhlo.dot"(%1658, %cst_383) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1660 = chlo.broadcast_add %1659, %cst_382 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1661 = "mhlo.reshape"(%1660) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1662 = chlo.broadcast_maximum %1661, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1663 = "mhlo.reshape"(%1662) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1664 = "mhlo.dot"(%1663, %cst_387) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1665 = chlo.broadcast_add %1664, %cst_386 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1666 = "mhlo.reshape"(%1665) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1667 = chlo.broadcast_add %1666, %1657 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1668 = chlo.broadcast_multiply %1667, %cst_385 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1669 = chlo.broadcast_add %1668, %cst_384 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1670 = "mhlo.reshape"(%1669) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1671 = "mhlo.dot"(%1670, %cst_389) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1672 = chlo.broadcast_add %1671, %cst_388 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1673 = "mhlo.reshape"(%1672) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1674 = chlo.broadcast_maximum %1673, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1675 = "mhlo.reshape"(%1674) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1676 = "mhlo.dot"(%1675, %cst_393) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1677 = chlo.broadcast_add %1676, %cst_392 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1678 = "mhlo.reshape"(%1677) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1679 = chlo.broadcast_add %1678, %1669 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1680 = chlo.broadcast_multiply %1679, %cst_391 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1681 = chlo.broadcast_add %1680, %cst_390 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1682 = "mhlo.reshape"(%1681) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1683 = "mhlo.dot"(%1682, %cst_395) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1684 = chlo.broadcast_add %1683, %cst_394 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1685 = "mhlo.reshape"(%1684) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1686 = chlo.broadcast_maximum %1685, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1687 = "mhlo.reshape"(%1686) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1688 = "mhlo.dot"(%1687, %cst_399) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1689 = chlo.broadcast_add %1688, %cst_398 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1690 = "mhlo.reshape"(%1689) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1691 = chlo.broadcast_add %1690, %1681 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1692 = chlo.broadcast_multiply %1691, %cst_397 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1693 = chlo.broadcast_add %1692, %cst_396 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1694 = "mhlo.reshape"(%1693) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1695 = "mhlo.dot"(%1694, %cst_401) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1696 = chlo.broadcast_add %1695, %cst_400 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1697 = "mhlo.reshape"(%1696) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1698 = chlo.broadcast_maximum %1697, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1699 = "mhlo.reshape"(%1698) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1700 = "mhlo.dot"(%1699, %cst_409) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1701 = chlo.broadcast_add %1700, %cst_408 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1702 = "mhlo.reshape"(%1701) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1703 = chlo.broadcast_add %1702, %1693 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1704 = chlo.broadcast_multiply %1703, %cst_403 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1705 = chlo.broadcast_add %1704, %cst_402 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1706 = "mhlo.reshape"(%1705) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1707 = "mhlo.dot"(%1706, %cst_407) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1708 = chlo.broadcast_add %1707, %cst_406 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1709 = "mhlo.reshape"(%1708) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1710 = chlo.broadcast_add %1709, %1613 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%1711 = chlo.broadcast_multiply %1710, %cst_405 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1712 = chlo.broadcast_add %1711, %cst_404 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1713 = "mhlo.reshape"(%1712) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1714 = "mhlo.dot"(%1713, %cst_419) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1715 = chlo.broadcast_add %1714, %cst_418 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1716 = "mhlo.reshape"(%1715) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1717 = "mhlo.transpose"(%1716) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1718 = "mhlo.dot"(%1713, %cst_423) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1719 = "mhlo.reshape"(%1718) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1720 = "mhlo.broadcast_in_dim"(%cst_422) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%1721 = mhlo.add %1719, %1720 : tensor<1x384x128xf32>
%1722 = chlo.broadcast_multiply %1721, %cst_421 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1723 = chlo.broadcast_add %1722, %cst_420 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1724 = "mhlo.reshape"(%1723) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1725 = "mhlo.dot"(%1724, %cst_415) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1726 = chlo.broadcast_add %1725, %cst_414 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1727 = "mhlo.reshape"(%1726) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1728 = "mhlo.transpose"(%1727) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1729 = "mhlo.dot"(%1724, %cst_417) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1730 = chlo.broadcast_add %1729, %cst_416 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1731 = "mhlo.reshape"(%1730) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1732 = "mhlo.transpose"(%1731) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1733 = "mhlo.dot_general"(%1732, %1728) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%1734 = chlo.broadcast_multiply %1733, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%1735 = chlo.broadcast_add %1734, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%1736 = "mhlo.reduce"(%1735, %4) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1737 = linalg.tensor_expand_shape %1736 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1738 = chlo.broadcast_subtract %1735, %1737 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1739 = "mhlo.exponential"(%1738) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%1740 = "mhlo.reduce"(%1739, %5) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1741 = linalg.tensor_expand_shape %1740 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1742 = chlo.broadcast_divide %1739, %1741 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1743 = "mhlo.dot_general"(%1742, %1717) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%1744 = "mhlo.transpose"(%1743) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%1745 = "mhlo.reshape"(%1744) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%1746 = "mhlo.dot"(%1745, %cst_413) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1747 = chlo.broadcast_add %1746, %cst_412 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1748 = "mhlo.reshape"(%1747) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1749 = "mhlo.dot"(%1713, %cst_426) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1750 = chlo.broadcast_add %1749, %cst_425 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1751 = "mhlo.reshape"(%1750) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1752 = chlo.broadcast_multiply %1751, %cst_424 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1753 = chlo.broadcast_add %1752, %cst_412 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1754 = chlo.broadcast_add %1748, %1753 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1755 = chlo.broadcast_multiply %1754, %cst_411 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1756 = chlo.broadcast_add %1755, %cst_410 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1757 = "mhlo.reshape"(%1756) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1758 = "mhlo.dot"(%1757, %cst_428) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1759 = chlo.broadcast_add %1758, %cst_427 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1760 = "mhlo.reshape"(%1759) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1761 = chlo.broadcast_maximum %1760, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1762 = "mhlo.reshape"(%1761) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1763 = "mhlo.dot"(%1762, %cst_432) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1764 = chlo.broadcast_add %1763, %cst_431 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1765 = "mhlo.reshape"(%1764) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1766 = chlo.broadcast_add %1765, %1756 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1767 = chlo.broadcast_multiply %1766, %cst_430 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1768 = chlo.broadcast_add %1767, %cst_429 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1769 = "mhlo.reshape"(%1768) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1770 = "mhlo.dot"(%1769, %cst_434) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1771 = chlo.broadcast_add %1770, %cst_433 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1772 = "mhlo.reshape"(%1771) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1773 = chlo.broadcast_maximum %1772, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1774 = "mhlo.reshape"(%1773) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1775 = "mhlo.dot"(%1774, %cst_438) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1776 = chlo.broadcast_add %1775, %cst_437 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1777 = "mhlo.reshape"(%1776) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1778 = chlo.broadcast_add %1777, %1768 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1779 = chlo.broadcast_multiply %1778, %cst_436 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1780 = chlo.broadcast_add %1779, %cst_435 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1781 = "mhlo.reshape"(%1780) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1782 = "mhlo.dot"(%1781, %cst_440) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1783 = chlo.broadcast_add %1782, %cst_439 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1784 = "mhlo.reshape"(%1783) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1785 = chlo.broadcast_maximum %1784, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1786 = "mhlo.reshape"(%1785) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1787 = "mhlo.dot"(%1786, %cst_444) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1788 = chlo.broadcast_add %1787, %cst_443 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1789 = "mhlo.reshape"(%1788) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1790 = chlo.broadcast_add %1789, %1780 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1791 = chlo.broadcast_multiply %1790, %cst_442 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1792 = chlo.broadcast_add %1791, %cst_441 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1793 = "mhlo.reshape"(%1792) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1794 = "mhlo.dot"(%1793, %cst_446) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1795 = chlo.broadcast_add %1794, %cst_445 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1796 = "mhlo.reshape"(%1795) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1797 = chlo.broadcast_maximum %1796, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1798 = "mhlo.reshape"(%1797) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1799 = "mhlo.dot"(%1798, %cst_454) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1800 = chlo.broadcast_add %1799, %cst_453 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1801 = "mhlo.reshape"(%1800) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1802 = chlo.broadcast_add %1801, %1792 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1803 = chlo.broadcast_multiply %1802, %cst_448 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1804 = chlo.broadcast_add %1803, %cst_447 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1805 = "mhlo.reshape"(%1804) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1806 = "mhlo.dot"(%1805, %cst_452) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1807 = chlo.broadcast_add %1806, %cst_451 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1808 = "mhlo.reshape"(%1807) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1809 = chlo.broadcast_add %1808, %1712 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%1810 = chlo.broadcast_multiply %1809, %cst_450 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1811 = chlo.broadcast_add %1810, %cst_449 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1812 = "mhlo.reshape"(%1811) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1813 = "mhlo.dot"(%1812, %cst_464) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1814 = chlo.broadcast_add %1813, %cst_463 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1815 = "mhlo.reshape"(%1814) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1816 = "mhlo.transpose"(%1815) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1817 = "mhlo.dot"(%1812, %cst_468) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1818 = "mhlo.reshape"(%1817) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1819 = "mhlo.broadcast_in_dim"(%cst_467) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%1820 = mhlo.add %1818, %1819 : tensor<1x384x128xf32>
%1821 = chlo.broadcast_multiply %1820, %cst_466 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1822 = chlo.broadcast_add %1821, %cst_465 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1823 = "mhlo.reshape"(%1822) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1824 = "mhlo.dot"(%1823, %cst_460) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1825 = chlo.broadcast_add %1824, %cst_459 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1826 = "mhlo.reshape"(%1825) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1827 = "mhlo.transpose"(%1826) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1828 = "mhlo.dot"(%1823, %cst_462) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1829 = chlo.broadcast_add %1828, %cst_461 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1830 = "mhlo.reshape"(%1829) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1831 = "mhlo.transpose"(%1830) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1832 = "mhlo.dot_general"(%1831, %1827) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%1833 = chlo.broadcast_multiply %1832, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%1834 = chlo.broadcast_add %1833, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%1835 = "mhlo.reduce"(%1834, %4) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1836 = linalg.tensor_expand_shape %1835 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1837 = chlo.broadcast_subtract %1834, %1836 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1838 = "mhlo.exponential"(%1837) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%1839 = "mhlo.reduce"(%1838, %5) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1840 = linalg.tensor_expand_shape %1839 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1841 = chlo.broadcast_divide %1838, %1840 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1842 = "mhlo.dot_general"(%1841, %1816) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%1843 = "mhlo.transpose"(%1842) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%1844 = "mhlo.reshape"(%1843) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%1845 = "mhlo.dot"(%1844, %cst_458) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1846 = chlo.broadcast_add %1845, %cst_457 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1847 = "mhlo.reshape"(%1846) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1848 = "mhlo.dot"(%1812, %cst_471) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1849 = chlo.broadcast_add %1848, %cst_470 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1850 = "mhlo.reshape"(%1849) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1851 = chlo.broadcast_multiply %1850, %cst_469 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1852 = chlo.broadcast_add %1851, %cst_457 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1853 = chlo.broadcast_add %1847, %1852 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1854 = chlo.broadcast_multiply %1853, %cst_456 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1855 = chlo.broadcast_add %1854, %cst_455 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1856 = "mhlo.reshape"(%1855) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1857 = "mhlo.dot"(%1856, %cst_473) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1858 = chlo.broadcast_add %1857, %cst_472 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1859 = "mhlo.reshape"(%1858) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1860 = chlo.broadcast_maximum %1859, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1861 = "mhlo.reshape"(%1860) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1862 = "mhlo.dot"(%1861, %cst_477) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1863 = chlo.broadcast_add %1862, %cst_476 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1864 = "mhlo.reshape"(%1863) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1865 = chlo.broadcast_add %1864, %1855 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1866 = chlo.broadcast_multiply %1865, %cst_475 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1867 = chlo.broadcast_add %1866, %cst_474 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1868 = "mhlo.reshape"(%1867) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1869 = "mhlo.dot"(%1868, %cst_479) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1870 = chlo.broadcast_add %1869, %cst_478 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1871 = "mhlo.reshape"(%1870) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1872 = chlo.broadcast_maximum %1871, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1873 = "mhlo.reshape"(%1872) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1874 = "mhlo.dot"(%1873, %cst_483) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1875 = chlo.broadcast_add %1874, %cst_482 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1876 = "mhlo.reshape"(%1875) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1877 = chlo.broadcast_add %1876, %1867 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1878 = chlo.broadcast_multiply %1877, %cst_481 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1879 = chlo.broadcast_add %1878, %cst_480 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1880 = "mhlo.reshape"(%1879) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1881 = "mhlo.dot"(%1880, %cst_485) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1882 = chlo.broadcast_add %1881, %cst_484 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1883 = "mhlo.reshape"(%1882) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1884 = chlo.broadcast_maximum %1883, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1885 = "mhlo.reshape"(%1884) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1886 = "mhlo.dot"(%1885, %cst_489) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1887 = chlo.broadcast_add %1886, %cst_488 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1888 = "mhlo.reshape"(%1887) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1889 = chlo.broadcast_add %1888, %1879 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1890 = chlo.broadcast_multiply %1889, %cst_487 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1891 = chlo.broadcast_add %1890, %cst_486 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1892 = "mhlo.reshape"(%1891) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1893 = "mhlo.dot"(%1892, %cst_491) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1894 = chlo.broadcast_add %1893, %cst_490 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1895 = "mhlo.reshape"(%1894) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1896 = chlo.broadcast_maximum %1895, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1897 = "mhlo.reshape"(%1896) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1898 = "mhlo.dot"(%1897, %cst_499) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1899 = chlo.broadcast_add %1898, %cst_498 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1900 = "mhlo.reshape"(%1899) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1901 = chlo.broadcast_add %1900, %1891 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1902 = chlo.broadcast_multiply %1901, %cst_493 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1903 = chlo.broadcast_add %1902, %cst_492 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1904 = "mhlo.reshape"(%1903) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1905 = "mhlo.dot"(%1904, %cst_497) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1906 = chlo.broadcast_add %1905, %cst_496 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1907 = "mhlo.reshape"(%1906) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1908 = chlo.broadcast_add %1907, %1811 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%1909 = chlo.broadcast_multiply %1908, %cst_495 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1910 = chlo.broadcast_add %1909, %cst_494 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%1911 = "mhlo.reshape"(%1910) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1912 = "mhlo.dot"(%1911, %cst_509) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1913 = chlo.broadcast_add %1912, %cst_508 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1914 = "mhlo.reshape"(%1913) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1915 = "mhlo.transpose"(%1914) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1916 = "mhlo.dot"(%1911, %cst_513) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1917 = "mhlo.reshape"(%1916) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1918 = "mhlo.broadcast_in_dim"(%cst_512) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%1919 = mhlo.add %1917, %1918 : tensor<1x384x128xf32>
%1920 = chlo.broadcast_multiply %1919, %cst_511 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1921 = chlo.broadcast_add %1920, %cst_510 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1922 = "mhlo.reshape"(%1921) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1923 = "mhlo.dot"(%1922, %cst_505) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1924 = chlo.broadcast_add %1923, %cst_504 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1925 = "mhlo.reshape"(%1924) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1926 = "mhlo.transpose"(%1925) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1927 = "mhlo.dot"(%1922, %cst_507) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1928 = chlo.broadcast_add %1927, %cst_506 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1929 = "mhlo.reshape"(%1928) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%1930 = "mhlo.transpose"(%1929) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%1931 = "mhlo.dot_general"(%1930, %1926) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<3> : tensor<1xi64>}} : (tensor<1x4x384x32xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x384xf32>
%1932 = chlo.broadcast_multiply %1931, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384x384xf32>
%1933 = chlo.broadcast_add %1932, %24 : (tensor<1x4x384x384xf32>, tensor<1x1x384x384xf32>) -> tensor<1x4x384x384xf32>
%1934 = "mhlo.reduce"(%1933, %4) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.maximum %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1935 = linalg.tensor_expand_shape %1934 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1936 = chlo.broadcast_subtract %1933, %1935 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1937 = "mhlo.exponential"(%1936) : (tensor<1x4x384x384xf32>) -> tensor<1x4x384x384xf32>
%1938 = "mhlo.reduce"(%1937, %5) ( {
^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
%2417 = mhlo.add %arg3, %arg4 : tensor<f32>
"mhlo.return"(%2417) : (tensor<f32>) -> ()
}) {dimensions = dense<3> : tensor<1xi64>} : (tensor<1x4x384x384xf32>, tensor<f32>) -> tensor<1x4x384xf32>
%1939 = linalg.tensor_expand_shape %1938 [[0], [1], [2, 3]] : tensor<1x4x384xf32> into tensor<1x4x384x1xf32>
%1940 = chlo.broadcast_divide %1937, %1939 : (tensor<1x4x384x384xf32>, tensor<1x4x384x1xf32>) -> tensor<1x4x384x384xf32>
%1941 = "mhlo.dot_general"(%1940, %1915) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contracting_dimensions = dense<3> : tensor<1xi64>, rhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, rhs_contracting_dimensions = dense<2> : tensor<1xi64>}} : (tensor<1x4x384x384xf32>, tensor<1x4x384x32xf32>) -> tensor<1x4x384x32xf32>
%1942 = "mhlo.transpose"(%1941) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x4x384x32xf32>) -> tensor<1x384x4x32xf32>
%1943 = "mhlo.reshape"(%1942) : (tensor<1x384x4x32xf32>) -> tensor<384x128xf32>
%1944 = "mhlo.dot"(%1943, %cst_503) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%1945 = chlo.broadcast_add %1944, %cst_502 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1946 = "mhlo.reshape"(%1945) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1947 = "mhlo.dot"(%1911, %cst_516) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1948 = chlo.broadcast_add %1947, %cst_515 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1949 = "mhlo.reshape"(%1948) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1950 = chlo.broadcast_multiply %1949, %cst_514 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1951 = chlo.broadcast_add %1950, %cst_502 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1952 = chlo.broadcast_add %1946, %1951 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1953 = chlo.broadcast_multiply %1952, %cst_501 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1954 = chlo.broadcast_add %1953, %cst_500 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1955 = "mhlo.reshape"(%1954) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1956 = "mhlo.dot"(%1955, %cst_518) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1957 = chlo.broadcast_add %1956, %cst_517 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1958 = "mhlo.reshape"(%1957) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1959 = chlo.broadcast_maximum %1958, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1960 = "mhlo.reshape"(%1959) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1961 = "mhlo.dot"(%1960, %cst_522) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1962 = chlo.broadcast_add %1961, %cst_521 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1963 = "mhlo.reshape"(%1962) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1964 = chlo.broadcast_add %1963, %1954 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1965 = chlo.broadcast_multiply %1964, %cst_520 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1966 = chlo.broadcast_add %1965, %cst_519 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1967 = "mhlo.reshape"(%1966) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1968 = "mhlo.dot"(%1967, %cst_524) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1969 = chlo.broadcast_add %1968, %cst_523 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1970 = "mhlo.reshape"(%1969) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1971 = chlo.broadcast_maximum %1970, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1972 = "mhlo.reshape"(%1971) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1973 = "mhlo.dot"(%1972, %cst_528) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1974 = chlo.broadcast_add %1973, %cst_527 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1975 = "mhlo.reshape"(%1974) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1976 = chlo.broadcast_add %1975, %1966 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1977 = chlo.broadcast_multiply %1976, %cst_526 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1978 = chlo.broadcast_add %1977, %cst_525 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1979 = "mhlo.reshape"(%1978) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1980 = "mhlo.dot"(%1979, %cst_530) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1981 = chlo.broadcast_add %1980, %cst_529 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1982 = "mhlo.reshape"(%1981) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1983 = chlo.broadcast_maximum %1982, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1984 = "mhlo.reshape"(%1983) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1985 = "mhlo.dot"(%1984, %cst_534) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1986 = chlo.broadcast_add %1985, %cst_533 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1987 = "mhlo.reshape"(%1986) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%1988 = chlo.broadcast_add %1987, %1978 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%1989 = chlo.broadcast_multiply %1988, %cst_532 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1990 = chlo.broadcast_add %1989, %cst_531 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%1991 = "mhlo.reshape"(%1990) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%1992 = "mhlo.dot"(%1991, %cst_536) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%1993 = chlo.broadcast_add %1992, %cst_535 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%1994 = "mhlo.reshape"(%1993) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%1995 = chlo.broadcast_maximum %1994, %5 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x384x512xf32>, tensor<f32>) -> tensor<1x384x512xf32>
%1996 = "mhlo.reshape"(%1995) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%1997 = "mhlo.dot"(%1996, %cst_544) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%1998 = chlo.broadcast_add %1997, %cst_543 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%1999 = "mhlo.reshape"(%1998) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2000 = chlo.broadcast_add %1999, %1990 : (tensor<1x384x128xf32>, tensor<1x384x128xf32>) -> tensor<1x384x128xf32>
%2001 = chlo.broadcast_multiply %2000, %cst_538 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2002 = chlo.broadcast_add %2001, %cst_537 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2003 = "mhlo.reshape"(%2002) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2004 = "mhlo.dot"(%2003, %cst_542) : (tensor<384x128xf32>, tensor<128x512xf32>) -> tensor<384x512xf32>
%2005 = chlo.broadcast_add %2004, %cst_541 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x512xf32>, tensor<512xf32>) -> tensor<384x512xf32>
%2006 = "mhlo.reshape"(%2005) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
%2007 = chlo.broadcast_add %2006, %1910 : (tensor<1x384x512xf32>, tensor<1x384x512xf32>) -> tensor<1x384x512xf32>
%2008 = chlo.broadcast_multiply %2007, %cst_540 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%2009 = chlo.broadcast_add %2008, %cst_539 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x512xf32>, tensor<512xf32>) -> tensor<1x384x512xf32>
%2010 = "mhlo.reshape"(%2009) : (tensor<1x384x512xf32>) -> tensor<384x512xf32>
%2011 = "mhlo.dot"(%2010, %cst_599) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2012 = chlo.broadcast_add %2011, %cst_598 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2013 = "mhlo.reshape"(%2012) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%2014 = "mhlo.transpose"(%2013) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%2015 = "mhlo.dot"(%2010, %cst_603) : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
%2016 = "mhlo.reshape"(%2015) : (tensor<384x128xf32>) -> tensor<1x384x128xf32>
%2017 = "mhlo.broadcast_in_dim"(%cst_602) {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x384x128xf32>
%2018 = mhlo.add %2016, %2017 : tensor<1x384x128xf32>
%2019 = chlo.broadcast_multiply %2018, %cst_601 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2020 = chlo.broadcast_add %2019, %cst_600 {broadcast_dimensions = dense<2> : tensor<1xi64>} : (tensor<1x384x128xf32>, tensor<128xf32>) -> tensor<1x384x128xf32>
%2021 = "mhlo.reshape"(%2020) : (tensor<1x384x128xf32>) -> tensor<384x128xf32>
%2022 = "mhlo.dot"(%2021, %cst_595) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%2023 = chlo.broadcast_add %2022, %cst_594 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2024 = "mhlo.reshape"(%2023) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%2025 = "mhlo.transpose"(%2024) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%2026 = "mhlo.dot"(%2021, %cst_597) : (tensor<384x128xf32>, tensor<128x128xf32>) -> tensor<384x128xf32>
%2027 = chlo.broadcast_add %2026, %cst_596 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<384x128xf32>, tensor<128xf32>) -> tensor<384x128xf32>
%2028 = "mhlo.reshape"(%2027) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
%2029 = "mhlo.transpose"(%2028) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
%2030 = "mhlo.dot_general"(%2029, %2025) {dot_dimension_numbers = {lhs_batching_dimensions = dense<[0, 1]> : tensor<2xi64>, lhs_contract
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment