
This file has been truncated.
// -----// IR Dump After mlir::iree_compiler::IREE::ABI::WrapEntryPointsPass //----- //
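// Note: every `opaque<"_", "0xDEADBEEF">` attribute below is a placeholder for constant
// data that was elided when the dump was written (the real weight values are omitted to
// keep the file small); the flow.variable ops hold the model's conv/batch-norm parameters.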
builtin.module {
flow.variable @"__iree_flow___sm_node163__m.layer-1.kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x3x32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node169__m.layer-2.gamma" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node170__m.layer-2.beta" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node171__m.layer-2.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node172__m.layer-2.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node181__m.layer-4.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x32x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node187__m.layer-5.gamma" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node188__m.layer-5.beta" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node189__m.layer-5.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node190__m.layer-5.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node199__m.layer-7.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x16xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node205__m.layer-8.gamma" opaque<"_", "0xDEADBEEF"> : tensor<16xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node206__m.layer-8.beta" opaque<"_", "0xDEADBEEF"> : tensor<16xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node207__m.layer-8.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<16xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node208__m.layer-8.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<16xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node213__m.layer-9.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x16x96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node219__m.layer-10.gamma" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node220__m.layer-10.beta" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node221__m.layer-10.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node222__m.layer-10.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node235__m.layer-13.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x96x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node241__m.layer-14.gamma" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node242__m.layer-14.beta" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node243__m.layer-14.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node244__m.layer-14.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node253__m.layer-16.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x24xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node259__m.layer-17.gamma" opaque<"_", "0xDEADBEEF"> : tensor<24xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node260__m.layer-17.beta" opaque<"_", "0xDEADBEEF"> : tensor<24xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node261__m.layer-17.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<24xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node262__m.layer-17.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<24xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node267__m.layer-18.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node273__m.layer-19.gamma" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node274__m.layer-19.beta" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node275__m.layer-19.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node276__m.layer-19.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node285__m.layer-21.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x144x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node291__m.layer-22.gamma" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node292__m.layer-22.beta" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node293__m.layer-22.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node294__m.layer-22.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node303__m.layer-24.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x24xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node309__m.layer-25.gamma" opaque<"_", "0xDEADBEEF"> : tensor<24xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node310__m.layer-25.beta" opaque<"_", "0xDEADBEEF"> : tensor<24xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node311__m.layer-25.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<24xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node312__m.layer-25.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<24xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node321__m.layer-27.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node327__m.layer-28.gamma" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node328__m.layer-28.beta" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node329__m.layer-28.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node330__m.layer-28.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node343__m.layer-31.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x144x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node349__m.layer-32.gamma" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node350__m.layer-32.beta" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node351__m.layer-32.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node352__m.layer-32.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node361__m.layer-34.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node367__m.layer-35.gamma" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node368__m.layer-35.beta" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node369__m.layer-35.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node370__m.layer-35.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node375__m.layer-36.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node381__m.layer-37.gamma" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node382__m.layer-37.beta" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node383__m.layer-37.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node384__m.layer-37.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node393__m.layer-39.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x192x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node399__m.layer-40.gamma" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node400__m.layer-40.beta" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node401__m.layer-40.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node402__m.layer-40.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node411__m.layer-42.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node417__m.layer-43.gamma" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node418__m.layer-43.beta" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node419__m.layer-43.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node420__m.layer-43.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node429__m.layer-45.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node435__m.layer-46.gamma" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node436__m.layer-46.beta" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node437__m.layer-46.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node438__m.layer-46.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node447__m.layer-48.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x192x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node453__m.layer-49.gamma" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node454__m.layer-49.beta" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node455__m.layer-49.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node456__m.layer-49.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node465__m.layer-51.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node471__m.layer-52.gamma" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node472__m.layer-52.beta" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node473__m.layer-52.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node474__m.layer-52.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node483__m.layer-54.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node489__m.layer-55.gamma" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node490__m.layer-55.beta" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node491__m.layer-55.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node492__m.layer-55.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node505__m.layer-58.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x192x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node511__m.layer-59.gamma" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node512__m.layer-59.beta" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node513__m.layer-59.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node514__m.layer-59.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node523__m.layer-61.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node529__m.layer-62.gamma" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node530__m.layer-62.beta" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node531__m.layer-62.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node532__m.layer-62.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node537__m.layer-63.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node543__m.layer-64.gamma" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node544__m.layer-64.beta" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node545__m.layer-64.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node546__m.layer-64.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node555__m.layer-66.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x384x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node561__m.layer-67.gamma" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node562__m.layer-67.beta" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node563__m.layer-67.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node564__m.layer-67.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node573__m.layer-69.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node579__m.layer-70.gamma" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node580__m.layer-70.beta" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node581__m.layer-70.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node582__m.layer-70.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node591__m.layer-72.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node597__m.layer-73.gamma" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node598__m.layer-73.beta" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node599__m.layer-73.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node600__m.layer-73.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node609__m.layer-75.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x384x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node615__m.layer-76.gamma" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node616__m.layer-76.beta" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node617__m.layer-76.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node618__m.layer-76.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node627__m.layer-78.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node633__m.layer-79.gamma" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node634__m.layer-79.beta" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node635__m.layer-79.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node636__m.layer-79.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node645__m.layer-81.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node651__m.layer-82.gamma" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node652__m.layer-82.beta" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node653__m.layer-82.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node654__m.layer-82.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node663__m.layer-84.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x384x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node669__m.layer-85.gamma" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node670__m.layer-85.beta" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node671__m.layer-85.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node672__m.layer-85.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node681__m.layer-87.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node687__m.layer-88.gamma" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node688__m.layer-88.beta" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node689__m.layer-88.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node690__m.layer-88.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node699__m.layer-90.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node705__m.layer-91.gamma" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node706__m.layer-91.beta" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node707__m.layer-91.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node708__m.layer-91.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node717__m.layer-93.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x384x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node723__m.layer-94.gamma" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node724__m.layer-94.beta" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node725__m.layer-94.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node726__m.layer-94.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node735__m.layer-96.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node741__m.layer-97.gamma" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node742__m.layer-97.beta" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node743__m.layer-97.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node744__m.layer-97.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node749__m.layer-98.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node755__m.layer-99.gamma" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node756__m.layer-99.beta" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node757__m.layer-99.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node758__m.layer-99.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node767__m.layer-101.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x576x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node773__m.layer-102.gamma" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node774__m.layer-102.beta" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node775__m.layer-102.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node776__m.layer-102.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node785__m.layer-104.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node791__m.layer-105.gamma" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node792__m.layer-105.beta" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node793__m.layer-105.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node794__m.layer-105.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node803__m.layer-107.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node809__m.layer-108.gamma" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node810__m.layer-108.beta" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node811__m.layer-108.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node812__m.layer-108.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node821__m.layer-110.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x576x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node827__m.layer-111.gamma" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node828__m.layer-111.beta" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node829__m.layer-111.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node830__m.layer-111.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node839__m.layer-113.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node845__m.layer-114.gamma" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node846__m.layer-114.beta" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node847__m.layer-114.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node848__m.layer-114.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node857__m.layer-116.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node863__m.layer-117.gamma" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node864__m.layer-117.beta" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node865__m.layer-117.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node866__m.layer-117.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node879__m.layer-120.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x576x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node885__m.layer-121.gamma" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node886__m.layer-121.beta" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node887__m.layer-121.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node888__m.layer-121.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node897__m.layer-123.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node903__m.layer-124.gamma" opaque<"_", "0xDEADBEEF"> : tensor<160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node904__m.layer-124.beta" opaque<"_", "0xDEADBEEF"> : tensor<160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node905__m.layer-124.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node906__m.layer-124.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node911__m.layer-125.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node917__m.layer-126.gamma" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node918__m.layer-126.beta" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node919__m.layer-126.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node920__m.layer-126.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node929__m.layer-128.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x960x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node935__m.layer-129.gamma" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node936__m.layer-129.beta" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node937__m.layer-129.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node938__m.layer-129.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node947__m.layer-131.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node953__m.layer-132.gamma" opaque<"_", "0xDEADBEEF"> : tensor<160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node954__m.layer-132.beta" opaque<"_", "0xDEADBEEF"> : tensor<160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node955__m.layer-132.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node956__m.layer-132.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node965__m.layer-134.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node971__m.layer-135.gamma" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node972__m.layer-135.beta" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node973__m.layer-135.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node974__m.layer-135.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node983__m.layer-137.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x960x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node989__m.layer-138.gamma" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node990__m.layer-138.beta" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node991__m.layer-138.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node992__m.layer-138.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1001__m.layer-140.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1007__m.layer-141.gamma" opaque<"_", "0xDEADBEEF"> : tensor<160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1008__m.layer-141.beta" opaque<"_", "0xDEADBEEF"> : tensor<160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1009__m.layer-141.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1010__m.layer-141.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1019__m.layer-143.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1025__m.layer-144.gamma" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1026__m.layer-144.beta" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1027__m.layer-144.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1028__m.layer-144.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1037__m.layer-146.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x960x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1043__m.layer-147.gamma" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1044__m.layer-147.beta" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1045__m.layer-147.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1046__m.layer-147.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1055__m.layer-149.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x320xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1061__m.layer-150.gamma" opaque<"_", "0xDEADBEEF"> : tensor<320xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1062__m.layer-150.beta" opaque<"_", "0xDEADBEEF"> : tensor<320xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1063__m.layer-150.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<320xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1064__m.layer-150.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<320xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1069__m.layer-151.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x320x1280xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1075__m.layer-152.gamma" opaque<"_", "0xDEADBEEF"> : tensor<1280xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1076__m.layer-152.beta" opaque<"_", "0xDEADBEEF"> : tensor<1280xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1077__m.layer-152.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<1280xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1078__m.layer-152.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<1280xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1091__m.layer-155.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1280x1000xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1092__m.layer-155.bias" opaque<"_", "0xDEADBEEF"> : tensor<1000xf32> attributes {sym_visibility = "private"}
builtin.func @call(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi = "{\22a\22:[[\22ndarray\22,\22f32\22,4,1,224,224,3]],\22r\22:[[\22ndarray\22,\22f32\22,2,1,1000]],\22v\22:1}"}} {
%0 = call @_call(%arg0) : (!hal.buffer_view) -> !hal.buffer_view
return %0 : !hal.buffer_view
}
builtin.func private @_call(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi = "{\22a\22:[[\22ndarray\22,\22f32\22,4,1,224,224,3]],\22r\22:[[\22ndarray\22,\22f32\22,2,1,1000]],\22v\22:1}"} {
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x224x224x3xf32>
%1 = call @"__inference_<lambda>_133580"(%0) : (tensor<1x224x224x3xf32>) -> tensor<1x1000xf32>
%2 = hal.tensor.cast %1 : tensor<1x1000xf32> -> !hal.buffer_view
return %2 : !hal.buffer_view
}
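// The two functions above are the ABI wrappers emitted by WrapEntryPointsPass: the public
// `@call` forwards to the private `@_call`, which casts the incoming !hal.buffer_view to a
// 1x224x224x3 tensor, invokes the inference function, and casts the 1x1000 result back to a
// !hal.buffer_view. The iree.reflection attribute carries the same signature as JSON metadata.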
builtin.func private @"__inference_<lambda>_133580"(%arg0: tensor<1x224x224x3xf32> {tf._user_specified_name = "x"}) -> tensor<1x1000xf32> attributes {tf._construction_context = "kEagerRuntime", tf._input_shapes = [#tf_type.shape<1x224x224x3>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, 
#tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>], tf.signature.is_stateful} {
%0 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x960xf32>
%1 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x960xf32>
%2 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x960xf32>
%3 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x576xf32>
%4 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x576xf32>
%5 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x576xf32>
%6 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%7 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%8 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%9 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%10 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x192xf32>
%11 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x192xf32>
%12 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x192xf32>
%13 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x144xf32>
%14 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x144xf32>
%15 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x96xf32>
%16 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x32xf32>
%cst = constant opaque<"_", "0xDEADBEEF"> : tensor<1280x1000xf32>
%cst_0 = constant opaque<"_", "0xDEADBEEF"> : tensor<1000xf32>
%cst_1 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x16xf32>
%cst_2 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_3 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_4 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_5 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_6 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_7 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_8 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_9 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_10 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x320x1280xf32>
%cst_11 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_12 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_13 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_14 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_15 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x3x32xf32>
%cst_16 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_17 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_18 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_19 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_20 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_21 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_22 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_23 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_24 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_25 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_26 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_27 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_28 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_29 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_30 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_31 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_32 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_33 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_34 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_35 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_36 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_37 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_38 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_39 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_40 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_41 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_42 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_43 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_44 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_45 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_46 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_47 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_48 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_49 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_50 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_51 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_52 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_53 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_54 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_55 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_56 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_57 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_58 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_59 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_60 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_61 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_62 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x64xf32>
%cst_63 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_64 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_65 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_66 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_67 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_68 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_69 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_70 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_71 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_72 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_73 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_74 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_75 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_76 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32>
%cst_77 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_78 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_79 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_80 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_81 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_82 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_83 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_84 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_85 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_86 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_87 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_88 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_89 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_90 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32>
%cst_91 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_92 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_93 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_94 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_95 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_96 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_97 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_98 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_99 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_100 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_101 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_102 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_103 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_104 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x32xf32>
%cst_105 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_106 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_107 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_108 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_109 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32>
%cst_110 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_111 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_112 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_113 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_114 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_115 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_116 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_117 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_118 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x24xf32>
%cst_119 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_120 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_121 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_122 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_123 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32>
%cst_124 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_125 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_126 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_127 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_128 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_129 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_130 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_131 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_132 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x24xf32>
%cst_133 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_134 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_135 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_136 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_137 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x16x96xf32>
%cst_138 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_139 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_140 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_141 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_142 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_143 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_144 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_145 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_146 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x320xf32>
%cst_147 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_148 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_149 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_150 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_151 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_152 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_153 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_154 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_155 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_156 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_157 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_158 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_159 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_160 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32>
%cst_161 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_162 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_163 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_164 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_165 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_166 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_167 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_168 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_169 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_170 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_171 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_172 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_173 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_174 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32>
%cst_175 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_176 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_177 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_178 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_179 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_180 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_181 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_182 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_183 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_184 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_185 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_186 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_187 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_188 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x160xf32>
%cst_189 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_190 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_191 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_192 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_193 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_194 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_195 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_196 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_197 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_198 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_199 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_200 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_201 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_202 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32>
%cst_203 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_204 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_205 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_206 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_207 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_208 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_209 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_210 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_211 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_212 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_213 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_214 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_215 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_216 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32>
%cst_217 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_218 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_219 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_220 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_221 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_222 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_223 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_224 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_225 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_226 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_227 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_228 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_229 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_230 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x96xf32>
%cst_231 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_232 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_233 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_234 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_235 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_236 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_237 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_238 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_239 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_240 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_241 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_242 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_243 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%17 = mhlo.constant dense<0.000000e+00> : tensor<f32>
%18 = mhlo.constant dense<0xFF800000> : tensor<f32>
%19 = mhlo.constant dense<4.900000e+01> : tensor<f32>
%20 = mhlo.constant dense<6.000000e+00> : tensor<f32>
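    // Scalar constants above: 0.0 and 6.0 bound the ReLU6 clamps, -inf seeds the max-reduction, and 49.0 (= 7*7) is the average-pool divisor.
    // Stem: 3x3 stride-2 convolution over the 1x224x224x3 input, followed by batch-norm inference and a clamp to [0, 6] (ReLU6).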
%21 = mhlo.convolution(%arg0, %cst_15) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 1], [0, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x224x224x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%22 = "mhlo.batch_norm_inference"(%21, %cst_16, %cst_17, %cst_18, %cst_19) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x112x112x32xf32>
%23 = "mhlo.clamp"(%17, %22, %20) : (tensor<f32>, tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x112x112x32xf32>
%24 = mhlo.convolution(%23, %16) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 32 : i64} : (tensor<1x112x112x32xf32>, tensor<3x3x1x32xf32>) -> tensor<1x112x112x32xf32>
%25 = "mhlo.batch_norm_inference"(%24, %cst_6, %cst_7, %cst_8, %cst_9) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x112x112x32xf32>
%26 = "mhlo.clamp"(%17, %25, %20) : (tensor<f32>, tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x112x112x32xf32>
%27 = mhlo.convolution(%26, %cst_1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x112x112x32xf32>, tensor<1x1x32x16xf32>) -> tensor<1x112x112x16xf32>
%28 = "mhlo.batch_norm_inference"(%27, %cst_2, %cst_3, %cst_4, %cst_5) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>) -> tensor<1x112x112x16xf32>
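    // The body below repeats an inverted-residual (expand / depthwise / project) pattern: 1x1 expansion conv, depthwise 3x3 conv,
    // 1x1 projection conv, each followed by batch-norm inference, with ReLU6 clamps after the expansion and depthwise stages only.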
%29 = mhlo.convolution(%28, %cst_137) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x112x112x16xf32>, tensor<1x1x16x96xf32>) -> tensor<1x112x112x96xf32>
%30 = "mhlo.batch_norm_inference"(%29, %cst_138, %cst_139, %cst_140, %cst_141) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x112x112x96xf32>
%31 = "mhlo.clamp"(%17, %30, %20) : (tensor<f32>, tensor<1x112x112x96xf32>, tensor<f32>) -> tensor<1x112x112x96xf32>
%32 = "mhlo.pad"(%31, %17) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x112x112x96xf32>, tensor<f32>) -> tensor<1x113x113x96xf32>
%33 = mhlo.convolution(%32, %15) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 96 : i64} : (tensor<1x113x113x96xf32>, tensor<3x3x1x96xf32>) -> tensor<1x56x56x96xf32>
%34 = "mhlo.batch_norm_inference"(%33, %cst_142, %cst_143, %cst_144, %cst_145) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x56x56x96xf32>
%35 = "mhlo.clamp"(%17, %34, %20) : (tensor<f32>, tensor<1x56x56x96xf32>, tensor<f32>) -> tensor<1x56x56x96xf32>
%36 = mhlo.convolution(%35, %cst_132) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x96xf32>, tensor<1x1x96x24xf32>) -> tensor<1x56x56x24xf32>
%37 = "mhlo.batch_norm_inference"(%36, %cst_133, %cst_134, %cst_135, %cst_136) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>) -> tensor<1x56x56x24xf32>
%38 = mhlo.convolution(%37, %cst_123) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x24xf32>, tensor<1x1x24x144xf32>) -> tensor<1x56x56x144xf32>
%39 = "mhlo.batch_norm_inference"(%38, %cst_124, %cst_125, %cst_126, %cst_127) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x56x56x144xf32>
%40 = "mhlo.clamp"(%17, %39, %20) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%41 = mhlo.convolution(%40, %14) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 144 : i64} : (tensor<1x56x56x144xf32>, tensor<3x3x1x144xf32>) -> tensor<1x56x56x144xf32>
%42 = "mhlo.batch_norm_inference"(%41, %cst_128, %cst_129, %cst_130, %cst_131) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x56x56x144xf32>
%43 = "mhlo.clamp"(%17, %42, %20) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%44 = mhlo.convolution(%43, %cst_118) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x144xf32>, tensor<1x1x144x24xf32>) -> tensor<1x56x56x24xf32>
%45 = "mhlo.batch_norm_inference"(%44, %cst_119, %cst_120, %cst_121, %cst_122) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>) -> tensor<1x56x56x24xf32>
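    // Where the block input and projected output shapes match, they are added elementwise (residual skip connection), as below.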
%46 = chlo.broadcast_add %37, %45 : (tensor<1x56x56x24xf32>, tensor<1x56x56x24xf32>) -> tensor<1x56x56x24xf32>
%47 = mhlo.convolution(%46, %cst_109) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x24xf32>, tensor<1x1x24x144xf32>) -> tensor<1x56x56x144xf32>
%48 = "mhlo.batch_norm_inference"(%47, %cst_110, %cst_111, %cst_112, %cst_113) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x56x56x144xf32>
%49 = "mhlo.clamp"(%17, %48, %20) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%50 = "mhlo.pad"(%49, %17) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x57x57x144xf32>
%51 = mhlo.convolution(%50, %13) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 144 : i64} : (tensor<1x57x57x144xf32>, tensor<3x3x1x144xf32>) -> tensor<1x28x28x144xf32>
%52 = "mhlo.batch_norm_inference"(%51, %cst_114, %cst_115, %cst_116, %cst_117) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x28x28x144xf32>
%53 = "mhlo.clamp"(%17, %52, %20) : (tensor<f32>, tensor<1x28x28x144xf32>, tensor<f32>) -> tensor<1x28x28x144xf32>
%54 = mhlo.convolution(%53, %cst_104) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x144xf32>, tensor<1x1x144x32xf32>) -> tensor<1x28x28x32xf32>
%55 = "mhlo.batch_norm_inference"(%54, %cst_105, %cst_106, %cst_107, %cst_108) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x28x28x32xf32>
%56 = mhlo.convolution(%55, %cst_95) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%57 = "mhlo.batch_norm_inference"(%56, %cst_96, %cst_97, %cst_98, %cst_99) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%58 = "mhlo.clamp"(%17, %57, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%59 = mhlo.convolution(%58, %12) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x28x28x192xf32>, tensor<3x3x1x192xf32>) -> tensor<1x28x28x192xf32>
%60 = "mhlo.batch_norm_inference"(%59, %cst_100, %cst_101, %cst_102, %cst_103) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%61 = "mhlo.clamp"(%17, %60, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%62 = mhlo.convolution(%61, %cst_90) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x192xf32>, tensor<1x1x192x32xf32>) -> tensor<1x28x28x32xf32>
%63 = "mhlo.batch_norm_inference"(%62, %cst_91, %cst_92, %cst_93, %cst_94) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x28x28x32xf32>
%64 = chlo.broadcast_add %55, %63 : (tensor<1x28x28x32xf32>, tensor<1x28x28x32xf32>) -> tensor<1x28x28x32xf32>
%65 = mhlo.convolution(%64, %cst_81) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%66 = "mhlo.batch_norm_inference"(%65, %cst_82, %cst_83, %cst_84, %cst_85) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%67 = "mhlo.clamp"(%17, %66, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%68 = mhlo.convolution(%67, %11) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x28x28x192xf32>, tensor<3x3x1x192xf32>) -> tensor<1x28x28x192xf32>
%69 = "mhlo.batch_norm_inference"(%68, %cst_86, %cst_87, %cst_88, %cst_89) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%70 = "mhlo.clamp"(%17, %69, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%71 = mhlo.convolution(%70, %cst_76) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x192xf32>, tensor<1x1x192x32xf32>) -> tensor<1x28x28x32xf32>
%72 = "mhlo.batch_norm_inference"(%71, %cst_77, %cst_78, %cst_79, %cst_80) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x28x28x32xf32>
%73 = chlo.broadcast_add %64, %72 : (tensor<1x28x28x32xf32>, tensor<1x28x28x32xf32>) -> tensor<1x28x28x32xf32>
%74 = mhlo.convolution(%73, %cst_67) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%75 = "mhlo.batch_norm_inference"(%74, %cst_68, %cst_69, %cst_70, %cst_71) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%76 = "mhlo.clamp"(%17, %75, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%77 = "mhlo.pad"(%76, %17) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x29x29x192xf32>
%78 = mhlo.convolution(%77, %10) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x29x29x192xf32>, tensor<3x3x1x192xf32>) -> tensor<1x14x14x192xf32>
%79 = "mhlo.batch_norm_inference"(%78, %cst_72, %cst_73, %cst_74, %cst_75) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x14x14x192xf32>
%80 = "mhlo.clamp"(%17, %79, %20) : (tensor<f32>, tensor<1x14x14x192xf32>, tensor<f32>) -> tensor<1x14x14x192xf32>
%81 = mhlo.convolution(%80, %cst_62) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x192xf32>, tensor<1x1x192x64xf32>) -> tensor<1x14x14x64xf32>
%82 = "mhlo.batch_norm_inference"(%81, %cst_63, %cst_64, %cst_65, %cst_66) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%83 = mhlo.convolution(%82, %cst_53) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%84 = "mhlo.batch_norm_inference"(%83, %cst_54, %cst_55, %cst_56, %cst_57) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%85 = "mhlo.clamp"(%17, %84, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%86 = mhlo.convolution(%85, %9) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%87 = "mhlo.batch_norm_inference"(%86, %cst_58, %cst_59, %cst_60, %cst_61) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%88 = "mhlo.clamp"(%17, %87, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%89 = mhlo.convolution(%88, %cst_48) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%90 = "mhlo.batch_norm_inference"(%89, %cst_49, %cst_50, %cst_51, %cst_52) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%91 = chlo.broadcast_add %82, %90 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%92 = mhlo.convolution(%91, %cst_39) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%93 = "mhlo.batch_norm_inference"(%92, %cst_40, %cst_41, %cst_42, %cst_43) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%94 = "mhlo.clamp"(%17, %93, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%95 = mhlo.convolution(%94, %8) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%96 = "mhlo.batch_norm_inference"(%95, %cst_44, %cst_45, %cst_46, %cst_47) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%97 = "mhlo.clamp"(%17, %96, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%98 = mhlo.convolution(%97, %cst_34) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%99 = "mhlo.batch_norm_inference"(%98, %cst_35, %cst_36, %cst_37, %cst_38) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%100 = chlo.broadcast_add %91, %99 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%101 = mhlo.convolution(%100, %cst_25) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%102 = "mhlo.batch_norm_inference"(%101, %cst_26, %cst_27, %cst_28, %cst_29) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%103 = "mhlo.clamp"(%17, %102, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%104 = mhlo.convolution(%103, %7) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%105 = "mhlo.batch_norm_inference"(%104, %cst_30, %cst_31, %cst_32, %cst_33) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%106 = "mhlo.clamp"(%17, %105, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%107 = mhlo.convolution(%106, %cst_20) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%108 = "mhlo.batch_norm_inference"(%107, %cst_21, %cst_22, %cst_23, %cst_24) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%109 = chlo.broadcast_add %100, %108 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%110 = mhlo.convolution(%109, %cst_235) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%111 = "mhlo.batch_norm_inference"(%110, %cst_236, %cst_237, %cst_238, %cst_239) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%112 = "mhlo.clamp"(%17, %111, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%113 = mhlo.convolution(%112, %6) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%114 = "mhlo.batch_norm_inference"(%113, %cst_240, %cst_241, %cst_242, %cst_243) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%115 = "mhlo.clamp"(%17, %114, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%116 = mhlo.convolution(%115, %cst_230) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x96xf32>) -> tensor<1x14x14x96xf32>
%117 = "mhlo.batch_norm_inference"(%116, %cst_231, %cst_232, %cst_233, %cst_234) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%118 = mhlo.convolution(%117, %cst_221) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%119 = "mhlo.batch_norm_inference"(%118, %cst_222, %cst_223, %cst_224, %cst_225) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%120 = "mhlo.clamp"(%17, %119, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%121 = mhlo.convolution(%120, %5) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x14x14x576xf32>, tensor<3x3x1x576xf32>) -> tensor<1x14x14x576xf32>
%122 = "mhlo.batch_norm_inference"(%121, %cst_226, %cst_227, %cst_228, %cst_229) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%123 = "mhlo.clamp"(%17, %122, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%124 = mhlo.convolution(%123, %cst_216) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x14x14x96xf32>
%125 = "mhlo.batch_norm_inference"(%124, %cst_217, %cst_218, %cst_219, %cst_220) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%126 = chlo.broadcast_add %117, %125 : (tensor<1x14x14x96xf32>, tensor<1x14x14x96xf32>) -> tensor<1x14x14x96xf32>
%127 = mhlo.convolution(%126, %cst_207) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%128 = "mhlo.batch_norm_inference"(%127, %cst_208, %cst_209, %cst_210, %cst_211) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%129 = "mhlo.clamp"(%17, %128, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%130 = mhlo.convolution(%129, %4) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x14x14x576xf32>, tensor<3x3x1x576xf32>) -> tensor<1x14x14x576xf32>
%131 = "mhlo.batch_norm_inference"(%130, %cst_212, %cst_213, %cst_214, %cst_215) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%132 = "mhlo.clamp"(%17, %131, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%133 = mhlo.convolution(%132, %cst_202) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x14x14x96xf32>
%134 = "mhlo.batch_norm_inference"(%133, %cst_203, %cst_204, %cst_205, %cst_206) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%135 = chlo.broadcast_add %126, %134 : (tensor<1x14x14x96xf32>, tensor<1x14x14x96xf32>) -> tensor<1x14x14x96xf32>
%136 = mhlo.convolution(%135, %cst_193) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%137 = "mhlo.batch_norm_inference"(%136, %cst_194, %cst_195, %cst_196, %cst_197) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%138 = "mhlo.clamp"(%17, %137, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%139 = "mhlo.pad"(%138, %17) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x15x15x576xf32>
%140 = mhlo.convolution(%139, %3) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x15x15x576xf32>, tensor<3x3x1x576xf32>) -> tensor<1x7x7x576xf32>
%141 = "mhlo.batch_norm_inference"(%140, %cst_198, %cst_199, %cst_200, %cst_201) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x7x7x576xf32>
%142 = "mhlo.clamp"(%17, %141, %20) : (tensor<f32>, tensor<1x7x7x576xf32>, tensor<f32>) -> tensor<1x7x7x576xf32>
%143 = mhlo.convolution(%142, %cst_188) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x576xf32>, tensor<1x1x576x160xf32>) -> tensor<1x7x7x160xf32>
%144 = "mhlo.batch_norm_inference"(%143, %cst_189, %cst_190, %cst_191, %cst_192) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>) -> tensor<1x7x7x160xf32>
%145 = mhlo.convolution(%144, %cst_179) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%146 = "mhlo.batch_norm_inference"(%145, %cst_180, %cst_181, %cst_182, %cst_183) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%147 = "mhlo.clamp"(%17, %146, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%148 = mhlo.convolution(%147, %2) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x7x7x960xf32>, tensor<3x3x1x960xf32>) -> tensor<1x7x7x960xf32>
%149 = "mhlo.batch_norm_inference"(%148, %cst_184, %cst_185, %cst_186, %cst_187) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%150 = "mhlo.clamp"(%17, %149, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%151 = mhlo.convolution(%150, %cst_174) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x160xf32>) -> tensor<1x7x7x160xf32>
%152 = "mhlo.batch_norm_inference"(%151, %cst_175, %cst_176, %cst_177, %cst_178) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>) -> tensor<1x7x7x160xf32>
%153 = chlo.broadcast_add %144, %152 : (tensor<1x7x7x160xf32>, tensor<1x7x7x160xf32>) -> tensor<1x7x7x160xf32>
%154 = mhlo.convolution(%153, %cst_165) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%155 = "mhlo.batch_norm_inference"(%154, %cst_166, %cst_167, %cst_168, %cst_169) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%156 = "mhlo.clamp"(%17, %155, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%157 = mhlo.convolution(%156, %1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x7x7x960xf32>, tensor<3x3x1x960xf32>) -> tensor<1x7x7x960xf32>
%158 = "mhlo.batch_norm_inference"(%157, %cst_170, %cst_171, %cst_172, %cst_173) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%159 = "mhlo.clamp"(%17, %158, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%160 = mhlo.convolution(%159, %cst_160) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x160xf32>) -> tensor<1x7x7x160xf32>
%161 = "mhlo.batch_norm_inference"(%160, %cst_161, %cst_162, %cst_163, %cst_164) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>) -> tensor<1x7x7x160xf32>
%162 = chlo.broadcast_add %153, %161 : (tensor<1x7x7x160xf32>, tensor<1x7x7x160xf32>) -> tensor<1x7x7x160xf32>
%163 = mhlo.convolution(%162, %cst_151) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%164 = "mhlo.batch_norm_inference"(%163, %cst_152, %cst_153, %cst_154, %cst_155) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%165 = "mhlo.clamp"(%17, %164, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%166 = mhlo.convolution(%165, %0) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x7x7x960xf32>, tensor<3x3x1x960xf32>) -> tensor<1x7x7x960xf32>
%167 = "mhlo.batch_norm_inference"(%166, %cst_156, %cst_157, %cst_158, %cst_159) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%168 = "mhlo.clamp"(%17, %167, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%169 = mhlo.convolution(%168, %cst_146) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x320xf32>) -> tensor<1x7x7x320xf32>
%170 = "mhlo.batch_norm_inference"(%169, %cst_147, %cst_148, %cst_149, %cst_150) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x320xf32>, tensor<320xf32>, tensor<320xf32>, tensor<320xf32>, tensor<320xf32>) -> tensor<1x7x7x320xf32>
%171 = mhlo.convolution(%170, %cst_10) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x320xf32>, tensor<1x1x320x1280xf32>) -> tensor<1x7x7x1280xf32>
%172 = "mhlo.batch_norm_inference"(%171, %cst_11, %cst_12, %cst_13, %cst_14) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x1280xf32>, tensor<1280xf32>, tensor<1280xf32>, tensor<1280xf32>, tensor<1280xf32>) -> tensor<1x7x7x1280xf32>
%173 = "mhlo.clamp"(%17, %172, %20) : (tensor<f32>, tensor<1x7x7x1280xf32>, tensor<f32>) -> tensor<1x7x7x1280xf32>
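    // Global average pooling: sum-reduce over the spatial dims [1, 2], then divide by 49.0 (= 7*7 spatial positions).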
%174 = "mhlo.reduce"(%173, %17) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%186 = mhlo.add %arg1, %arg2 : tensor<f32>
"mhlo.return"(%186) : (tensor<f32>) -> ()
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<1x7x7x1280xf32>, tensor<f32>) -> tensor<1x1280xf32>
%175 = chlo.broadcast_divide %174, %19 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x1280xf32>, tensor<f32>) -> tensor<1x1280xf32>
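    // Classifier head: dense layer expressed as a dot with the 1280x1000 weight matrix plus a broadcast 1000-element bias.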
%176 = "mhlo.dot"(%175, %cst) : (tensor<1x1280xf32>, tensor<1280x1000xf32>) -> tensor<1x1000xf32>
%177 = "mhlo.broadcast_in_dim"(%cst_0) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<1000xf32>) -> tensor<1x1000xf32>
%178 = mhlo.add %176, %177 : tensor<1x1000xf32>
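    // Softmax over the 1000 logits: subtract the row max for numerical stability, exponentiate, and normalize by the row sum.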
%179 = "mhlo.reduce"(%178, %18) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%186 = mhlo.maximum %arg1, %arg2 : tensor<f32>
"mhlo.return"(%186) : (tensor<f32>) -> ()
}) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x1000xf32>, tensor<f32>) -> tensor<1xf32>
%180 = linalg.tensor_expand_shape %179 [[0, 1]] : tensor<1xf32> into tensor<1x1xf32>
%181 = chlo.broadcast_subtract %178, %180 : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32>
%182 = "mhlo.exponential"(%181) : (tensor<1x1000xf32>) -> tensor<1x1000xf32>
%183 = "mhlo.reduce"(%182, %17) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%186 = mhlo.add %arg1, %arg2 : tensor<f32>
"mhlo.return"(%186) : (tensor<f32>) -> ()
}) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x1000xf32>, tensor<f32>) -> tensor<1xf32>
%184 = linalg.tensor_expand_shape %183 [[0, 1]] : tensor<1xf32> into tensor<1x1xf32>
%185 = chlo.broadcast_divide %182, %184 : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32>
return %185 : tensor<1x1000xf32>
}
}
// -----// IR Dump After Canonicalizer //----- //
builtin.func private @"__inference_<lambda>_133580"(%arg0: tensor<1x224x224x3xf32> {tf._user_specified_name = "x"}) -> tensor<1x1000xf32> attributes {tf._construction_context = "kEagerRuntime", tf._input_shapes = [#tf_type.shape<1x224x224x3>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, 
#tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>], tf.signature.is_stateful} {
%0 = mhlo.constant dense<6.000000e+00> : tensor<f32>
%1 = mhlo.constant dense<4.900000e+01> : tensor<f32>
%2 = mhlo.constant dense<0xFF800000> : tensor<f32>
%3 = mhlo.constant dense<0.000000e+00> : tensor<f32>
%cst = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_0 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_1 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_2 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_3 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_4 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_5 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_6 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_7 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_8 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_9 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_10 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_11 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_12 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x96xf32>
%cst_13 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_14 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_15 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_16 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_17 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_18 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_19 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_20 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_21 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_22 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_23 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_24 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_25 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_26 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32>
%cst_27 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_28 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_29 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_30 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_31 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_32 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_33 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_34 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_35 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_36 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_37 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_38 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_39 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_40 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32>
%cst_41 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_42 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_43 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_44 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_45 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_46 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_47 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_48 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_49 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_50 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_51 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_52 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_53 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_54 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x160xf32>
%cst_55 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_56 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_57 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_58 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_59 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_60 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_61 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_62 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_63 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_64 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_65 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_66 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_67 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_68 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32>
%cst_69 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_70 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_71 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_72 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_73 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_74 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_75 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_76 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_77 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_78 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_79 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_80 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_81 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_82 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32>
%cst_83 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_84 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_85 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_86 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_87 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_88 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_89 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_90 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_91 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_92 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_93 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_94 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_95 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_96 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x320xf32>
%cst_97 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_98 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_99 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_100 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_101 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_102 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_103 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_104 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_105 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x16x96xf32>
%cst_106 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_107 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_108 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_109 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_110 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x24xf32>
%cst_111 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_112 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_113 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_114 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_115 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_116 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_117 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_118 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_119 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32>
%cst_120 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_121 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_122 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_123 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_124 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x24xf32>
%cst_125 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_126 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_127 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_128 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_129 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_130 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_131 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_132 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_133 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32>
%cst_134 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_135 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_136 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_137 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_138 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x32xf32>
%cst_139 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_140 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_141 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_142 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_143 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_144 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_145 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_146 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_147 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_148 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_149 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_150 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_151 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_152 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32>
%cst_153 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_154 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_155 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_156 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_157 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_158 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_159 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_160 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_161 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_162 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_163 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_164 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_165 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_166 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32>
%cst_167 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_168 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_169 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_170 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_171 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_172 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_173 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_174 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_175 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_176 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_177 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_178 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_179 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_180 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x64xf32>
%cst_181 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_182 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_183 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_184 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_185 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_186 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_187 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_188 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_189 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_190 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_191 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_192 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_193 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_194 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_195 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_196 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_197 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_198 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_199 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_200 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_201 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_202 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_203 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_204 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_205 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_206 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_207 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_208 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_209 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_210 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_211 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_212 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_213 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_214 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_215 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_216 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_217 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_218 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_219 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_220 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_221 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_222 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_223 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_224 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_225 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_226 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_227 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x3x32xf32>
%cst_228 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_229 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_230 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_231 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_232 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x320x1280xf32>
%cst_233 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_234 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_235 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_236 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_237 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_238 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_239 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_240 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_241 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x16xf32>
%cst_242 = constant opaque<"_", "0xDEADBEEF"> : tensor<1000xf32>
%cst_243 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280x1000xf32>
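    // Note: the %cst_* values above are the model weights materialized as function-local
    // constants; their payloads are elided as opaque "0xDEADBEEF" blobs. The mhlo.constant
    // values that follow appear to hold the depthwise kernels in a 3x3x1xC layout to match
    // the depthwise convolutions' filter format (an assumption based on the shapes; the
    // corresponding flow.variable declarations store them as 3x3xCx1).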
%4 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x32xf32>
%5 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x96xf32>
%6 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x144xf32>
%7 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x144xf32>
%8 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x192xf32>
%9 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x192xf32>
%10 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x192xf32>
%11 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%12 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%13 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%14 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%15 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x576xf32>
%16 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x576xf32>
%17 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x576xf32>
%18 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x960xf32>
%19 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x960xf32>
%20 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x960xf32>
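    // The ops below form the network stem: a stride-2 3x3 convolution over the
    // 1x224x224x3 input, batch normalization in inference mode, and a clamp acting
    // as ReLU6 (%3 and %0 are presumably the 0.0 and 6.0 scalar constants defined
    // earlier in this function).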
%21 = mhlo.convolution(%arg0, %cst_227) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 1], [0, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x224x224x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%22 = "mhlo.batch_norm_inference"(%21, %cst_226, %cst_225, %cst_224, %cst_223) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x112x112x32xf32>
%23 = "mhlo.clamp"(%3, %22, %0) : (tensor<f32>, tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x112x112x32xf32>
%24 = mhlo.convolution(%23, %4) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 32 : i64} : (tensor<1x112x112x32xf32>, tensor<3x3x1x32xf32>) -> tensor<1x112x112x32xf32>
%25 = "mhlo.batch_norm_inference"(%24, %cst_236, %cst_235, %cst_234, %cst_233) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x112x112x32xf32>
%26 = "mhlo.clamp"(%3, %25, %0) : (tensor<f32>, tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x112x112x32xf32>
%27 = mhlo.convolution(%26, %cst_241) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x112x112x32xf32>, tensor<1x1x32x16xf32>) -> tensor<1x112x112x16xf32>
%28 = "mhlo.batch_norm_inference"(%27, %cst_240, %cst_239, %cst_238, %cst_237) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>) -> tensor<1x112x112x16xf32>
%29 = mhlo.convolution(%28, %cst_105) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x112x112x16xf32>, tensor<1x1x16x96xf32>) -> tensor<1x112x112x96xf32>
%30 = "mhlo.batch_norm_inference"(%29, %cst_104, %cst_103, %cst_102, %cst_101) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x112x112x96xf32>
%31 = "mhlo.clamp"(%3, %30, %0) : (tensor<f32>, tensor<1x112x112x96xf32>, tensor<f32>) -> tensor<1x112x112x96xf32>
%32 = "mhlo.pad"(%31, %3) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x112x112x96xf32>, tensor<f32>) -> tensor<1x113x113x96xf32>
%33 = mhlo.convolution(%32, %5) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 96 : i64} : (tensor<1x113x113x96xf32>, tensor<3x3x1x96xf32>) -> tensor<1x56x56x96xf32>
%34 = "mhlo.batch_norm_inference"(%33, %cst_100, %cst_99, %cst_98, %cst_97) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x56x56x96xf32>
%35 = "mhlo.clamp"(%3, %34, %0) : (tensor<f32>, tensor<1x56x56x96xf32>, tensor<f32>) -> tensor<1x56x56x96xf32>
%36 = mhlo.convolution(%35, %cst_110) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x96xf32>, tensor<1x1x96x24xf32>) -> tensor<1x56x56x24xf32>
%37 = "mhlo.batch_norm_inference"(%36, %cst_109, %cst_108, %cst_107, %cst_106) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>) -> tensor<1x56x56x24xf32>
%38 = mhlo.convolution(%37, %cst_119) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x24xf32>, tensor<1x1x24x144xf32>) -> tensor<1x56x56x144xf32>
%39 = "mhlo.batch_norm_inference"(%38, %cst_118, %cst_117, %cst_116, %cst_115) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x56x56x144xf32>
%40 = "mhlo.clamp"(%3, %39, %0) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%41 = mhlo.convolution(%40, %6) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 144 : i64} : (tensor<1x56x56x144xf32>, tensor<3x3x1x144xf32>) -> tensor<1x56x56x144xf32>
%42 = "mhlo.batch_norm_inference"(%41, %cst_114, %cst_113, %cst_112, %cst_111) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x56x56x144xf32>
%43 = "mhlo.clamp"(%3, %42, %0) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%44 = mhlo.convolution(%43, %cst_124) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x144xf32>, tensor<1x1x144x24xf32>) -> tensor<1x56x56x24xf32>
%45 = "mhlo.batch_norm_inference"(%44, %cst_123, %cst_122, %cst_121, %cst_120) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>) -> tensor<1x56x56x24xf32>
%46 = chlo.broadcast_add %37, %45 : (tensor<1x56x56x24xf32>, tensor<1x56x56x24xf32>) -> tensor<1x56x56x24xf32>
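    // chlo.broadcast_add combines the block input (%37) with the projected output (%45):
    // the residual/skip connection used whenever a block keeps stride 1 and the same
    // channel count. The later broadcast_add ops repeat this pattern.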
%47 = mhlo.convolution(%46, %cst_133) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x24xf32>, tensor<1x1x24x144xf32>) -> tensor<1x56x56x144xf32>
%48 = "mhlo.batch_norm_inference"(%47, %cst_132, %cst_131, %cst_130, %cst_129) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x56x56x144xf32>
%49 = "mhlo.clamp"(%3, %48, %0) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%50 = "mhlo.pad"(%49, %3) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x57x57x144xf32>
%51 = mhlo.convolution(%50, %7) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 144 : i64} : (tensor<1x57x57x144xf32>, tensor<3x3x1x144xf32>) -> tensor<1x28x28x144xf32>
%52 = "mhlo.batch_norm_inference"(%51, %cst_128, %cst_127, %cst_126, %cst_125) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x28x28x144xf32>
%53 = "mhlo.clamp"(%3, %52, %0) : (tensor<f32>, tensor<1x28x28x144xf32>, tensor<f32>) -> tensor<1x28x28x144xf32>
%54 = mhlo.convolution(%53, %cst_138) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x144xf32>, tensor<1x1x144x32xf32>) -> tensor<1x28x28x32xf32>
%55 = "mhlo.batch_norm_inference"(%54, %cst_137, %cst_136, %cst_135, %cst_134) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x28x28x32xf32>
%56 = mhlo.convolution(%55, %cst_147) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%57 = "mhlo.batch_norm_inference"(%56, %cst_146, %cst_145, %cst_144, %cst_143) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%58 = "mhlo.clamp"(%3, %57, %0) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%59 = mhlo.convolution(%58, %8) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x28x28x192xf32>, tensor<3x3x1x192xf32>) -> tensor<1x28x28x192xf32>
%60 = "mhlo.batch_norm_inference"(%59, %cst_142, %cst_141, %cst_140, %cst_139) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%61 = "mhlo.clamp"(%3, %60, %0) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%62 = mhlo.convolution(%61, %cst_152) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x192xf32>, tensor<1x1x192x32xf32>) -> tensor<1x28x28x32xf32>
%63 = "mhlo.batch_norm_inference"(%62, %cst_151, %cst_150, %cst_149, %cst_148) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x28x28x32xf32>
%64 = chlo.broadcast_add %55, %63 : (tensor<1x28x28x32xf32>, tensor<1x28x28x32xf32>) -> tensor<1x28x28x32xf32>
%65 = mhlo.convolution(%64, %cst_161) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%66 = "mhlo.batch_norm_inference"(%65, %cst_160, %cst_159, %cst_158, %cst_157) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%67 = "mhlo.clamp"(%3, %66, %0) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%68 = mhlo.convolution(%67, %9) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x28x28x192xf32>, tensor<3x3x1x192xf32>) -> tensor<1x28x28x192xf32>
%69 = "mhlo.batch_norm_inference"(%68, %cst_156, %cst_155, %cst_154, %cst_153) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%70 = "mhlo.clamp"(%3, %69, %0) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%71 = mhlo.convolution(%70, %cst_166) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x192xf32>, tensor<1x1x192x32xf32>) -> tensor<1x28x28x32xf32>
%72 = "mhlo.batch_norm_inference"(%71, %cst_165, %cst_164, %cst_163, %cst_162) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x28x28x32xf32>
%73 = chlo.broadcast_add %64, %72 : (tensor<1x28x28x32xf32>, tensor<1x28x28x32xf32>) -> tensor<1x28x28x32xf32>
%74 = mhlo.convolution(%73, %cst_175) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%75 = "mhlo.batch_norm_inference"(%74, %cst_174, %cst_173, %cst_172, %cst_171) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%76 = "mhlo.clamp"(%3, %75, %0) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%77 = "mhlo.pad"(%76, %3) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x29x29x192xf32>
%78 = mhlo.convolution(%77, %10) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x29x29x192xf32>, tensor<3x3x1x192xf32>) -> tensor<1x14x14x192xf32>
%79 = "mhlo.batch_norm_inference"(%78, %cst_170, %cst_169, %cst_168, %cst_167) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x14x14x192xf32>
%80 = "mhlo.clamp"(%3, %79, %0) : (tensor<f32>, tensor<1x14x14x192xf32>, tensor<f32>) -> tensor<1x14x14x192xf32>
%81 = mhlo.convolution(%80, %cst_180) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x192xf32>, tensor<1x1x192x64xf32>) -> tensor<1x14x14x64xf32>
%82 = "mhlo.batch_norm_inference"(%81, %cst_179, %cst_178, %cst_177, %cst_176) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%83 = mhlo.convolution(%82, %cst_189) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%84 = "mhlo.batch_norm_inference"(%83, %cst_188, %cst_187, %cst_186, %cst_185) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%85 = "mhlo.clamp"(%3, %84, %0) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%86 = mhlo.convolution(%85, %11) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%87 = "mhlo.batch_norm_inference"(%86, %cst_184, %cst_183, %cst_182, %cst_181) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%88 = "mhlo.clamp"(%3, %87, %0) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%89 = mhlo.convolution(%88, %cst_194) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%90 = "mhlo.batch_norm_inference"(%89, %cst_193, %cst_192, %cst_191, %cst_190) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%91 = chlo.broadcast_add %82, %90 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%92 = mhlo.convolution(%91, %cst_203) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%93 = "mhlo.batch_norm_inference"(%92, %cst_202, %cst_201, %cst_200, %cst_199) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%94 = "mhlo.clamp"(%3, %93, %0) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%95 = mhlo.convolution(%94, %12) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%96 = "mhlo.batch_norm_inference"(%95, %cst_198, %cst_197, %cst_196, %cst_195) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%97 = "mhlo.clamp"(%3, %96, %0) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%98 = mhlo.convolution(%97, %cst_208) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%99 = "mhlo.batch_norm_inference"(%98, %cst_207, %cst_206, %cst_205, %cst_204) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%100 = chlo.broadcast_add %91, %99 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%101 = mhlo.convolution(%100, %cst_217) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%102 = "mhlo.batch_norm_inference"(%101, %cst_216, %cst_215, %cst_214, %cst_213) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%103 = "mhlo.clamp"(%3, %102, %0) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%104 = mhlo.convolution(%103, %13) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%105 = "mhlo.batch_norm_inference"(%104, %cst_212, %cst_211, %cst_210, %cst_209) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%106 = "mhlo.clamp"(%3, %105, %0) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%107 = mhlo.convolution(%106, %cst_222) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%108 = "mhlo.batch_norm_inference"(%107, %cst_221, %cst_220, %cst_219, %cst_218) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%109 = chlo.broadcast_add %100, %108 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%110 = mhlo.convolution(%109, %cst_7) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%111 = "mhlo.batch_norm_inference"(%110, %cst_6, %cst_5, %cst_4, %cst_3) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%112 = "mhlo.clamp"(%3, %111, %0) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%113 = mhlo.convolution(%112, %14) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%114 = "mhlo.batch_norm_inference"(%113, %cst_2, %cst_1, %cst_0, %cst) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%115 = "mhlo.clamp"(%3, %114, %0) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%116 = mhlo.convolution(%115, %cst_12) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x96xf32>) -> tensor<1x14x14x96xf32>
%117 = "mhlo.batch_norm_inference"(%116, %cst_11, %cst_10, %cst_9, %cst_8) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%118 = mhlo.convolution(%117, %cst_21) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%119 = "mhlo.batch_norm_inference"(%118, %cst_20, %cst_19, %cst_18, %cst_17) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%120 = "mhlo.clamp"(%3, %119, %0) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%121 = mhlo.convolution(%120, %15) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x14x14x576xf32>, tensor<3x3x1x576xf32>) -> tensor<1x14x14x576xf32>
%122 = "mhlo.batch_norm_inference"(%121, %cst_16, %cst_15, %cst_14, %cst_13) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%123 = "mhlo.clamp"(%3, %122, %0) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%124 = mhlo.convolution(%123, %cst_26) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x14x14x96xf32>
%125 = "mhlo.batch_norm_inference"(%124, %cst_25, %cst_24, %cst_23, %cst_22) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%126 = chlo.broadcast_add %117, %125 : (tensor<1x14x14x96xf32>, tensor<1x14x14x96xf32>) -> tensor<1x14x14x96xf32>
%127 = mhlo.convolution(%126, %cst_35) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%128 = "mhlo.batch_norm_inference"(%127, %cst_34, %cst_33, %cst_32, %cst_31) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%129 = "mhlo.clamp"(%3, %128, %0) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%130 = mhlo.convolution(%129, %16) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x14x14x576xf32>, tensor<3x3x1x576xf32>) -> tensor<1x14x14x576xf32>
%131 = "mhlo.batch_norm_inference"(%130, %cst_30, %cst_29, %cst_28, %cst_27) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%132 = "mhlo.clamp"(%3, %131, %0) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%133 = mhlo.convolution(%132, %cst_40) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x14x14x96xf32>
%134 = "mhlo.batch_norm_inference"(%133, %cst_39, %cst_38, %cst_37, %cst_36) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%135 = chlo.broadcast_add %126, %134 : (tensor<1x14x14x96xf32>, tensor<1x14x14x96xf32>) -> tensor<1x14x14x96xf32>
%136 = mhlo.convolution(%135, %cst_49) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%137 = "mhlo.batch_norm_inference"(%136, %cst_48, %cst_47, %cst_46, %cst_45) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%138 = "mhlo.clamp"(%3, %137, %0) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%139 = "mhlo.pad"(%138, %3) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x15x15x576xf32>
%140 = mhlo.convolution(%139, %17) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x15x15x576xf32>, tensor<3x3x1x576xf32>) -> tensor<1x7x7x576xf32>
%141 = "mhlo.batch_norm_inference"(%140, %cst_44, %cst_43, %cst_42, %cst_41) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x7x7x576xf32>
%142 = "mhlo.clamp"(%3, %141, %0) : (tensor<f32>, tensor<1x7x7x576xf32>, tensor<f32>) -> tensor<1x7x7x576xf32>
%143 = mhlo.convolution(%142, %cst_54) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x576xf32>, tensor<1x1x576x160xf32>) -> tensor<1x7x7x160xf32>
%144 = "mhlo.batch_norm_inference"(%143, %cst_53, %cst_52, %cst_51, %cst_50) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>) -> tensor<1x7x7x160xf32>
%145 = mhlo.convolution(%144, %cst_63) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%146 = "mhlo.batch_norm_inference"(%145, %cst_62, %cst_61, %cst_60, %cst_59) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%147 = "mhlo.clamp"(%3, %146, %0) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%148 = mhlo.convolution(%147, %18) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x7x7x960xf32>, tensor<3x3x1x960xf32>) -> tensor<1x7x7x960xf32>
%149 = "mhlo.batch_norm_inference"(%148, %cst_58, %cst_57, %cst_56, %cst_55) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%150 = "mhlo.clamp"(%3, %149, %0) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%151 = mhlo.convolution(%150, %cst_68) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x160xf32>) -> tensor<1x7x7x160xf32>
%152 = "mhlo.batch_norm_inference"(%151, %cst_67, %cst_66, %cst_65, %cst_64) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>) -> tensor<1x7x7x160xf32>
%153 = chlo.broadcast_add %144, %152 : (tensor<1x7x7x160xf32>, tensor<1x7x7x160xf32>) -> tensor<1x7x7x160xf32>
%154 = mhlo.convolution(%153, %cst_77) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%155 = "mhlo.batch_norm_inference"(%154, %cst_76, %cst_75, %cst_74, %cst_73) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%156 = "mhlo.clamp"(%3, %155, %0) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%157 = mhlo.convolution(%156, %19) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x7x7x960xf32>, tensor<3x3x1x960xf32>) -> tensor<1x7x7x960xf32>
%158 = "mhlo.batch_norm_inference"(%157, %cst_72, %cst_71, %cst_70, %cst_69) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%159 = "mhlo.clamp"(%3, %158, %0) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%160 = mhlo.convolution(%159, %cst_82) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x160xf32>) -> tensor<1x7x7x160xf32>
%161 = "mhlo.batch_norm_inference"(%160, %cst_81, %cst_80, %cst_79, %cst_78) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>) -> tensor<1x7x7x160xf32>
%162 = chlo.broadcast_add %153, %161 : (tensor<1x7x7x160xf32>, tensor<1x7x7x160xf32>) -> tensor<1x7x7x160xf32>
%163 = mhlo.convolution(%162, %cst_91) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%164 = "mhlo.batch_norm_inference"(%163, %cst_90, %cst_89, %cst_88, %cst_87) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%165 = "mhlo.clamp"(%3, %164, %0) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%166 = mhlo.convolution(%165, %20) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x7x7x960xf32>, tensor<3x3x1x960xf32>) -> tensor<1x7x7x960xf32>
%167 = "mhlo.batch_norm_inference"(%166, %cst_86, %cst_85, %cst_84, %cst_83) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%168 = "mhlo.clamp"(%3, %167, %0) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%169 = mhlo.convolution(%168, %cst_96) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x320xf32>) -> tensor<1x7x7x320xf32>
%170 = "mhlo.batch_norm_inference"(%169, %cst_95, %cst_94, %cst_93, %cst_92) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x320xf32>, tensor<320xf32>, tensor<320xf32>, tensor<320xf32>, tensor<320xf32>) -> tensor<1x7x7x320xf32>
%171 = mhlo.convolution(%170, %cst_232) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x320xf32>, tensor<1x1x320x1280xf32>) -> tensor<1x7x7x1280xf32>
%172 = "mhlo.batch_norm_inference"(%171, %cst_231, %cst_230, %cst_229, %cst_228) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x1280xf32>, tensor<1280xf32>, tensor<1280xf32>, tensor<1280xf32>, tensor<1280xf32>) -> tensor<1x7x7x1280xf32>
%173 = "mhlo.clamp"(%3, %172, %0) : (tensor<f32>, tensor<1x7x7x1280xf32>, tensor<f32>) -> tensor<1x7x7x1280xf32>
%174 = "mhlo.reduce"(%173, %3) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%186 = mhlo.add %arg1, %arg2 : tensor<f32>
"mhlo.return"(%186) : (tensor<f32>) -> ()
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<1x7x7x1280xf32>, tensor<f32>) -> tensor<1x1280xf32>
%175 = chlo.broadcast_divide %174, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x1280xf32>, tensor<f32>) -> tensor<1x1280xf32>
%176 = "mhlo.dot"(%175, %cst_243) : (tensor<1x1280xf32>, tensor<1280x1000xf32>) -> tensor<1x1000xf32>
%177 = "mhlo.broadcast_in_dim"(%cst_242) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<1000xf32>) -> tensor<1x1000xf32>
%178 = mhlo.add %176, %177 : tensor<1x1000xf32>
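    // The remaining ops compute a numerically stable softmax over the 1000 logits:
    // reduce-max along dim 1, subtract the max, exponentiate, reduce-sum, divide.
    // linalg.tensor_expand_shape only reshapes the 1-element reductions back to
    // tensor<1x1xf32> so the chlo broadcasts line up; %2 is presumably the -inf
    // init value for the max reduction.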
%179 = "mhlo.reduce"(%178, %2) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%186 = mhlo.maximum %arg1, %arg2 : tensor<f32>
"mhlo.return"(%186) : (tensor<f32>) -> ()
}) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x1000xf32>, tensor<f32>) -> tensor<1xf32>
%180 = linalg.tensor_expand_shape %179 [[0, 1]] : tensor<1xf32> into tensor<1x1xf32>
%181 = chlo.broadcast_subtract %178, %180 : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32>
%182 = "mhlo.exponential"(%181) : (tensor<1x1000xf32>) -> tensor<1x1000xf32>
%183 = "mhlo.reduce"(%182, %3) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%186 = mhlo.add %arg1, %arg2 : tensor<f32>
"mhlo.return"(%186) : (tensor<f32>) -> ()
}) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x1000xf32>, tensor<f32>) -> tensor<1xf32>
%184 = linalg.tensor_expand_shape %183 [[0, 1]] : tensor<1xf32> into tensor<1x1xf32>
%185 = chlo.broadcast_divide %182, %184 : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32>
return %185 : tensor<1x1000xf32>
}
// -----// IR Dump After Inliner //----- //
builtin.module {
flow.variable @"__iree_flow___sm_node163__m.layer-1.kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x3x32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node169__m.layer-2.gamma" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node170__m.layer-2.beta" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node171__m.layer-2.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node172__m.layer-2.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node181__m.layer-4.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x32x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node187__m.layer-5.gamma" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node188__m.layer-5.beta" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node189__m.layer-5.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node190__m.layer-5.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node199__m.layer-7.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x16xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node205__m.layer-8.gamma" opaque<"_", "0xDEADBEEF"> : tensor<16xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node206__m.layer-8.beta" opaque<"_", "0xDEADBEEF"> : tensor<16xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node207__m.layer-8.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<16xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node208__m.layer-8.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<16xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node213__m.layer-9.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x16x96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node219__m.layer-10.gamma" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node220__m.layer-10.beta" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node221__m.layer-10.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node222__m.layer-10.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node235__m.layer-13.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x96x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node241__m.layer-14.gamma" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node242__m.layer-14.beta" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node243__m.layer-14.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node244__m.layer-14.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node253__m.layer-16.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x24xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node259__m.layer-17.gamma" opaque<"_", "0xDEADBEEF"> : tensor<24xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node260__m.layer-17.beta" opaque<"_", "0xDEADBEEF"> : tensor<24xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node261__m.layer-17.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<24xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node262__m.layer-17.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<24xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node267__m.layer-18.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node273__m.layer-19.gamma" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node274__m.layer-19.beta" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node275__m.layer-19.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node276__m.layer-19.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node285__m.layer-21.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x144x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node291__m.layer-22.gamma" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node292__m.layer-22.beta" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node293__m.layer-22.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node294__m.layer-22.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node303__m.layer-24.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x24xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node309__m.layer-25.gamma" opaque<"_", "0xDEADBEEF"> : tensor<24xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node310__m.layer-25.beta" opaque<"_", "0xDEADBEEF"> : tensor<24xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node311__m.layer-25.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<24xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node312__m.layer-25.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<24xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node321__m.layer-27.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node327__m.layer-28.gamma" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node328__m.layer-28.beta" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node329__m.layer-28.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node330__m.layer-28.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node343__m.layer-31.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x144x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node349__m.layer-32.gamma" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node350__m.layer-32.beta" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node351__m.layer-32.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node352__m.layer-32.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<144xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node361__m.layer-34.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node367__m.layer-35.gamma" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node368__m.layer-35.beta" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node369__m.layer-35.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node370__m.layer-35.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node375__m.layer-36.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node381__m.layer-37.gamma" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node382__m.layer-37.beta" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node383__m.layer-37.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node384__m.layer-37.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node393__m.layer-39.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x192x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node399__m.layer-40.gamma" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node400__m.layer-40.beta" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node401__m.layer-40.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node402__m.layer-40.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node411__m.layer-42.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node417__m.layer-43.gamma" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node418__m.layer-43.beta" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node419__m.layer-43.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node420__m.layer-43.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node429__m.layer-45.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node435__m.layer-46.gamma" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node436__m.layer-46.beta" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node437__m.layer-46.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node438__m.layer-46.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node447__m.layer-48.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x192x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node453__m.layer-49.gamma" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node454__m.layer-49.beta" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node455__m.layer-49.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node456__m.layer-49.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node465__m.layer-51.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node471__m.layer-52.gamma" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node472__m.layer-52.beta" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node473__m.layer-52.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node474__m.layer-52.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node483__m.layer-54.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node489__m.layer-55.gamma" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node490__m.layer-55.beta" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node491__m.layer-55.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node492__m.layer-55.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node505__m.layer-58.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x192x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node511__m.layer-59.gamma" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node512__m.layer-59.beta" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node513__m.layer-59.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node514__m.layer-59.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<192xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node523__m.layer-61.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node529__m.layer-62.gamma" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node530__m.layer-62.beta" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node531__m.layer-62.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node532__m.layer-62.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node537__m.layer-63.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node543__m.layer-64.gamma" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node544__m.layer-64.beta" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node545__m.layer-64.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node546__m.layer-64.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node555__m.layer-66.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x384x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node561__m.layer-67.gamma" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node562__m.layer-67.beta" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node563__m.layer-67.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node564__m.layer-67.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node573__m.layer-69.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node579__m.layer-70.gamma" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node580__m.layer-70.beta" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node581__m.layer-70.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node582__m.layer-70.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node591__m.layer-72.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node597__m.layer-73.gamma" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node598__m.layer-73.beta" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node599__m.layer-73.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node600__m.layer-73.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node609__m.layer-75.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x384x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node615__m.layer-76.gamma" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node616__m.layer-76.beta" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node617__m.layer-76.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node618__m.layer-76.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node627__m.layer-78.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node633__m.layer-79.gamma" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node634__m.layer-79.beta" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node635__m.layer-79.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node636__m.layer-79.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node645__m.layer-81.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node651__m.layer-82.gamma" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node652__m.layer-82.beta" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node653__m.layer-82.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node654__m.layer-82.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node663__m.layer-84.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x384x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node669__m.layer-85.gamma" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node670__m.layer-85.beta" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node671__m.layer-85.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node672__m.layer-85.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node681__m.layer-87.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node687__m.layer-88.gamma" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node688__m.layer-88.beta" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node689__m.layer-88.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node690__m.layer-88.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<64xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node699__m.layer-90.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node705__m.layer-91.gamma" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node706__m.layer-91.beta" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node707__m.layer-91.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node708__m.layer-91.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node717__m.layer-93.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x384x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node723__m.layer-94.gamma" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node724__m.layer-94.beta" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node725__m.layer-94.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node726__m.layer-94.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<384xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node735__m.layer-96.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node741__m.layer-97.gamma" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node742__m.layer-97.beta" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node743__m.layer-97.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node744__m.layer-97.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node749__m.layer-98.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node755__m.layer-99.gamma" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node756__m.layer-99.beta" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node757__m.layer-99.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node758__m.layer-99.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node767__m.layer-101.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x576x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node773__m.layer-102.gamma" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node774__m.layer-102.beta" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node775__m.layer-102.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node776__m.layer-102.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node785__m.layer-104.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node791__m.layer-105.gamma" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node792__m.layer-105.beta" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node793__m.layer-105.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node794__m.layer-105.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node803__m.layer-107.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node809__m.layer-108.gamma" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node810__m.layer-108.beta" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node811__m.layer-108.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node812__m.layer-108.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node821__m.layer-110.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x576x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node827__m.layer-111.gamma" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node828__m.layer-111.beta" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node829__m.layer-111.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node830__m.layer-111.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node839__m.layer-113.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node845__m.layer-114.gamma" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node846__m.layer-114.beta" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node847__m.layer-114.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node848__m.layer-114.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<96xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node857__m.layer-116.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node863__m.layer-117.gamma" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node864__m.layer-117.beta" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node865__m.layer-117.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node866__m.layer-117.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node879__m.layer-120.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x576x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node885__m.layer-121.gamma" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node886__m.layer-121.beta" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node887__m.layer-121.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node888__m.layer-121.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<576xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node897__m.layer-123.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node903__m.layer-124.gamma" opaque<"_", "0xDEADBEEF"> : tensor<160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node904__m.layer-124.beta" opaque<"_", "0xDEADBEEF"> : tensor<160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node905__m.layer-124.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node906__m.layer-124.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node911__m.layer-125.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node917__m.layer-126.gamma" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node918__m.layer-126.beta" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node919__m.layer-126.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node920__m.layer-126.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node929__m.layer-128.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x960x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node935__m.layer-129.gamma" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node936__m.layer-129.beta" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node937__m.layer-129.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node938__m.layer-129.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node947__m.layer-131.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node953__m.layer-132.gamma" opaque<"_", "0xDEADBEEF"> : tensor<160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node954__m.layer-132.beta" opaque<"_", "0xDEADBEEF"> : tensor<160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node955__m.layer-132.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node956__m.layer-132.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node965__m.layer-134.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node971__m.layer-135.gamma" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node972__m.layer-135.beta" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node973__m.layer-135.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node974__m.layer-135.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node983__m.layer-137.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x960x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node989__m.layer-138.gamma" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node990__m.layer-138.beta" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node991__m.layer-138.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node992__m.layer-138.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1001__m.layer-140.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1007__m.layer-141.gamma" opaque<"_", "0xDEADBEEF"> : tensor<160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1008__m.layer-141.beta" opaque<"_", "0xDEADBEEF"> : tensor<160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1009__m.layer-141.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1010__m.layer-141.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<160xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1019__m.layer-143.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1025__m.layer-144.gamma" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1026__m.layer-144.beta" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1027__m.layer-144.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1028__m.layer-144.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1037__m.layer-146.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x960x1xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1043__m.layer-147.gamma" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1044__m.layer-147.beta" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1045__m.layer-147.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1046__m.layer-147.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<960xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1055__m.layer-149.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x320xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1061__m.layer-150.gamma" opaque<"_", "0xDEADBEEF"> : tensor<320xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1062__m.layer-150.beta" opaque<"_", "0xDEADBEEF"> : tensor<320xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1063__m.layer-150.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<320xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1064__m.layer-150.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<320xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1069__m.layer-151.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1x1x320x1280xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1075__m.layer-152.gamma" opaque<"_", "0xDEADBEEF"> : tensor<1280xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1076__m.layer-152.beta" opaque<"_", "0xDEADBEEF"> : tensor<1280xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1077__m.layer-152.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<1280xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1078__m.layer-152.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<1280xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1091__m.layer-155.kernel" opaque<"_", "0xDEADBEEF"> : tensor<1280x1000xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node1092__m.layer-155.bias" opaque<"_", "0xDEADBEEF"> : tensor<1000xf32> attributes {sym_visibility = "private"}
builtin.func @call(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi = "{\22a\22:[[\22ndarray\22,\22f32\22,4,1,224,224,3]],\22r\22:[[\22ndarray\22,\22f32\22,2,1,1000]],\22v\22:1}"}} {
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x224x224x3xf32>
%1 = call @"__inference_<lambda>_133580"(%0) : (tensor<1x224x224x3xf32>) -> tensor<1x1000xf32>
%2 = hal.tensor.cast %1 : tensor<1x1000xf32> -> !hal.buffer_view
return %2 : !hal.buffer_view
}
builtin.func private @"__inference_<lambda>_133580"(%arg0: tensor<1x224x224x3xf32> {tf._user_specified_name = "x"}) -> tensor<1x1000xf32> attributes {tf._construction_context = "kEagerRuntime", tf._input_shapes = [#tf_type.shape<1x224x224x3>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, 
#tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>], tf.signature.is_stateful} {
%0 = mhlo.constant dense<6.000000e+00> : tensor<f32>
%1 = mhlo.constant dense<4.900000e+01> : tensor<f32>
%2 = mhlo.constant dense<0xFF800000> : tensor<f32>
%3 = mhlo.constant dense<0.000000e+00> : tensor<f32>
%cst = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_0 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_1 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_2 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_3 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_4 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_5 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_6 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_7 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_8 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_9 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_10 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_11 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_12 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x96xf32>
%cst_13 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_14 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_15 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_16 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_17 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_18 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_19 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_20 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_21 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_22 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_23 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_24 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_25 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_26 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32>
%cst_27 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_28 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_29 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_30 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_31 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_32 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_33 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_34 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_35 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_36 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_37 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_38 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_39 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_40 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32>
%cst_41 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_42 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_43 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_44 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_45 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_46 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_47 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_48 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_49 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_50 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_51 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_52 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_53 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_54 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x160xf32>
%cst_55 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_56 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_57 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_58 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_59 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_60 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_61 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_62 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_63 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_64 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_65 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_66 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_67 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_68 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32>
%cst_69 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_70 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_71 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_72 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_73 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_74 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_75 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_76 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_77 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_78 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_79 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_80 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_81 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_82 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32>
%cst_83 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_84 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_85 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_86 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_87 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_88 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_89 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_90 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_91 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_92 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_93 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_94 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_95 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_96 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x320xf32>
%cst_97 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_98 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_99 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_100 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_101 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_102 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_103 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_104 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_105 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x16x96xf32>
%cst_106 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_107 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_108 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_109 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_110 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x24xf32>
%cst_111 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_112 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_113 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_114 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_115 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_116 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_117 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_118 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_119 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32>
%cst_120 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_121 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_122 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_123 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_124 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x24xf32>
%cst_125 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_126 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_127 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_128 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_129 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_130 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_131 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_132 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_133 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32>
%cst_134 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_135 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_136 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_137 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_138 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x32xf32>
%cst_139 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_140 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_141 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_142 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_143 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_144 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_145 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_146 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_147 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_148 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_149 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_150 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_151 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_152 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32>
%cst_153 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_154 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_155 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_156 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_157 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_158 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_159 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_160 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_161 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_162 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_163 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_164 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_165 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_166 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32>
%cst_167 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_168 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_169 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_170 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_171 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_172 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_173 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_174 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_175 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_176 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_177 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_178 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_179 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_180 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x64xf32>
%cst_181 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_182 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_183 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_184 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_185 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_186 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_187 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_188 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_189 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_190 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_191 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_192 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_193 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_194 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_195 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_196 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_197 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_198 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_199 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_200 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_201 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_202 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_203 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_204 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_205 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_206 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_207 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_208 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_209 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_210 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_211 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_212 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_213 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_214 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_215 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_216 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_217 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_218 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_219 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_220 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_221 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_222 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_223 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_224 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_225 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_226 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_227 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x3x32xf32>
%cst_228 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_229 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_230 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_231 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_232 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x320x1280xf32>
%cst_233 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_234 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_235 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_236 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_237 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_238 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_239 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_240 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_241 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x16xf32>
%cst_242 = constant opaque<"_", "0xDEADBEEF"> : tensor<1000xf32>
%cst_243 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280x1000xf32>
%4 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x32xf32>
%5 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x96xf32>
%6 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x144xf32>
%7 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x144xf32>
%8 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x192xf32>
%9 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x192xf32>
%10 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x192xf32>
%11 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%12 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%13 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%14 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%15 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x576xf32>
%16 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x576xf32>
%17 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x576xf32>
%18 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x960xf32>
%19 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x960xf32>
%20 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x960xf32>
%21 = mhlo.convolution(%arg0, %cst_227) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 1], [0, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x224x224x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%22 = "mhlo.batch_norm_inference"(%21, %cst_226, %cst_225, %cst_224, %cst_223) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x112x112x32xf32>
%23 = "mhlo.clamp"(%3, %22, %0) : (tensor<f32>, tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x112x112x32xf32>
%24 = mhlo.convolution(%23, %4) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 32 : i64} : (tensor<1x112x112x32xf32>, tensor<3x3x1x32xf32>) -> tensor<1x112x112x32xf32>
%25 = "mhlo.batch_norm_inference"(%24, %cst_236, %cst_235, %cst_234, %cst_233) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x112x112x32xf32>
%26 = "mhlo.clamp"(%3, %25, %0) : (tensor<f32>, tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x112x112x32xf32>
%27 = mhlo.convolution(%26, %cst_241) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x112x112x32xf32>, tensor<1x1x32x16xf32>) -> tensor<1x112x112x16xf32>
%28 = "mhlo.batch_norm_inference"(%27, %cst_240, %cst_239, %cst_238, %cst_237) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>) -> tensor<1x112x112x16xf32>
%29 = mhlo.convolution(%28, %cst_105) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x112x112x16xf32>, tensor<1x1x16x96xf32>) -> tensor<1x112x112x96xf32>
%30 = "mhlo.batch_norm_inference"(%29, %cst_104, %cst_103, %cst_102, %cst_101) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x112x112x96xf32>
%31 = "mhlo.clamp"(%3, %30, %0) : (tensor<f32>, tensor<1x112x112x96xf32>, tensor<f32>) -> tensor<1x112x112x96xf32>
%32 = "mhlo.pad"(%31, %3) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x112x112x96xf32>, tensor<f32>) -> tensor<1x113x113x96xf32>
%33 = mhlo.convolution(%32, %5) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 96 : i64} : (tensor<1x113x113x96xf32>, tensor<3x3x1x96xf32>) -> tensor<1x56x56x96xf32>
%34 = "mhlo.batch_norm_inference"(%33, %cst_100, %cst_99, %cst_98, %cst_97) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x56x56x96xf32>
%35 = "mhlo.clamp"(%3, %34, %0) : (tensor<f32>, tensor<1x56x56x96xf32>, tensor<f32>) -> tensor<1x56x56x96xf32>
%36 = mhlo.convolution(%35, %cst_110) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x96xf32>, tensor<1x1x96x24xf32>) -> tensor<1x56x56x24xf32>
%37 = "mhlo.batch_norm_inference"(%36, %cst_109, %cst_108, %cst_107, %cst_106) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>) -> tensor<1x56x56x24xf32>
%38 = mhlo.convolution(%37, %cst_119) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x24xf32>, tensor<1x1x24x144xf32>) -> tensor<1x56x56x144xf32>
%39 = "mhlo.batch_norm_inference"(%38, %cst_118, %cst_117, %cst_116, %cst_115) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x56x56x144xf32>
%40 = "mhlo.clamp"(%3, %39, %0) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%41 = mhlo.convolution(%40, %6) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 144 : i64} : (tensor<1x56x56x144xf32>, tensor<3x3x1x144xf32>) -> tensor<1x56x56x144xf32>
%42 = "mhlo.batch_norm_inference"(%41, %cst_114, %cst_113, %cst_112, %cst_111) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x56x56x144xf32>
%43 = "mhlo.clamp"(%3, %42, %0) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%44 = mhlo.convolution(%43, %cst_124) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x144xf32>, tensor<1x1x144x24xf32>) -> tensor<1x56x56x24xf32>
%45 = "mhlo.batch_norm_inference"(%44, %cst_123, %cst_122, %cst_121, %cst_120) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>) -> tensor<1x56x56x24xf32>
%46 = chlo.broadcast_add %37, %45 : (tensor<1x56x56x24xf32>, tensor<1x56x56x24xf32>) -> tensor<1x56x56x24xf32>
%47 = mhlo.convolution(%46, %cst_133) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x24xf32>, tensor<1x1x24x144xf32>) -> tensor<1x56x56x144xf32>
%48 = "mhlo.batch_norm_inference"(%47, %cst_132, %cst_131, %cst_130, %cst_129) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x56x56x144xf32>
%49 = "mhlo.clamp"(%3, %48, %0) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%50 = "mhlo.pad"(%49, %3) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x57x57x144xf32>
%51 = mhlo.convolution(%50, %7) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 144 : i64} : (tensor<1x57x57x144xf32>, tensor<3x3x1x144xf32>) -> tensor<1x28x28x144xf32>
%52 = "mhlo.batch_norm_inference"(%51, %cst_128, %cst_127, %cst_126, %cst_125) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x28x28x144xf32>
%53 = "mhlo.clamp"(%3, %52, %0) : (tensor<f32>, tensor<1x28x28x144xf32>, tensor<f32>) -> tensor<1x28x28x144xf32>
%54 = mhlo.convolution(%53, %cst_138) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x144xf32>, tensor<1x1x144x32xf32>) -> tensor<1x28x28x32xf32>
%55 = "mhlo.batch_norm_inference"(%54, %cst_137, %cst_136, %cst_135, %cst_134) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x28x28x32xf32>
%56 = mhlo.convolution(%55, %cst_147) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%57 = "mhlo.batch_norm_inference"(%56, %cst_146, %cst_145, %cst_144, %cst_143) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%58 = "mhlo.clamp"(%3, %57, %0) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%59 = mhlo.convolution(%58, %8) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x28x28x192xf32>, tensor<3x3x1x192xf32>) -> tensor<1x28x28x192xf32>
%60 = "mhlo.batch_norm_inference"(%59, %cst_142, %cst_141, %cst_140, %cst_139) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%61 = "mhlo.clamp"(%3, %60, %0) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%62 = mhlo.convolution(%61, %cst_152) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x192xf32>, tensor<1x1x192x32xf32>) -> tensor<1x28x28x32xf32>
%63 = "mhlo.batch_norm_inference"(%62, %cst_151, %cst_150, %cst_149, %cst_148) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x28x28x32xf32>
%64 = chlo.broadcast_add %55, %63 : (tensor<1x28x28x32xf32>, tensor<1x28x28x32xf32>) -> tensor<1x28x28x32xf32>
%65 = mhlo.convolution(%64, %cst_161) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%66 = "mhlo.batch_norm_inference"(%65, %cst_160, %cst_159, %cst_158, %cst_157) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%67 = "mhlo.clamp"(%3, %66, %0) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%68 = mhlo.convolution(%67, %9) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x28x28x192xf32>, tensor<3x3x1x192xf32>) -> tensor<1x28x28x192xf32>
%69 = "mhlo.batch_norm_inference"(%68, %cst_156, %cst_155, %cst_154, %cst_153) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%70 = "mhlo.clamp"(%3, %69, %0) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%71 = mhlo.convolution(%70, %cst_166) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x192xf32>, tensor<1x1x192x32xf32>) -> tensor<1x28x28x32xf32>
%72 = "mhlo.batch_norm_inference"(%71, %cst_165, %cst_164, %cst_163, %cst_162) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x28x28x32xf32>
%73 = chlo.broadcast_add %64, %72 : (tensor<1x28x28x32xf32>, tensor<1x28x28x32xf32>) -> tensor<1x28x28x32xf32>
%74 = mhlo.convolution(%73, %cst_175) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%75 = "mhlo.batch_norm_inference"(%74, %cst_174, %cst_173, %cst_172, %cst_171) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%76 = "mhlo.clamp"(%3, %75, %0) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%77 = "mhlo.pad"(%76, %3) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x29x29x192xf32>
%78 = mhlo.convolution(%77, %10) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x29x29x192xf32>, tensor<3x3x1x192xf32>) -> tensor<1x14x14x192xf32>
%79 = "mhlo.batch_norm_inference"(%78, %cst_170, %cst_169, %cst_168, %cst_167) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x14x14x192xf32>
%80 = "mhlo.clamp"(%3, %79, %0) : (tensor<f32>, tensor<1x14x14x192xf32>, tensor<f32>) -> tensor<1x14x14x192xf32>
%81 = mhlo.convolution(%80, %cst_180) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x192xf32>, tensor<1x1x192x64xf32>) -> tensor<1x14x14x64xf32>
%82 = "mhlo.batch_norm_inference"(%81, %cst_179, %cst_178, %cst_177, %cst_176) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%83 = mhlo.convolution(%82, %cst_189) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%84 = "mhlo.batch_norm_inference"(%83, %cst_188, %cst_187, %cst_186, %cst_185) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%85 = "mhlo.clamp"(%3, %84, %0) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%86 = mhlo.convolution(%85, %11) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%87 = "mhlo.batch_norm_inference"(%86, %cst_184, %cst_183, %cst_182, %cst_181) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%88 = "mhlo.clamp"(%3, %87, %0) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%89 = mhlo.convolution(%88, %cst_194) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%90 = "mhlo.batch_norm_inference"(%89, %cst_193, %cst_192, %cst_191, %cst_190) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%91 = chlo.broadcast_add %82, %90 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%92 = mhlo.convolution(%91, %cst_203) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%93 = "mhlo.batch_norm_inference"(%92, %cst_202, %cst_201, %cst_200, %cst_199) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%94 = "mhlo.clamp"(%3, %93, %0) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%95 = mhlo.convolution(%94, %12) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%96 = "mhlo.batch_norm_inference"(%95, %cst_198, %cst_197, %cst_196, %cst_195) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%97 = "mhlo.clamp"(%3, %96, %0) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%98 = mhlo.convolution(%97, %cst_208) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%99 = "mhlo.batch_norm_inference"(%98, %cst_207, %cst_206, %cst_205, %cst_204) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%100 = chlo.broadcast_add %91, %99 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%101 = mhlo.convolution(%100, %cst_217) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%102 = "mhlo.batch_norm_inference"(%101, %cst_216, %cst_215, %cst_214, %cst_213) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%103 = "mhlo.clamp"(%3, %102, %0) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%104 = mhlo.convolution(%103, %13) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%105 = "mhlo.batch_norm_inference"(%104, %cst_212, %cst_211, %cst_210, %cst_209) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%106 = "mhlo.clamp"(%3, %105, %0) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%107 = mhlo.convolution(%106, %cst_222) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%108 = "mhlo.batch_norm_inference"(%107, %cst_221, %cst_220, %cst_219, %cst_218) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%109 = chlo.broadcast_add %100, %108 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%110 = mhlo.convolution(%109, %cst_7) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%111 = "mhlo.batch_norm_inference"(%110, %cst_6, %cst_5, %cst_4, %cst_3) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%112 = "mhlo.clamp"(%3, %111, %0) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%113 = mhlo.convolution(%112, %14) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%114 = "mhlo.batch_norm_inference"(%113, %cst_2, %cst_1, %cst_0, %cst) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%115 = "mhlo.clamp"(%3, %114, %0) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%116 = mhlo.convolution(%115, %cst_12) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x96xf32>) -> tensor<1x14x14x96xf32>
%117 = "mhlo.batch_norm_inference"(%116, %cst_11, %cst_10, %cst_9, %cst_8) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%118 = mhlo.convolution(%117, %cst_21) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%119 = "mhlo.batch_norm_inference"(%118, %cst_20, %cst_19, %cst_18, %cst_17) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%120 = "mhlo.clamp"(%3, %119, %0) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%121 = mhlo.convolution(%120, %15) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x14x14x576xf32>, tensor<3x3x1x576xf32>) -> tensor<1x14x14x576xf32>
%122 = "mhlo.batch_norm_inference"(%121, %cst_16, %cst_15, %cst_14, %cst_13) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%123 = "mhlo.clamp"(%3, %122, %0) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%124 = mhlo.convolution(%123, %cst_26) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x14x14x96xf32>
%125 = "mhlo.batch_norm_inference"(%124, %cst_25, %cst_24, %cst_23, %cst_22) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%126 = chlo.broadcast_add %117, %125 : (tensor<1x14x14x96xf32>, tensor<1x14x14x96xf32>) -> tensor<1x14x14x96xf32>
%127 = mhlo.convolution(%126, %cst_35) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%128 = "mhlo.batch_norm_inference"(%127, %cst_34, %cst_33, %cst_32, %cst_31) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%129 = "mhlo.clamp"(%3, %128, %0) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%130 = mhlo.convolution(%129, %16) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x14x14x576xf32>, tensor<3x3x1x576xf32>) -> tensor<1x14x14x576xf32>
%131 = "mhlo.batch_norm_inference"(%130, %cst_30, %cst_29, %cst_28, %cst_27) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%132 = "mhlo.clamp"(%3, %131, %0) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%133 = mhlo.convolution(%132, %cst_40) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x14x14x96xf32>
%134 = "mhlo.batch_norm_inference"(%133, %cst_39, %cst_38, %cst_37, %cst_36) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%135 = chlo.broadcast_add %126, %134 : (tensor<1x14x14x96xf32>, tensor<1x14x14x96xf32>) -> tensor<1x14x14x96xf32>
%136 = mhlo.convolution(%135, %cst_49) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%137 = "mhlo.batch_norm_inference"(%136, %cst_48, %cst_47, %cst_46, %cst_45) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%138 = "mhlo.clamp"(%3, %137, %0) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%139 = "mhlo.pad"(%138, %3) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x15x15x576xf32>
%140 = mhlo.convolution(%139, %17) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x15x15x576xf32>, tensor<3x3x1x576xf32>) -> tensor<1x7x7x576xf32>
%141 = "mhlo.batch_norm_inference"(%140, %cst_44, %cst_43, %cst_42, %cst_41) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x7x7x576xf32>
%142 = "mhlo.clamp"(%3, %141, %0) : (tensor<f32>, tensor<1x7x7x576xf32>, tensor<f32>) -> tensor<1x7x7x576xf32>
%143 = mhlo.convolution(%142, %cst_54) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x576xf32>, tensor<1x1x576x160xf32>) -> tensor<1x7x7x160xf32>
%144 = "mhlo.batch_norm_inference"(%143, %cst_53, %cst_52, %cst_51, %cst_50) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>) -> tensor<1x7x7x160xf32>
%145 = mhlo.convolution(%144, %cst_63) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%146 = "mhlo.batch_norm_inference"(%145, %cst_62, %cst_61, %cst_60, %cst_59) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%147 = "mhlo.clamp"(%3, %146, %0) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%148 = mhlo.convolution(%147, %18) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x7x7x960xf32>, tensor<3x3x1x960xf32>) -> tensor<1x7x7x960xf32>
%149 = "mhlo.batch_norm_inference"(%148, %cst_58, %cst_57, %cst_56, %cst_55) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%150 = "mhlo.clamp"(%3, %149, %0) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%151 = mhlo.convolution(%150, %cst_68) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x160xf32>) -> tensor<1x7x7x160xf32>
%152 = "mhlo.batch_norm_inference"(%151, %cst_67, %cst_66, %cst_65, %cst_64) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>) -> tensor<1x7x7x160xf32>
%153 = chlo.broadcast_add %144, %152 : (tensor<1x7x7x160xf32>, tensor<1x7x7x160xf32>) -> tensor<1x7x7x160xf32>
%154 = mhlo.convolution(%153, %cst_77) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%155 = "mhlo.batch_norm_inference"(%154, %cst_76, %cst_75, %cst_74, %cst_73) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%156 = "mhlo.clamp"(%3, %155, %0) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%157 = mhlo.convolution(%156, %19) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x7x7x960xf32>, tensor<3x3x1x960xf32>) -> tensor<1x7x7x960xf32>
%158 = "mhlo.batch_norm_inference"(%157, %cst_72, %cst_71, %cst_70, %cst_69) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%159 = "mhlo.clamp"(%3, %158, %0) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%160 = mhlo.convolution(%159, %cst_82) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x160xf32>) -> tensor<1x7x7x160xf32>
%161 = "mhlo.batch_norm_inference"(%160, %cst_81, %cst_80, %cst_79, %cst_78) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>) -> tensor<1x7x7x160xf32>
%162 = chlo.broadcast_add %153, %161 : (tensor<1x7x7x160xf32>, tensor<1x7x7x160xf32>) -> tensor<1x7x7x160xf32>
%163 = mhlo.convolution(%162, %cst_91) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%164 = "mhlo.batch_norm_inference"(%163, %cst_90, %cst_89, %cst_88, %cst_87) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%165 = "mhlo.clamp"(%3, %164, %0) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%166 = mhlo.convolution(%165, %20) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x7x7x960xf32>, tensor<3x3x1x960xf32>) -> tensor<1x7x7x960xf32>
%167 = "mhlo.batch_norm_inference"(%166, %cst_86, %cst_85, %cst_84, %cst_83) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%168 = "mhlo.clamp"(%3, %167, %0) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%169 = mhlo.convolution(%168, %cst_96) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x320xf32>) -> tensor<1x7x7x320xf32>
%170 = "mhlo.batch_norm_inference"(%169, %cst_95, %cst_94, %cst_93, %cst_92) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x320xf32>, tensor<320xf32>, tensor<320xf32>, tensor<320xf32>, tensor<320xf32>) -> tensor<1x7x7x320xf32>
%171 = mhlo.convolution(%170, %cst_232) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x320xf32>, tensor<1x1x320x1280xf32>) -> tensor<1x7x7x1280xf32>
%172 = "mhlo.batch_norm_inference"(%171, %cst_231, %cst_230, %cst_229, %cst_228) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x1280xf32>, tensor<1280xf32>, tensor<1280xf32>, tensor<1280xf32>, tensor<1280xf32>) -> tensor<1x7x7x1280xf32>
%173 = "mhlo.clamp"(%3, %172, %0) : (tensor<f32>, tensor<1x7x7x1280xf32>, tensor<f32>) -> tensor<1x7x7x1280xf32>
%174 = "mhlo.reduce"(%173, %3) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%186 = mhlo.add %arg1, %arg2 : tensor<f32>
"mhlo.return"(%186) : (tensor<f32>) -> ()
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<1x7x7x1280xf32>, tensor<f32>) -> tensor<1x1280xf32>
%175 = chlo.broadcast_divide %174, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x1280xf32>, tensor<f32>) -> tensor<1x1280xf32>
%176 = "mhlo.dot"(%175, %cst_243) : (tensor<1x1280xf32>, tensor<1280x1000xf32>) -> tensor<1x1000xf32>
%177 = "mhlo.broadcast_in_dim"(%cst_242) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<1000xf32>) -> tensor<1x1000xf32>
%178 = mhlo.add %176, %177 : tensor<1x1000xf32>
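    // Numerically stable softmax over the 1000 logits: subtract the per-row maximum,
    // exponentiate, and normalize by the per-row sum.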
%179 = "mhlo.reduce"(%178, %2) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%186 = mhlo.maximum %arg1, %arg2 : tensor<f32>
"mhlo.return"(%186) : (tensor<f32>) -> ()
}) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x1000xf32>, tensor<f32>) -> tensor<1xf32>
%180 = linalg.tensor_expand_shape %179 [[0, 1]] : tensor<1xf32> into tensor<1x1xf32>
%181 = chlo.broadcast_subtract %178, %180 : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32>
%182 = "mhlo.exponential"(%181) : (tensor<1x1000xf32>) -> tensor<1x1000xf32>
%183 = "mhlo.reduce"(%182, %3) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%186 = mhlo.add %arg1, %arg2 : tensor<f32>
"mhlo.return"(%186) : (tensor<f32>) -> ()
}) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x1000xf32>, tensor<f32>) -> tensor<1xf32>
%184 = linalg.tensor_expand_shape %183 [[0, 1]] : tensor<1xf32> into tensor<1x1xf32>
%185 = chlo.broadcast_divide %182, %184 : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32>
return %185 : tensor<1x1000xf32>
}
}
// -----// IR Dump After Canonicalizer //----- //
builtin.func private @"__inference_<lambda>_133580"(%arg0: tensor<1x224x224x3xf32> {tf._user_specified_name = "x"}) -> tensor<1x1000xf32> attributes {tf._construction_context = "kEagerRuntime", tf._input_shapes = [#tf_type.shape<1x224x224x3>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, 
#tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>], tf.signature.is_stateful} {
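  // The opaque<"_", "0xDEADBEEF"> attributes below appear to be elision placeholders:
  // the actual weight tensors are too large to print and are omitted from this dump.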
%0 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x960xf32>
%1 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x960xf32>
%2 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x960xf32>
%3 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x576xf32>
%4 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x576xf32>
%5 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x576xf32>
%6 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%7 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%8 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%9 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%10 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x192xf32>
%11 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x192xf32>
%12 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x192xf32>
%13 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x144xf32>
%14 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x144xf32>
%15 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x96xf32>
%16 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x32xf32>
%cst = constant opaque<"_", "0xDEADBEEF"> : tensor<1280x1000xf32>
%cst_0 = constant opaque<"_", "0xDEADBEEF"> : tensor<1000xf32>
%cst_1 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x16xf32>
%cst_2 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_3 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_4 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_5 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_6 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_7 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_8 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_9 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_10 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x320x1280xf32>
%cst_11 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_12 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_13 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_14 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_15 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x3x32xf32>
%cst_16 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_17 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_18 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_19 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_20 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_21 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_22 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_23 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_24 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_25 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_26 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_27 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_28 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_29 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_30 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_31 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_32 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_33 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_34 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_35 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_36 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_37 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_38 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_39 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_40 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_41 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_42 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_43 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_44 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_45 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_46 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_47 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_48 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_49 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_50 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_51 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_52 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_53 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_54 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_55 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_56 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_57 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_58 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_59 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_60 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_61 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_62 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x64xf32>
%cst_63 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_64 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_65 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_66 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_67 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_68 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_69 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_70 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_71 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_72 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_73 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_74 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_75 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_76 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32>
%cst_77 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_78 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_79 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_80 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_81 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_82 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_83 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_84 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_85 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_86 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_87 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_88 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_89 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_90 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32>
%cst_91 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_92 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_93 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_94 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_95 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_96 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_97 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_98 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_99 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_100 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_101 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_102 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_103 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_104 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x32xf32>
%cst_105 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_106 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_107 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_108 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_109 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32>
%cst_110 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_111 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_112 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_113 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_114 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_115 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_116 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_117 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_118 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x24xf32>
%cst_119 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_120 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_121 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_122 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_123 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32>
%cst_124 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_125 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_126 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_127 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_128 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_129 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_130 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_131 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_132 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x24xf32>
%cst_133 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_134 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_135 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_136 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_137 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x16x96xf32>
%cst_138 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_139 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_140 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_141 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_142 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_143 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_144 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_145 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_146 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x320xf32>
%cst_147 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_148 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_149 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_150 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_151 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_152 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_153 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_154 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_155 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_156 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_157 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_158 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_159 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_160 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32>
%cst_161 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_162 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_163 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_164 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_165 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_166 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_167 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_168 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_169 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_170 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_171 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_172 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_173 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_174 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32>
%cst_175 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_176 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_177 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_178 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_179 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_180 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_181 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_182 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_183 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_184 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_185 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_186 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_187 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_188 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x160xf32>
%cst_189 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_190 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_191 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_192 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_193 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_194 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_195 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_196 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_197 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_198 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_199 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_200 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_201 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_202 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32>
%cst_203 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_204 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_205 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_206 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_207 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_208 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_209 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_210 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_211 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_212 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_213 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_214 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_215 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_216 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32>
%cst_217 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_218 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_219 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_220 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_221 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_222 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_223 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_224 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_225 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_226 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_227 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_228 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_229 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_230 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x96xf32>
%cst_231 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_232 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_233 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_234 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_235 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_236 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_237 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_238 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_239 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_240 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_241 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_242 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_243 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%17 = mhlo.constant dense<0.000000e+00> : tensor<f32>
%18 = mhlo.constant dense<0xFF800000> : tensor<f32>
%19 = mhlo.constant dense<4.900000e+01> : tensor<f32>
%20 = mhlo.constant dense<6.000000e+00> : tensor<f32>
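  // Scalar constants: %17 (0.0) and %20 (6.0) bound the relu6 clamps that follow;
  // %18 (-inf) seeds the softmax max-reduction and %19 (49.0) is the divisor for the
  // global average pooling later in the function.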
%21 = mhlo.convolution(%arg0, %cst_15) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 1], [0, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x224x224x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%22 = "mhlo.batch_norm_inference"(%21, %cst_16, %cst_17, %cst_18, %cst_19) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x112x112x32xf32>
%23 = "mhlo.clamp"(%17, %22, %20) : (tensor<f32>, tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x112x112x32xf32>
%24 = mhlo.convolution(%23, %16) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 32 : i64} : (tensor<1x112x112x32xf32>, tensor<3x3x1x32xf32>) -> tensor<1x112x112x32xf32>
%25 = "mhlo.batch_norm_inference"(%24, %cst_6, %cst_7, %cst_8, %cst_9) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x112x112x32xf32>
%26 = "mhlo.clamp"(%17, %25, %20) : (tensor<f32>, tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x112x112x32xf32>
%27 = mhlo.convolution(%26, %cst_1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x112x112x32xf32>, tensor<1x1x32x16xf32>) -> tensor<1x112x112x16xf32>
%28 = "mhlo.batch_norm_inference"(%27, %cst_2, %cst_3, %cst_4, %cst_5) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>) -> tensor<1x112x112x16xf32>
%29 = mhlo.convolution(%28, %cst_137) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x112x112x16xf32>, tensor<1x1x16x96xf32>) -> tensor<1x112x112x96xf32>
%30 = "mhlo.batch_norm_inference"(%29, %cst_138, %cst_139, %cst_140, %cst_141) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x112x112x96xf32>
%31 = "mhlo.clamp"(%17, %30, %20) : (tensor<f32>, tensor<1x112x112x96xf32>, tensor<f32>) -> tensor<1x112x112x96xf32>
%32 = "mhlo.pad"(%31, %17) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x112x112x96xf32>, tensor<f32>) -> tensor<1x113x113x96xf32>
%33 = mhlo.convolution(%32, %15) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 96 : i64} : (tensor<1x113x113x96xf32>, tensor<3x3x1x96xf32>) -> tensor<1x56x56x96xf32>
%34 = "mhlo.batch_norm_inference"(%33, %cst_142, %cst_143, %cst_144, %cst_145) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x56x56x96xf32>
%35 = "mhlo.clamp"(%17, %34, %20) : (tensor<f32>, tensor<1x56x56x96xf32>, tensor<f32>) -> tensor<1x56x56x96xf32>
%36 = mhlo.convolution(%35, %cst_132) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x96xf32>, tensor<1x1x96x24xf32>) -> tensor<1x56x56x24xf32>
%37 = "mhlo.batch_norm_inference"(%36, %cst_133, %cst_134, %cst_135, %cst_136) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>) -> tensor<1x56x56x24xf32>
%38 = mhlo.convolution(%37, %cst_123) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x24xf32>, tensor<1x1x24x144xf32>) -> tensor<1x56x56x144xf32>
%39 = "mhlo.batch_norm_inference"(%38, %cst_124, %cst_125, %cst_126, %cst_127) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x56x56x144xf32>
%40 = "mhlo.clamp"(%17, %39, %20) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%41 = mhlo.convolution(%40, %14) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 144 : i64} : (tensor<1x56x56x144xf32>, tensor<3x3x1x144xf32>) -> tensor<1x56x56x144xf32>
%42 = "mhlo.batch_norm_inference"(%41, %cst_128, %cst_129, %cst_130, %cst_131) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x56x56x144xf32>
%43 = "mhlo.clamp"(%17, %42, %20) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%44 = mhlo.convolution(%43, %cst_118) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x144xf32>, tensor<1x1x144x24xf32>) -> tensor<1x56x56x24xf32>
%45 = "mhlo.batch_norm_inference"(%44, %cst_119, %cst_120, %cst_121, %cst_122) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>) -> tensor<1x56x56x24xf32>
%46 = chlo.broadcast_add %37, %45 : (tensor<1x56x56x24xf32>, tensor<1x56x56x24xf32>) -> tensor<1x56x56x24xf32>
%47 = mhlo.convolution(%46, %cst_109) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x24xf32>, tensor<1x1x24x144xf32>) -> tensor<1x56x56x144xf32>
%48 = "mhlo.batch_norm_inference"(%47, %cst_110, %cst_111, %cst_112, %cst_113) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x56x56x144xf32>
%49 = "mhlo.clamp"(%17, %48, %20) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%50 = "mhlo.pad"(%49, %17) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x57x57x144xf32>
%51 = mhlo.convolution(%50, %13) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 144 : i64} : (tensor<1x57x57x144xf32>, tensor<3x3x1x144xf32>) -> tensor<1x28x28x144xf32>
%52 = "mhlo.batch_norm_inference"(%51, %cst_114, %cst_115, %cst_116, %cst_117) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x28x28x144xf32>
%53 = "mhlo.clamp"(%17, %52, %20) : (tensor<f32>, tensor<1x28x28x144xf32>, tensor<f32>) -> tensor<1x28x28x144xf32>
%54 = mhlo.convolution(%53, %cst_104) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x144xf32>, tensor<1x1x144x32xf32>) -> tensor<1x28x28x32xf32>
%55 = "mhlo.batch_norm_inference"(%54, %cst_105, %cst_106, %cst_107, %cst_108) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x28x28x32xf32>
%56 = mhlo.convolution(%55, %cst_95) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%57 = "mhlo.batch_norm_inference"(%56, %cst_96, %cst_97, %cst_98, %cst_99) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%58 = "mhlo.clamp"(%17, %57, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%59 = mhlo.convolution(%58, %12) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x28x28x192xf32>, tensor<3x3x1x192xf32>) -> tensor<1x28x28x192xf32>
%60 = "mhlo.batch_norm_inference"(%59, %cst_100, %cst_101, %cst_102, %cst_103) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%61 = "mhlo.clamp"(%17, %60, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%62 = mhlo.convolution(%61, %cst_90) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x192xf32>, tensor<1x1x192x32xf32>) -> tensor<1x28x28x32xf32>
%63 = "mhlo.batch_norm_inference"(%62, %cst_91, %cst_92, %cst_93, %cst_94) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x28x28x32xf32>
%64 = chlo.broadcast_add %55, %63 : (tensor<1x28x28x32xf32>, tensor<1x28x28x32xf32>) -> tensor<1x28x28x32xf32>
%65 = mhlo.convolution(%64, %cst_81) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%66 = "mhlo.batch_norm_inference"(%65, %cst_82, %cst_83, %cst_84, %cst_85) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%67 = "mhlo.clamp"(%17, %66, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%68 = mhlo.convolution(%67, %11) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x28x28x192xf32>, tensor<3x3x1x192xf32>) -> tensor<1x28x28x192xf32>
%69 = "mhlo.batch_norm_inference"(%68, %cst_86, %cst_87, %cst_88, %cst_89) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%70 = "mhlo.clamp"(%17, %69, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%71 = mhlo.convolution(%70, %cst_76) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x192xf32>, tensor<1x1x192x32xf32>) -> tensor<1x28x28x32xf32>
%72 = "mhlo.batch_norm_inference"(%71, %cst_77, %cst_78, %cst_79, %cst_80) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x28x28x32xf32>
%73 = chlo.broadcast_add %64, %72 : (tensor<1x28x28x32xf32>, tensor<1x28x28x32xf32>) -> tensor<1x28x28x32xf32>
%74 = mhlo.convolution(%73, %cst_67) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%75 = "mhlo.batch_norm_inference"(%74, %cst_68, %cst_69, %cst_70, %cst_71) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%76 = "mhlo.clamp"(%17, %75, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%77 = "mhlo.pad"(%76, %17) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x29x29x192xf32>
%78 = mhlo.convolution(%77, %10) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x29x29x192xf32>, tensor<3x3x1x192xf32>) -> tensor<1x14x14x192xf32>
%79 = "mhlo.batch_norm_inference"(%78, %cst_72, %cst_73, %cst_74, %cst_75) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x14x14x192xf32>
%80 = "mhlo.clamp"(%17, %79, %20) : (tensor<f32>, tensor<1x14x14x192xf32>, tensor<f32>) -> tensor<1x14x14x192xf32>
%81 = mhlo.convolution(%80, %cst_62) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x192xf32>, tensor<1x1x192x64xf32>) -> tensor<1x14x14x64xf32>
%82 = "mhlo.batch_norm_inference"(%81, %cst_63, %cst_64, %cst_65, %cst_66) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%83 = mhlo.convolution(%82, %cst_53) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%84 = "mhlo.batch_norm_inference"(%83, %cst_54, %cst_55, %cst_56, %cst_57) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%85 = "mhlo.clamp"(%17, %84, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%86 = mhlo.convolution(%85, %9) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%87 = "mhlo.batch_norm_inference"(%86, %cst_58, %cst_59, %cst_60, %cst_61) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%88 = "mhlo.clamp"(%17, %87, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%89 = mhlo.convolution(%88, %cst_48) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%90 = "mhlo.batch_norm_inference"(%89, %cst_49, %cst_50, %cst_51, %cst_52) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%91 = chlo.broadcast_add %82, %90 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%92 = mhlo.convolution(%91, %cst_39) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%93 = "mhlo.batch_norm_inference"(%92, %cst_40, %cst_41, %cst_42, %cst_43) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%94 = "mhlo.clamp"(%17, %93, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%95 = mhlo.convolution(%94, %8) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%96 = "mhlo.batch_norm_inference"(%95, %cst_44, %cst_45, %cst_46, %cst_47) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%97 = "mhlo.clamp"(%17, %96, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%98 = mhlo.convolution(%97, %cst_34) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%99 = "mhlo.batch_norm_inference"(%98, %cst_35, %cst_36, %cst_37, %cst_38) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%100 = chlo.broadcast_add %91, %99 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%101 = mhlo.convolution(%100, %cst_25) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%102 = "mhlo.batch_norm_inference"(%101, %cst_26, %cst_27, %cst_28, %cst_29) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%103 = "mhlo.clamp"(%17, %102, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%104 = mhlo.convolution(%103, %7) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%105 = "mhlo.batch_norm_inference"(%104, %cst_30, %cst_31, %cst_32, %cst_33) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%106 = "mhlo.clamp"(%17, %105, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%107 = mhlo.convolution(%106, %cst_20) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%108 = "mhlo.batch_norm_inference"(%107, %cst_21, %cst_22, %cst_23, %cst_24) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%109 = chlo.broadcast_add %100, %108 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%110 = mhlo.convolution(%109, %cst_235) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%111 = "mhlo.batch_norm_inference"(%110, %cst_236, %cst_237, %cst_238, %cst_239) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%112 = "mhlo.clamp"(%17, %111, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%113 = mhlo.convolution(%112, %6) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%114 = "mhlo.batch_norm_inference"(%113, %cst_240, %cst_241, %cst_242, %cst_243) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%115 = "mhlo.clamp"(%17, %114, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%116 = mhlo.convolution(%115, %cst_230) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x96xf32>) -> tensor<1x14x14x96xf32>
%117 = "mhlo.batch_norm_inference"(%116, %cst_231, %cst_232, %cst_233, %cst_234) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%118 = mhlo.convolution(%117, %cst_221) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%119 = "mhlo.batch_norm_inference"(%118, %cst_222, %cst_223, %cst_224, %cst_225) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%120 = "mhlo.clamp"(%17, %119, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%121 = mhlo.convolution(%120, %5) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x14x14x576xf32>, tensor<3x3x1x576xf32>) -> tensor<1x14x14x576xf32>
%122 = "mhlo.batch_norm_inference"(%121, %cst_226, %cst_227, %cst_228, %cst_229) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%123 = "mhlo.clamp"(%17, %122, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%124 = mhlo.convolution(%123, %cst_216) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x14x14x96xf32>
%125 = "mhlo.batch_norm_inference"(%124, %cst_217, %cst_218, %cst_219, %cst_220) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%126 = chlo.broadcast_add %117, %125 : (tensor<1x14x14x96xf32>, tensor<1x14x14x96xf32>) -> tensor<1x14x14x96xf32>
%127 = mhlo.convolution(%126, %cst_207) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%128 = "mhlo.batch_norm_inference"(%127, %cst_208, %cst_209, %cst_210, %cst_211) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%129 = "mhlo.clamp"(%17, %128, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%130 = mhlo.convolution(%129, %4) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x14x14x576xf32>, tensor<3x3x1x576xf32>) -> tensor<1x14x14x576xf32>
%131 = "mhlo.batch_norm_inference"(%130, %cst_212, %cst_213, %cst_214, %cst_215) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%132 = "mhlo.clamp"(%17, %131, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%133 = mhlo.convolution(%132, %cst_202) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x14x14x96xf32>
%134 = "mhlo.batch_norm_inference"(%133, %cst_203, %cst_204, %cst_205, %cst_206) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%135 = chlo.broadcast_add %126, %134 : (tensor<1x14x14x96xf32>, tensor<1x14x14x96xf32>) -> tensor<1x14x14x96xf32>
%136 = mhlo.convolution(%135, %cst_193) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%137 = "mhlo.batch_norm_inference"(%136, %cst_194, %cst_195, %cst_196, %cst_197) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%138 = "mhlo.clamp"(%17, %137, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%139 = "mhlo.pad"(%138, %17) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x15x15x576xf32>
%140 = mhlo.convolution(%139, %3) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x15x15x576xf32>, tensor<3x3x1x576xf32>) -> tensor<1x7x7x576xf32>
%141 = "mhlo.batch_norm_inference"(%140, %cst_198, %cst_199, %cst_200, %cst_201) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x7x7x576xf32>
%142 = "mhlo.clamp"(%17, %141, %20) : (tensor<f32>, tensor<1x7x7x576xf32>, tensor<f32>) -> tensor<1x7x7x576xf32>
%143 = mhlo.convolution(%142, %cst_188) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x576xf32>, tensor<1x1x576x160xf32>) -> tensor<1x7x7x160xf32>
%144 = "mhlo.batch_norm_inference"(%143, %cst_189, %cst_190, %cst_191, %cst_192) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>) -> tensor<1x7x7x160xf32>
%145 = mhlo.convolution(%144, %cst_179) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%146 = "mhlo.batch_norm_inference"(%145, %cst_180, %cst_181, %cst_182, %cst_183) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%147 = "mhlo.clamp"(%17, %146, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%148 = mhlo.convolution(%147, %2) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x7x7x960xf32>, tensor<3x3x1x960xf32>) -> tensor<1x7x7x960xf32>
%149 = "mhlo.batch_norm_inference"(%148, %cst_184, %cst_185, %cst_186, %cst_187) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%150 = "mhlo.clamp"(%17, %149, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%151 = mhlo.convolution(%150, %cst_174) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x160xf32>) -> tensor<1x7x7x160xf32>
%152 = "mhlo.batch_norm_inference"(%151, %cst_175, %cst_176, %cst_177, %cst_178) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>) -> tensor<1x7x7x160xf32>
%153 = chlo.broadcast_add %144, %152 : (tensor<1x7x7x160xf32>, tensor<1x7x7x160xf32>) -> tensor<1x7x7x160xf32>
%154 = mhlo.convolution(%153, %cst_165) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%155 = "mhlo.batch_norm_inference"(%154, %cst_166, %cst_167, %cst_168, %cst_169) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%156 = "mhlo.clamp"(%17, %155, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%157 = mhlo.convolution(%156, %1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x7x7x960xf32>, tensor<3x3x1x960xf32>) -> tensor<1x7x7x960xf32>
%158 = "mhlo.batch_norm_inference"(%157, %cst_170, %cst_171, %cst_172, %cst_173) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%159 = "mhlo.clamp"(%17, %158, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%160 = mhlo.convolution(%159, %cst_160) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x160xf32>) -> tensor<1x7x7x160xf32>
%161 = "mhlo.batch_norm_inference"(%160, %cst_161, %cst_162, %cst_163, %cst_164) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>) -> tensor<1x7x7x160xf32>
%162 = chlo.broadcast_add %153, %161 : (tensor<1x7x7x160xf32>, tensor<1x7x7x160xf32>) -> tensor<1x7x7x160xf32>
%163 = mhlo.convolution(%162, %cst_151) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%164 = "mhlo.batch_norm_inference"(%163, %cst_152, %cst_153, %cst_154, %cst_155) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%165 = "mhlo.clamp"(%17, %164, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%166 = mhlo.convolution(%165, %0) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x7x7x960xf32>, tensor<3x3x1x960xf32>) -> tensor<1x7x7x960xf32>
%167 = "mhlo.batch_norm_inference"(%166, %cst_156, %cst_157, %cst_158, %cst_159) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%168 = "mhlo.clamp"(%17, %167, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%169 = mhlo.convolution(%168, %cst_146) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x320xf32>) -> tensor<1x7x7x320xf32>
%170 = "mhlo.batch_norm_inference"(%169, %cst_147, %cst_148, %cst_149, %cst_150) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x320xf32>, tensor<320xf32>, tensor<320xf32>, tensor<320xf32>, tensor<320xf32>) -> tensor<1x7x7x320xf32>
%171 = mhlo.convolution(%170, %cst_10) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x320xf32>, tensor<1x1x320x1280xf32>) -> tensor<1x7x7x1280xf32>
%172 = "mhlo.batch_norm_inference"(%171, %cst_11, %cst_12, %cst_13, %cst_14) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x1280xf32>, tensor<1280xf32>, tensor<1280xf32>, tensor<1280xf32>, tensor<1280xf32>) -> tensor<1x7x7x1280xf32>
%173 = "mhlo.clamp"(%17, %172, %20) : (tensor<f32>, tensor<1x7x7x1280xf32>, tensor<f32>) -> tensor<1x7x7x1280xf32>
%174 = "mhlo.reduce"(%173, %17) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%186 = mhlo.add %arg1, %arg2 : tensor<f32>
"mhlo.return"(%186) : (tensor<f32>) -> ()
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<1x7x7x1280xf32>, tensor<f32>) -> tensor<1x1280xf32>
%175 = chlo.broadcast_divide %174, %19 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x1280xf32>, tensor<f32>) -> tensor<1x1280xf32>
%176 = "mhlo.dot"(%175, %cst) : (tensor<1x1280xf32>, tensor<1280x1000xf32>) -> tensor<1x1000xf32>
%177 = "mhlo.broadcast_in_dim"(%cst_0) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<1000xf32>) -> tensor<1x1000xf32>
%178 = mhlo.add %176, %177 : tensor<1x1000xf32>
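    // The max-reduce, subtract, exponential, sum-reduce, and divide that follow compute a numerically stable softmax over the 1000 logits: exp(x - max(x)) / sum(exp(x - max(x))).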
%179 = "mhlo.reduce"(%178, %18) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%186 = mhlo.maximum %arg1, %arg2 : tensor<f32>
"mhlo.return"(%186) : (tensor<f32>) -> ()
}) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x1000xf32>, tensor<f32>) -> tensor<1xf32>
%180 = linalg.tensor_expand_shape %179 [[0, 1]] : tensor<1xf32> into tensor<1x1xf32>
%181 = chlo.broadcast_subtract %178, %180 : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32>
%182 = "mhlo.exponential"(%181) : (tensor<1x1000xf32>) -> tensor<1x1000xf32>
%183 = "mhlo.reduce"(%182, %17) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%186 = mhlo.add %arg1, %arg2 : tensor<f32>
"mhlo.return"(%186) : (tensor<f32>) -> ()
}) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x1000xf32>, tensor<f32>) -> tensor<1xf32>
%184 = linalg.tensor_expand_shape %183 [[0, 1]] : tensor<1xf32> into tensor<1x1xf32>
%185 = chlo.broadcast_divide %182, %184 : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32>
return %185 : tensor<1x1000xf32>
}
// -----// IR Dump After SymbolDCE //----- //
builtin.module {
builtin.func @call(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi = "{\22a\22:[[\22ndarray\22,\22f32\22,4,1,224,224,3]],\22r\22:[[\22ndarray\22,\22f32\22,2,1,1000]],\22v\22:1}"}} {
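    // ABI wrapper: cast the incoming !hal.buffer_view to a 1x224x224x3 tensor, run the inference function, and cast the 1x1000 result back to a buffer view.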
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x224x224x3xf32>
%1 = call @"__inference_<lambda>_133580"(%0) : (tensor<1x224x224x3xf32>) -> tensor<1x1000xf32>
%2 = hal.tensor.cast %1 : tensor<1x1000xf32> -> !hal.buffer_view
return %2 : !hal.buffer_view
}
builtin.func private @"__inference_<lambda>_133580"(%arg0: tensor<1x224x224x3xf32> {tf._user_specified_name = "x"}) -> tensor<1x1000xf32> attributes {tf._construction_context = "kEagerRuntime", tf._input_shapes = [#tf_type.shape<1x224x224x3>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, 
#tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>], tf.signature.is_stateful} {
%0 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x960xf32>
%1 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x960xf32>
%2 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x960xf32>
%3 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x576xf32>
%4 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x576xf32>
%5 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x576xf32>
%6 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%7 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%8 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%9 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%10 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x192xf32>
%11 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x192xf32>
%12 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x192xf32>
%13 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x144xf32>
%14 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x144xf32>
%15 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x96xf32>
%16 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x32xf32>
%cst = constant opaque<"_", "0xDEADBEEF"> : tensor<1280x1000xf32>
%cst_0 = constant opaque<"_", "0xDEADBEEF"> : tensor<1000xf32>
%cst_1 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x16xf32>
%cst_2 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_3 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_4 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_5 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_6 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_7 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_8 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_9 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_10 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x320x1280xf32>
%cst_11 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_12 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_13 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_14 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_15 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x3x32xf32>
%cst_16 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_17 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_18 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_19 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_20 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_21 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_22 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_23 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_24 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_25 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_26 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_27 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_28 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_29 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_30 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_31 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_32 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_33 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_34 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_35 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_36 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_37 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_38 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_39 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_40 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_41 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_42 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_43 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_44 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_45 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_46 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_47 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_48 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_49 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_50 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_51 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_52 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_53 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_54 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_55 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_56 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_57 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_58 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_59 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_60 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_61 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_62 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x64xf32>
%cst_63 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_64 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_65 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_66 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_67 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_68 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_69 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_70 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_71 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_72 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_73 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_74 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_75 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_76 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32>
%cst_77 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_78 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_79 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_80 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_81 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_82 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_83 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_84 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_85 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_86 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_87 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_88 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_89 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_90 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32>
%cst_91 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_92 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_93 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_94 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_95 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_96 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_97 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_98 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_99 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_100 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_101 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_102 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_103 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_104 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x32xf32>
%cst_105 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_106 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_107 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_108 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_109 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32>
%cst_110 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_111 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_112 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_113 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_114 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_115 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_116 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_117 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_118 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x24xf32>
%cst_119 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_120 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_121 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_122 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_123 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32>
%cst_124 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_125 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_126 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_127 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_128 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_129 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_130 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_131 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_132 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x24xf32>
%cst_133 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_134 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_135 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_136 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_137 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x16x96xf32>
%cst_138 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_139 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_140 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_141 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_142 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_143 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_144 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_145 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_146 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x320xf32>
%cst_147 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_148 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_149 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_150 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_151 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_152 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_153 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_154 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_155 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_156 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_157 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_158 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_159 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_160 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32>
%cst_161 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_162 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_163 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_164 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_165 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_166 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_167 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_168 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_169 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_170 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_171 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_172 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_173 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_174 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32>
%cst_175 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_176 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_177 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_178 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_179 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_180 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_181 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_182 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_183 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_184 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_185 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_186 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_187 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_188 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x160xf32>
%cst_189 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_190 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_191 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_192 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_193 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_194 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_195 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_196 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_197 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_198 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_199 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_200 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_201 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_202 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32>
%cst_203 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_204 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_205 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_206 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_207 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_208 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_209 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_210 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_211 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_212 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_213 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_214 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_215 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_216 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32>
%cst_217 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_218 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_219 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_220 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_221 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_222 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_223 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_224 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_225 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_226 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_227 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_228 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_229 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_230 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x96xf32>
%cst_231 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_232 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_233 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_234 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_235 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_236 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_237 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_238 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_239 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_240 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_241 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_242 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_243 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%17 = mhlo.constant dense<0.000000e+00> : tensor<f32>
%18 = mhlo.constant dense<0xFF800000> : tensor<f32>
%19 = mhlo.constant dense<4.900000e+01> : tensor<f32>
%20 = mhlo.constant dense<6.000000e+00> : tensor<f32>
%21 = mhlo.convolution(%arg0, %cst_15) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 1], [0, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x224x224x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%22 = "mhlo.batch_norm_inference"(%21, %cst_16, %cst_17, %cst_18, %cst_19) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x112x112x32xf32>
%23 = "mhlo.clamp"(%17, %22, %20) : (tensor<f32>, tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x112x112x32xf32>
%24 = mhlo.convolution(%23, %16) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 32 : i64} : (tensor<1x112x112x32xf32>, tensor<3x3x1x32xf32>) -> tensor<1x112x112x32xf32>
%25 = "mhlo.batch_norm_inference"(%24, %cst_6, %cst_7, %cst_8, %cst_9) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x112x112x32xf32>
%26 = "mhlo.clamp"(%17, %25, %20) : (tensor<f32>, tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x112x112x32xf32>
%27 = mhlo.convolution(%26, %cst_1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x112x112x32xf32>, tensor<1x1x32x16xf32>) -> tensor<1x112x112x16xf32>
%28 = "mhlo.batch_norm_inference"(%27, %cst_2, %cst_3, %cst_4, %cst_5) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>) -> tensor<1x112x112x16xf32>
%29 = mhlo.convolution(%28, %cst_137) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x112x112x16xf32>, tensor<1x1x16x96xf32>) -> tensor<1x112x112x96xf32>
%30 = "mhlo.batch_norm_inference"(%29, %cst_138, %cst_139, %cst_140, %cst_141) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x112x112x96xf32>
%31 = "mhlo.clamp"(%17, %30, %20) : (tensor<f32>, tensor<1x112x112x96xf32>, tensor<f32>) -> tensor<1x112x112x96xf32>
%32 = "mhlo.pad"(%31, %17) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x112x112x96xf32>, tensor<f32>) -> tensor<1x113x113x96xf32>
%33 = mhlo.convolution(%32, %15) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 96 : i64} : (tensor<1x113x113x96xf32>, tensor<3x3x1x96xf32>) -> tensor<1x56x56x96xf32>
%34 = "mhlo.batch_norm_inference"(%33, %cst_142, %cst_143, %cst_144, %cst_145) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x56x56x96xf32>
%35 = "mhlo.clamp"(%17, %34, %20) : (tensor<f32>, tensor<1x56x56x96xf32>, tensor<f32>) -> tensor<1x56x56x96xf32>
%36 = mhlo.convolution(%35, %cst_132) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x96xf32>, tensor<1x1x96x24xf32>) -> tensor<1x56x56x24xf32>
%37 = "mhlo.batch_norm_inference"(%36, %cst_133, %cst_134, %cst_135, %cst_136) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>) -> tensor<1x56x56x24xf32>
%38 = mhlo.convolution(%37, %cst_123) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x24xf32>, tensor<1x1x24x144xf32>) -> tensor<1x56x56x144xf32>
%39 = "mhlo.batch_norm_inference"(%38, %cst_124, %cst_125, %cst_126, %cst_127) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x56x56x144xf32>
%40 = "mhlo.clamp"(%17, %39, %20) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%41 = mhlo.convolution(%40, %14) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 144 : i64} : (tensor<1x56x56x144xf32>, tensor<3x3x1x144xf32>) -> tensor<1x56x56x144xf32>
%42 = "mhlo.batch_norm_inference"(%41, %cst_128, %cst_129, %cst_130, %cst_131) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x56x56x144xf32>
%43 = "mhlo.clamp"(%17, %42, %20) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%44 = mhlo.convolution(%43, %cst_118) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x144xf32>, tensor<1x1x144x24xf32>) -> tensor<1x56x56x24xf32>
%45 = "mhlo.batch_norm_inference"(%44, %cst_119, %cst_120, %cst_121, %cst_122) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>) -> tensor<1x56x56x24xf32>
%46 = chlo.broadcast_add %37, %45 : (tensor<1x56x56x24xf32>, tensor<1x56x56x24xf32>) -> tensor<1x56x56x24xf32>
%47 = mhlo.convolution(%46, %cst_109) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x24xf32>, tensor<1x1x24x144xf32>) -> tensor<1x56x56x144xf32>
%48 = "mhlo.batch_norm_inference"(%47, %cst_110, %cst_111, %cst_112, %cst_113) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x56x56x144xf32>
%49 = "mhlo.clamp"(%17, %48, %20) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%50 = "mhlo.pad"(%49, %17) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x57x57x144xf32>
%51 = mhlo.convolution(%50, %13) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 144 : i64} : (tensor<1x57x57x144xf32>, tensor<3x3x1x144xf32>) -> tensor<1x28x28x144xf32>
%52 = "mhlo.batch_norm_inference"(%51, %cst_114, %cst_115, %cst_116, %cst_117) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x28x28x144xf32>
%53 = "mhlo.clamp"(%17, %52, %20) : (tensor<f32>, tensor<1x28x28x144xf32>, tensor<f32>) -> tensor<1x28x28x144xf32>
%54 = mhlo.convolution(%53, %cst_104) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x144xf32>, tensor<1x1x144x32xf32>) -> tensor<1x28x28x32xf32>
%55 = "mhlo.batch_norm_inference"(%54, %cst_105, %cst_106, %cst_107, %cst_108) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x28x28x32xf32>
%56 = mhlo.convolution(%55, %cst_95) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%57 = "mhlo.batch_norm_inference"(%56, %cst_96, %cst_97, %cst_98, %cst_99) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%58 = "mhlo.clamp"(%17, %57, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%59 = mhlo.convolution(%58, %12) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x28x28x192xf32>, tensor<3x3x1x192xf32>) -> tensor<1x28x28x192xf32>
%60 = "mhlo.batch_norm_inference"(%59, %cst_100, %cst_101, %cst_102, %cst_103) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%61 = "mhlo.clamp"(%17, %60, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%62 = mhlo.convolution(%61, %cst_90) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x192xf32>, tensor<1x1x192x32xf32>) -> tensor<1x28x28x32xf32>
%63 = "mhlo.batch_norm_inference"(%62, %cst_91, %cst_92, %cst_93, %cst_94) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x28x28x32xf32>
%64 = chlo.broadcast_add %55, %63 : (tensor<1x28x28x32xf32>, tensor<1x28x28x32xf32>) -> tensor<1x28x28x32xf32>
%65 = mhlo.convolution(%64, %cst_81) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%66 = "mhlo.batch_norm_inference"(%65, %cst_82, %cst_83, %cst_84, %cst_85) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%67 = "mhlo.clamp"(%17, %66, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%68 = mhlo.convolution(%67, %11) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x28x28x192xf32>, tensor<3x3x1x192xf32>) -> tensor<1x28x28x192xf32>
%69 = "mhlo.batch_norm_inference"(%68, %cst_86, %cst_87, %cst_88, %cst_89) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%70 = "mhlo.clamp"(%17, %69, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%71 = mhlo.convolution(%70, %cst_76) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x192xf32>, tensor<1x1x192x32xf32>) -> tensor<1x28x28x32xf32>
%72 = "mhlo.batch_norm_inference"(%71, %cst_77, %cst_78, %cst_79, %cst_80) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x28x28x32xf32>
%73 = chlo.broadcast_add %64, %72 : (tensor<1x28x28x32xf32>, tensor<1x28x28x32xf32>) -> tensor<1x28x28x32xf32>
%74 = mhlo.convolution(%73, %cst_67) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%75 = "mhlo.batch_norm_inference"(%74, %cst_68, %cst_69, %cst_70, %cst_71) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%76 = "mhlo.clamp"(%17, %75, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%77 = "mhlo.pad"(%76, %17) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x29x29x192xf32>
%78 = mhlo.convolution(%77, %10) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x29x29x192xf32>, tensor<3x3x1x192xf32>) -> tensor<1x14x14x192xf32>
%79 = "mhlo.batch_norm_inference"(%78, %cst_72, %cst_73, %cst_74, %cst_75) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x14x14x192xf32>
%80 = "mhlo.clamp"(%17, %79, %20) : (tensor<f32>, tensor<1x14x14x192xf32>, tensor<f32>) -> tensor<1x14x14x192xf32>
%81 = mhlo.convolution(%80, %cst_62) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x192xf32>, tensor<1x1x192x64xf32>) -> tensor<1x14x14x64xf32>
%82 = "mhlo.batch_norm_inference"(%81, %cst_63, %cst_64, %cst_65, %cst_66) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%83 = mhlo.convolution(%82, %cst_53) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%84 = "mhlo.batch_norm_inference"(%83, %cst_54, %cst_55, %cst_56, %cst_57) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%85 = "mhlo.clamp"(%17, %84, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%86 = mhlo.convolution(%85, %9) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%87 = "mhlo.batch_norm_inference"(%86, %cst_58, %cst_59, %cst_60, %cst_61) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%88 = "mhlo.clamp"(%17, %87, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%89 = mhlo.convolution(%88, %cst_48) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%90 = "mhlo.batch_norm_inference"(%89, %cst_49, %cst_50, %cst_51, %cst_52) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%91 = chlo.broadcast_add %82, %90 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%92 = mhlo.convolution(%91, %cst_39) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%93 = "mhlo.batch_norm_inference"(%92, %cst_40, %cst_41, %cst_42, %cst_43) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%94 = "mhlo.clamp"(%17, %93, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%95 = mhlo.convolution(%94, %8) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%96 = "mhlo.batch_norm_inference"(%95, %cst_44, %cst_45, %cst_46, %cst_47) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%97 = "mhlo.clamp"(%17, %96, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%98 = mhlo.convolution(%97, %cst_34) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%99 = "mhlo.batch_norm_inference"(%98, %cst_35, %cst_36, %cst_37, %cst_38) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%100 = chlo.broadcast_add %91, %99 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%101 = mhlo.convolution(%100, %cst_25) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%102 = "mhlo.batch_norm_inference"(%101, %cst_26, %cst_27, %cst_28, %cst_29) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%103 = "mhlo.clamp"(%17, %102, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%104 = mhlo.convolution(%103, %7) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%105 = "mhlo.batch_norm_inference"(%104, %cst_30, %cst_31, %cst_32, %cst_33) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%106 = "mhlo.clamp"(%17, %105, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%107 = mhlo.convolution(%106, %cst_20) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%108 = "mhlo.batch_norm_inference"(%107, %cst_21, %cst_22, %cst_23, %cst_24) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%109 = chlo.broadcast_add %100, %108 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%110 = mhlo.convolution(%109, %cst_235) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%111 = "mhlo.batch_norm_inference"(%110, %cst_236, %cst_237, %cst_238, %cst_239) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%112 = "mhlo.clamp"(%17, %111, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%113 = mhlo.convolution(%112, %6) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%114 = "mhlo.batch_norm_inference"(%113, %cst_240, %cst_241, %cst_242, %cst_243) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%115 = "mhlo.clamp"(%17, %114, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%116 = mhlo.convolution(%115, %cst_230) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x96xf32>) -> tensor<1x14x14x96xf32>
%117 = "mhlo.batch_norm_inference"(%116, %cst_231, %cst_232, %cst_233, %cst_234) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%118 = mhlo.convolution(%117, %cst_221) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%119 = "mhlo.batch_norm_inference"(%118, %cst_222, %cst_223, %cst_224, %cst_225) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%120 = "mhlo.clamp"(%17, %119, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%121 = mhlo.convolution(%120, %5) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x14x14x576xf32>, tensor<3x3x1x576xf32>) -> tensor<1x14x14x576xf32>
%122 = "mhlo.batch_norm_inference"(%121, %cst_226, %cst_227, %cst_228, %cst_229) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%123 = "mhlo.clamp"(%17, %122, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%124 = mhlo.convolution(%123, %cst_216) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x14x14x96xf32>
%125 = "mhlo.batch_norm_inference"(%124, %cst_217, %cst_218, %cst_219, %cst_220) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%126 = chlo.broadcast_add %117, %125 : (tensor<1x14x14x96xf32>, tensor<1x14x14x96xf32>) -> tensor<1x14x14x96xf32>
%127 = mhlo.convolution(%126, %cst_207) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%128 = "mhlo.batch_norm_inference"(%127, %cst_208, %cst_209, %cst_210, %cst_211) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%129 = "mhlo.clamp"(%17, %128, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%130 = mhlo.convolution(%129, %4) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x14x14x576xf32>, tensor<3x3x1x576xf32>) -> tensor<1x14x14x576xf32>
%131 = "mhlo.batch_norm_inference"(%130, %cst_212, %cst_213, %cst_214, %cst_215) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%132 = "mhlo.clamp"(%17, %131, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%133 = mhlo.convolution(%132, %cst_202) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x14x14x96xf32>
%134 = "mhlo.batch_norm_inference"(%133, %cst_203, %cst_204, %cst_205, %cst_206) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%135 = chlo.broadcast_add %126, %134 : (tensor<1x14x14x96xf32>, tensor<1x14x14x96xf32>) -> tensor<1x14x14x96xf32>
%136 = mhlo.convolution(%135, %cst_193) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%137 = "mhlo.batch_norm_inference"(%136, %cst_194, %cst_195, %cst_196, %cst_197) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%138 = "mhlo.clamp"(%17, %137, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%139 = "mhlo.pad"(%138, %17) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x15x15x576xf32>
%140 = mhlo.convolution(%139, %3) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x15x15x576xf32>, tensor<3x3x1x576xf32>) -> tensor<1x7x7x576xf32>
%141 = "mhlo.batch_norm_inference"(%140, %cst_198, %cst_199, %cst_200, %cst_201) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x7x7x576xf32>
%142 = "mhlo.clamp"(%17, %141, %20) : (tensor<f32>, tensor<1x7x7x576xf32>, tensor<f32>) -> tensor<1x7x7x576xf32>
%143 = mhlo.convolution(%142, %cst_188) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x576xf32>, tensor<1x1x576x160xf32>) -> tensor<1x7x7x160xf32>
%144 = "mhlo.batch_norm_inference"(%143, %cst_189, %cst_190, %cst_191, %cst_192) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>) -> tensor<1x7x7x160xf32>
%145 = mhlo.convolution(%144, %cst_179) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%146 = "mhlo.batch_norm_inference"(%145, %cst_180, %cst_181, %cst_182, %cst_183) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%147 = "mhlo.clamp"(%17, %146, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%148 = mhlo.convolution(%147, %2) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x7x7x960xf32>, tensor<3x3x1x960xf32>) -> tensor<1x7x7x960xf32>
%149 = "mhlo.batch_norm_inference"(%148, %cst_184, %cst_185, %cst_186, %cst_187) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%150 = "mhlo.clamp"(%17, %149, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%151 = mhlo.convolution(%150, %cst_174) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x160xf32>) -> tensor<1x7x7x160xf32>
%152 = "mhlo.batch_norm_inference"(%151, %cst_175, %cst_176, %cst_177, %cst_178) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>) -> tensor<1x7x7x160xf32>
%153 = chlo.broadcast_add %144, %152 : (tensor<1x7x7x160xf32>, tensor<1x7x7x160xf32>) -> tensor<1x7x7x160xf32>
%154 = mhlo.convolution(%153, %cst_165) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%155 = "mhlo.batch_norm_inference"(%154, %cst_166, %cst_167, %cst_168, %cst_169) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%156 = "mhlo.clamp"(%17, %155, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%157 = mhlo.convolution(%156, %1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x7x7x960xf32>, tensor<3x3x1x960xf32>) -> tensor<1x7x7x960xf32>
%158 = "mhlo.batch_norm_inference"(%157, %cst_170, %cst_171, %cst_172, %cst_173) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%159 = "mhlo.clamp"(%17, %158, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%160 = mhlo.convolution(%159, %cst_160) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x160xf32>) -> tensor<1x7x7x160xf32>
%161 = "mhlo.batch_norm_inference"(%160, %cst_161, %cst_162, %cst_163, %cst_164) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>) -> tensor<1x7x7x160xf32>
%162 = chlo.broadcast_add %153, %161 : (tensor<1x7x7x160xf32>, tensor<1x7x7x160xf32>) -> tensor<1x7x7x160xf32>
%163 = mhlo.convolution(%162, %cst_151) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%164 = "mhlo.batch_norm_inference"(%163, %cst_152, %cst_153, %cst_154, %cst_155) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%165 = "mhlo.clamp"(%17, %164, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%166 = mhlo.convolution(%165, %0) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x7x7x960xf32>, tensor<3x3x1x960xf32>) -> tensor<1x7x7x960xf32>
%167 = "mhlo.batch_norm_inference"(%166, %cst_156, %cst_157, %cst_158, %cst_159) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%168 = "mhlo.clamp"(%17, %167, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%169 = mhlo.convolution(%168, %cst_146) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x320xf32>) -> tensor<1x7x7x320xf32>
%170 = "mhlo.batch_norm_inference"(%169, %cst_147, %cst_148, %cst_149, %cst_150) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x320xf32>, tensor<320xf32>, tensor<320xf32>, tensor<320xf32>, tensor<320xf32>) -> tensor<1x7x7x320xf32>
%171 = mhlo.convolution(%170, %cst_10) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x320xf32>, tensor<1x1x320x1280xf32>) -> tensor<1x7x7x1280xf32>
%172 = "mhlo.batch_norm_inference"(%171, %cst_11, %cst_12, %cst_13, %cst_14) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x1280xf32>, tensor<1280xf32>, tensor<1280xf32>, tensor<1280xf32>, tensor<1280xf32>) -> tensor<1x7x7x1280xf32>
%173 = "mhlo.clamp"(%17, %172, %20) : (tensor<f32>, tensor<1x7x7x1280xf32>, tensor<f32>) -> tensor<1x7x7x1280xf32>
%174 = "mhlo.reduce"(%173, %17) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%186 = mhlo.add %arg1, %arg2 : tensor<f32>
"mhlo.return"(%186) : (tensor<f32>) -> ()
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<1x7x7x1280xf32>, tensor<f32>) -> tensor<1x1280xf32>
%175 = chlo.broadcast_divide %174, %19 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x1280xf32>, tensor<f32>) -> tensor<1x1280xf32>
%176 = "mhlo.dot"(%175, %cst) : (tensor<1x1280xf32>, tensor<1280x1000xf32>) -> tensor<1x1000xf32>
%177 = "mhlo.broadcast_in_dim"(%cst_0) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<1000xf32>) -> tensor<1x1000xf32>
%178 = mhlo.add %176, %177 : tensor<1x1000xf32>
%179 = "mhlo.reduce"(%178, %18) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%186 = mhlo.maximum %arg1, %arg2 : tensor<f32>
"mhlo.return"(%186) : (tensor<f32>) -> ()
}) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x1000xf32>, tensor<f32>) -> tensor<1xf32>
%180 = linalg.tensor_expand_shape %179 [[0, 1]] : tensor<1xf32> into tensor<1x1xf32>
%181 = chlo.broadcast_subtract %178, %180 : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32>
%182 = "mhlo.exponential"(%181) : (tensor<1x1000xf32>) -> tensor<1x1000xf32>
%183 = "mhlo.reduce"(%182, %17) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%186 = mhlo.add %arg1, %arg2 : tensor<f32>
"mhlo.return"(%186) : (tensor<f32>) -> ()
}) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x1000xf32>, tensor<f32>) -> tensor<1xf32>
%184 = linalg.tensor_expand_shape %183 [[0, 1]] : tensor<1xf32> into tensor<1x1xf32>
%185 = chlo.broadcast_divide %182, %184 : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32>
return %185 : tensor<1x1000xf32>
}
}
// -----// IR Dump After Canonicalizer //----- //
builtin.func private @"__inference_<lambda>_133580"(%arg0: tensor<1x224x224x3xf32> {tf._user_specified_name = "x"}) -> tensor<1x1000xf32> attributes {tf._construction_context = "kEagerRuntime", tf._input_shapes = [#tf_type.shape<1x224x224x3>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, 
#tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>], tf.signature.is_stateful} {
%0 = mhlo.constant dense<6.000000e+00> : tensor<f32>
%1 = mhlo.constant dense<4.900000e+01> : tensor<f32>
%2 = mhlo.constant dense<0xFF800000> : tensor<f32>
%3 = mhlo.constant dense<0.000000e+00> : tensor<f32>
%cst = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_0 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_1 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_2 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_3 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_4 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_5 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_6 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_7 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_8 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_9 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_10 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_11 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_12 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x96xf32>
%cst_13 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_14 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_15 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_16 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_17 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_18 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_19 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_20 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_21 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_22 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_23 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_24 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_25 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_26 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32>
%cst_27 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_28 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_29 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_30 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_31 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_32 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_33 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_34 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_35 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_36 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_37 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_38 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_39 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_40 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32>
%cst_41 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_42 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_43 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_44 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_45 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_46 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_47 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_48 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_49 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_50 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_51 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_52 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_53 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_54 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x160xf32>
%cst_55 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_56 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_57 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_58 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_59 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_60 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_61 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_62 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_63 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_64 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_65 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_66 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_67 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_68 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32>
%cst_69 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_70 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_71 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_72 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_73 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_74 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_75 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_76 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_77 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_78 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_79 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_80 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_81 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_82 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32>
%cst_83 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_84 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_85 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_86 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_87 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_88 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_89 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_90 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_91 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_92 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_93 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_94 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_95 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_96 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x320xf32>
%cst_97 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_98 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_99 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_100 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_101 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_102 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_103 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_104 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_105 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x16x96xf32>
%cst_106 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_107 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_108 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_109 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_110 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x24xf32>
%cst_111 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_112 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_113 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_114 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_115 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_116 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_117 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_118 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_119 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32>
%cst_120 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_121 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_122 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_123 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_124 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x24xf32>
%cst_125 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_126 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_127 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_128 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_129 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_130 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_131 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_132 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_133 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32>
%cst_134 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_135 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_136 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_137 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_138 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x32xf32>
%cst_139 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_140 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_141 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_142 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_143 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_144 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_145 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_146 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_147 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_148 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_149 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_150 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_151 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_152 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32>
%cst_153 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_154 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_155 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_156 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_157 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_158 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_159 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_160 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_161 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_162 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_163 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_164 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_165 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_166 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32>
%cst_167 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_168 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_169 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_170 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_171 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_172 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_173 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_174 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_175 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_176 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_177 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_178 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_179 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_180 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x64xf32>
%cst_181 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_182 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_183 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_184 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_185 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_186 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_187 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_188 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_189 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_190 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_191 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_192 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_193 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_194 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_195 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_196 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_197 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_198 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_199 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_200 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_201 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_202 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_203 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_204 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_205 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_206 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_207 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_208 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_209 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_210 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_211 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_212 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_213 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_214 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_215 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_216 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_217 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_218 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_219 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_220 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_221 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_222 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_223 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_224 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_225 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_226 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_227 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x3x32xf32>
%cst_228 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_229 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_230 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_231 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_232 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x320x1280xf32>
%cst_233 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_234 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_235 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_236 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_237 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_238 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_239 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_240 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_241 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x16xf32>
%cst_242 = constant opaque<"_", "0xDEADBEEF"> : tensor<1000xf32>
%cst_243 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280x1000xf32>
%4 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x32xf32>
%5 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x96xf32>
%6 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x144xf32>
%7 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x144xf32>
%8 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x192xf32>
%9 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x192xf32>
%10 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x192xf32>
%11 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%12 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%13 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%14 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%15 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x576xf32>
%16 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x576xf32>
%17 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x576xf32>
%18 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x960xf32>
%19 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x960xf32>
%20 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x960xf32>
%21 = mhlo.convolution(%arg0, %cst_227) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 1], [0, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x224x224x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%22 = "mhlo.batch_norm_inference"(%21, %cst_226, %cst_225, %cst_224, %cst_223) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x112x112x32xf32>
%23 = "mhlo.clamp"(%3, %22, %0) : (tensor<f32>, tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x112x112x32xf32>
%24 = mhlo.convolution(%23, %4) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 32 : i64} : (tensor<1x112x112x32xf32>, tensor<3x3x1x32xf32>) -> tensor<1x112x112x32xf32>
%25 = "mhlo.batch_norm_inference"(%24, %cst_236, %cst_235, %cst_234, %cst_233) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x112x112x32xf32>
%26 = "mhlo.clamp"(%3, %25, %0) : (tensor<f32>, tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x112x112x32xf32>
%27 = mhlo.convolution(%26, %cst_241) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x112x112x32xf32>, tensor<1x1x32x16xf32>) -> tensor<1x112x112x16xf32>
%28 = "mhlo.batch_norm_inference"(%27, %cst_240, %cst_239, %cst_238, %cst_237) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>) -> tensor<1x112x112x16xf32>
%29 = mhlo.convolution(%28, %cst_105) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x112x112x16xf32>, tensor<1x1x16x96xf32>) -> tensor<1x112x112x96xf32>
%30 = "mhlo.batch_norm_inference"(%29, %cst_104, %cst_103, %cst_102, %cst_101) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x112x112x96xf32>
%31 = "mhlo.clamp"(%3, %30, %0) : (tensor<f32>, tensor<1x112x112x96xf32>, tensor<f32>) -> tensor<1x112x112x96xf32>
%32 = "mhlo.pad"(%31, %3) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x112x112x96xf32>, tensor<f32>) -> tensor<1x113x113x96xf32>
%33 = mhlo.convolution(%32, %5) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 96 : i64} : (tensor<1x113x113x96xf32>, tensor<3x3x1x96xf32>) -> tensor<1x56x56x96xf32>
%34 = "mhlo.batch_norm_inference"(%33, %cst_100, %cst_99, %cst_98, %cst_97) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x56x56x96xf32>
%35 = "mhlo.clamp"(%3, %34, %0) : (tensor<f32>, tensor<1x56x56x96xf32>, tensor<f32>) -> tensor<1x56x56x96xf32>
%36 = mhlo.convolution(%35, %cst_110) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x96xf32>, tensor<1x1x96x24xf32>) -> tensor<1x56x56x24xf32>
%37 = "mhlo.batch_norm_inference"(%36, %cst_109, %cst_108, %cst_107, %cst_106) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>) -> tensor<1x56x56x24xf32>
%38 = mhlo.convolution(%37, %cst_119) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x24xf32>, tensor<1x1x24x144xf32>) -> tensor<1x56x56x144xf32>
%39 = "mhlo.batch_norm_inference"(%38, %cst_118, %cst_117, %cst_116, %cst_115) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x56x56x144xf32>
%40 = "mhlo.clamp"(%3, %39, %0) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%41 = mhlo.convolution(%40, %6) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 144 : i64} : (tensor<1x56x56x144xf32>, tensor<3x3x1x144xf32>) -> tensor<1x56x56x144xf32>
%42 = "mhlo.batch_norm_inference"(%41, %cst_114, %cst_113, %cst_112, %cst_111) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x56x56x144xf32>
%43 = "mhlo.clamp"(%3, %42, %0) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%44 = mhlo.convolution(%43, %cst_124) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x144xf32>, tensor<1x1x144x24xf32>) -> tensor<1x56x56x24xf32>
%45 = "mhlo.batch_norm_inference"(%44, %cst_123, %cst_122, %cst_121, %cst_120) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>) -> tensor<1x56x56x24xf32>
%46 = chlo.broadcast_add %37, %45 : (tensor<1x56x56x24xf32>, tensor<1x56x56x24xf32>) -> tensor<1x56x56x24xf32>
%47 = mhlo.convolution(%46, %cst_133) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x24xf32>, tensor<1x1x24x144xf32>) -> tensor<1x56x56x144xf32>
%48 = "mhlo.batch_norm_inference"(%47, %cst_132, %cst_131, %cst_130, %cst_129) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x56x56x144xf32>
%49 = "mhlo.clamp"(%3, %48, %0) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%50 = "mhlo.pad"(%49, %3) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x57x57x144xf32>
%51 = mhlo.convolution(%50, %7) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 144 : i64} : (tensor<1x57x57x144xf32>, tensor<3x3x1x144xf32>) -> tensor<1x28x28x144xf32>
%52 = "mhlo.batch_norm_inference"(%51, %cst_128, %cst_127, %cst_126, %cst_125) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x28x28x144xf32>
%53 = "mhlo.clamp"(%3, %52, %0) : (tensor<f32>, tensor<1x28x28x144xf32>, tensor<f32>) -> tensor<1x28x28x144xf32>
%54 = mhlo.convolution(%53, %cst_138) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x144xf32>, tensor<1x1x144x32xf32>) -> tensor<1x28x28x32xf32>
%55 = "mhlo.batch_norm_inference"(%54, %cst_137, %cst_136, %cst_135, %cst_134) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x28x28x32xf32>
%56 = mhlo.convolution(%55, %cst_147) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%57 = "mhlo.batch_norm_inference"(%56, %cst_146, %cst_145, %cst_144, %cst_143) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%58 = "mhlo.clamp"(%3, %57, %0) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%59 = mhlo.convolution(%58, %8) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x28x28x192xf32>, tensor<3x3x1x192xf32>) -> tensor<1x28x28x192xf32>
%60 = "mhlo.batch_norm_inference"(%59, %cst_142, %cst_141, %cst_140, %cst_139) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%61 = "mhlo.clamp"(%3, %60, %0) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%62 = mhlo.convolution(%61, %cst_152) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x192xf32>, tensor<1x1x192x32xf32>) -> tensor<1x28x28x32xf32>
%63 = "mhlo.batch_norm_inference"(%62, %cst_151, %cst_150, %cst_149, %cst_148) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x28x28x32xf32>
%64 = chlo.broadcast_add %55, %63 : (tensor<1x28x28x32xf32>, tensor<1x28x28x32xf32>) -> tensor<1x28x28x32xf32>
%65 = mhlo.convolution(%64, %cst_161) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%66 = "mhlo.batch_norm_inference"(%65, %cst_160, %cst_159, %cst_158, %cst_157) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%67 = "mhlo.clamp"(%3, %66, %0) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%68 = mhlo.convolution(%67, %9) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x28x28x192xf32>, tensor<3x3x1x192xf32>) -> tensor<1x28x28x192xf32>
%69 = "mhlo.batch_norm_inference"(%68, %cst_156, %cst_155, %cst_154, %cst_153) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%70 = "mhlo.clamp"(%3, %69, %0) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%71 = mhlo.convolution(%70, %cst_166) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x192xf32>, tensor<1x1x192x32xf32>) -> tensor<1x28x28x32xf32>
%72 = "mhlo.batch_norm_inference"(%71, %cst_165, %cst_164, %cst_163, %cst_162) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x28x28x32xf32>
%73 = chlo.broadcast_add %64, %72 : (tensor<1x28x28x32xf32>, tensor<1x28x28x32xf32>) -> tensor<1x28x28x32xf32>
%74 = mhlo.convolution(%73, %cst_175) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%75 = "mhlo.batch_norm_inference"(%74, %cst_174, %cst_173, %cst_172, %cst_171) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%76 = "mhlo.clamp"(%3, %75, %0) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%77 = "mhlo.pad"(%76, %3) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x29x29x192xf32>
%78 = mhlo.convolution(%77, %10) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x29x29x192xf32>, tensor<3x3x1x192xf32>) -> tensor<1x14x14x192xf32>
%79 = "mhlo.batch_norm_inference"(%78, %cst_170, %cst_169, %cst_168, %cst_167) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x14x14x192xf32>
%80 = "mhlo.clamp"(%3, %79, %0) : (tensor<f32>, tensor<1x14x14x192xf32>, tensor<f32>) -> tensor<1x14x14x192xf32>
%81 = mhlo.convolution(%80, %cst_180) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x192xf32>, tensor<1x1x192x64xf32>) -> tensor<1x14x14x64xf32>
%82 = "mhlo.batch_norm_inference"(%81, %cst_179, %cst_178, %cst_177, %cst_176) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%83 = mhlo.convolution(%82, %cst_189) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%84 = "mhlo.batch_norm_inference"(%83, %cst_188, %cst_187, %cst_186, %cst_185) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%85 = "mhlo.clamp"(%3, %84, %0) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%86 = mhlo.convolution(%85, %11) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%87 = "mhlo.batch_norm_inference"(%86, %cst_184, %cst_183, %cst_182, %cst_181) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%88 = "mhlo.clamp"(%3, %87, %0) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%89 = mhlo.convolution(%88, %cst_194) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%90 = "mhlo.batch_norm_inference"(%89, %cst_193, %cst_192, %cst_191, %cst_190) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%91 = chlo.broadcast_add %82, %90 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%92 = mhlo.convolution(%91, %cst_203) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%93 = "mhlo.batch_norm_inference"(%92, %cst_202, %cst_201, %cst_200, %cst_199) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%94 = "mhlo.clamp"(%3, %93, %0) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%95 = mhlo.convolution(%94, %12) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%96 = "mhlo.batch_norm_inference"(%95, %cst_198, %cst_197, %cst_196, %cst_195) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%97 = "mhlo.clamp"(%3, %96, %0) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%98 = mhlo.convolution(%97, %cst_208) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%99 = "mhlo.batch_norm_inference"(%98, %cst_207, %cst_206, %cst_205, %cst_204) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%100 = chlo.broadcast_add %91, %99 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%101 = mhlo.convolution(%100, %cst_217) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%102 = "mhlo.batch_norm_inference"(%101, %cst_216, %cst_215, %cst_214, %cst_213) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%103 = "mhlo.clamp"(%3, %102, %0) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%104 = mhlo.convolution(%103, %13) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%105 = "mhlo.batch_norm_inference"(%104, %cst_212, %cst_211, %cst_210, %cst_209) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%106 = "mhlo.clamp"(%3, %105, %0) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%107 = mhlo.convolution(%106, %cst_222) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%108 = "mhlo.batch_norm_inference"(%107, %cst_221, %cst_220, %cst_219, %cst_218) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%109 = chlo.broadcast_add %100, %108 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%110 = mhlo.convolution(%109, %cst_7) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%111 = "mhlo.batch_norm_inference"(%110, %cst_6, %cst_5, %cst_4, %cst_3) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%112 = "mhlo.clamp"(%3, %111, %0) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%113 = mhlo.convolution(%112, %14) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%114 = "mhlo.batch_norm_inference"(%113, %cst_2, %cst_1, %cst_0, %cst) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%115 = "mhlo.clamp"(%3, %114, %0) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%116 = mhlo.convolution(%115, %cst_12) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x96xf32>) -> tensor<1x14x14x96xf32>
%117 = "mhlo.batch_norm_inference"(%116, %cst_11, %cst_10, %cst_9, %cst_8) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%118 = mhlo.convolution(%117, %cst_21) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%119 = "mhlo.batch_norm_inference"(%118, %cst_20, %cst_19, %cst_18, %cst_17) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%120 = "mhlo.clamp"(%3, %119, %0) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%121 = mhlo.convolution(%120, %15) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x14x14x576xf32>, tensor<3x3x1x576xf32>) -> tensor<1x14x14x576xf32>
%122 = "mhlo.batch_norm_inference"(%121, %cst_16, %cst_15, %cst_14, %cst_13) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%123 = "mhlo.clamp"(%3, %122, %0) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%124 = mhlo.convolution(%123, %cst_26) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x14x14x96xf32>
%125 = "mhlo.batch_norm_inference"(%124, %cst_25, %cst_24, %cst_23, %cst_22) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%126 = chlo.broadcast_add %117, %125 : (tensor<1x14x14x96xf32>, tensor<1x14x14x96xf32>) -> tensor<1x14x14x96xf32>
%127 = mhlo.convolution(%126, %cst_35) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%128 = "mhlo.batch_norm_inference"(%127, %cst_34, %cst_33, %cst_32, %cst_31) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%129 = "mhlo.clamp"(%3, %128, %0) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%130 = mhlo.convolution(%129, %16) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x14x14x576xf32>, tensor<3x3x1x576xf32>) -> tensor<1x14x14x576xf32>
%131 = "mhlo.batch_norm_inference"(%130, %cst_30, %cst_29, %cst_28, %cst_27) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%132 = "mhlo.clamp"(%3, %131, %0) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%133 = mhlo.convolution(%132, %cst_40) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x14x14x96xf32>
%134 = "mhlo.batch_norm_inference"(%133, %cst_39, %cst_38, %cst_37, %cst_36) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%135 = chlo.broadcast_add %126, %134 : (tensor<1x14x14x96xf32>, tensor<1x14x14x96xf32>) -> tensor<1x14x14x96xf32>
%136 = mhlo.convolution(%135, %cst_49) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%137 = "mhlo.batch_norm_inference"(%136, %cst_48, %cst_47, %cst_46, %cst_45) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%138 = "mhlo.clamp"(%3, %137, %0) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%139 = "mhlo.pad"(%138, %3) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x15x15x576xf32>
%140 = mhlo.convolution(%139, %17) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x15x15x576xf32>, tensor<3x3x1x576xf32>) -> tensor<1x7x7x576xf32>
%141 = "mhlo.batch_norm_inference"(%140, %cst_44, %cst_43, %cst_42, %cst_41) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x7x7x576xf32>
%142 = "mhlo.clamp"(%3, %141, %0) : (tensor<f32>, tensor<1x7x7x576xf32>, tensor<f32>) -> tensor<1x7x7x576xf32>
%143 = mhlo.convolution(%142, %cst_54) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x576xf32>, tensor<1x1x576x160xf32>) -> tensor<1x7x7x160xf32>
%144 = "mhlo.batch_norm_inference"(%143, %cst_53, %cst_52, %cst_51, %cst_50) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>) -> tensor<1x7x7x160xf32>
%145 = mhlo.convolution(%144, %cst_63) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%146 = "mhlo.batch_norm_inference"(%145, %cst_62, %cst_61, %cst_60, %cst_59) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%147 = "mhlo.clamp"(%3, %146, %0) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%148 = mhlo.convolution(%147, %18) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x7x7x960xf32>, tensor<3x3x1x960xf32>) -> tensor<1x7x7x960xf32>
%149 = "mhlo.batch_norm_inference"(%148, %cst_58, %cst_57, %cst_56, %cst_55) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%150 = "mhlo.clamp"(%3, %149, %0) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%151 = mhlo.convolution(%150, %cst_68) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x160xf32>) -> tensor<1x7x7x160xf32>
%152 = "mhlo.batch_norm_inference"(%151, %cst_67, %cst_66, %cst_65, %cst_64) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>) -> tensor<1x7x7x160xf32>
%153 = chlo.broadcast_add %144, %152 : (tensor<1x7x7x160xf32>, tensor<1x7x7x160xf32>) -> tensor<1x7x7x160xf32>
%154 = mhlo.convolution(%153, %cst_77) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%155 = "mhlo.batch_norm_inference"(%154, %cst_76, %cst_75, %cst_74, %cst_73) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%156 = "mhlo.clamp"(%3, %155, %0) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%157 = mhlo.convolution(%156, %19) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x7x7x960xf32>, tensor<3x3x1x960xf32>) -> tensor<1x7x7x960xf32>
%158 = "mhlo.batch_norm_inference"(%157, %cst_72, %cst_71, %cst_70, %cst_69) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%159 = "mhlo.clamp"(%3, %158, %0) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%160 = mhlo.convolution(%159, %cst_82) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x160xf32>) -> tensor<1x7x7x160xf32>
%161 = "mhlo.batch_norm_inference"(%160, %cst_81, %cst_80, %cst_79, %cst_78) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>) -> tensor<1x7x7x160xf32>
%162 = chlo.broadcast_add %153, %161 : (tensor<1x7x7x160xf32>, tensor<1x7x7x160xf32>) -> tensor<1x7x7x160xf32>
%163 = mhlo.convolution(%162, %cst_91) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%164 = "mhlo.batch_norm_inference"(%163, %cst_90, %cst_89, %cst_88, %cst_87) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%165 = "mhlo.clamp"(%3, %164, %0) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%166 = mhlo.convolution(%165, %20) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x7x7x960xf32>, tensor<3x3x1x960xf32>) -> tensor<1x7x7x960xf32>
%167 = "mhlo.batch_norm_inference"(%166, %cst_86, %cst_85, %cst_84, %cst_83) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%168 = "mhlo.clamp"(%3, %167, %0) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%169 = mhlo.convolution(%168, %cst_96) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x320xf32>) -> tensor<1x7x7x320xf32>
%170 = "mhlo.batch_norm_inference"(%169, %cst_95, %cst_94, %cst_93, %cst_92) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x320xf32>, tensor<320xf32>, tensor<320xf32>, tensor<320xf32>, tensor<320xf32>) -> tensor<1x7x7x320xf32>
%171 = mhlo.convolution(%170, %cst_232) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x320xf32>, tensor<1x1x320x1280xf32>) -> tensor<1x7x7x1280xf32>
%172 = "mhlo.batch_norm_inference"(%171, %cst_231, %cst_230, %cst_229, %cst_228) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x1280xf32>, tensor<1280xf32>, tensor<1280xf32>, tensor<1280xf32>, tensor<1280xf32>) -> tensor<1x7x7x1280xf32>
%173 = "mhlo.clamp"(%3, %172, %0) : (tensor<f32>, tensor<1x7x7x1280xf32>, tensor<f32>) -> tensor<1x7x7x1280xf32>
%174 = "mhlo.reduce"(%173, %3) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%186 = mhlo.add %arg1, %arg2 : tensor<f32>
"mhlo.return"(%186) : (tensor<f32>) -> ()
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<1x7x7x1280xf32>, tensor<f32>) -> tensor<1x1280xf32>
%175 = chlo.broadcast_divide %174, %1 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x1280xf32>, tensor<f32>) -> tensor<1x1280xf32>
%176 = "mhlo.dot"(%175, %cst_243) : (tensor<1x1280xf32>, tensor<1280x1000xf32>) -> tensor<1x1000xf32>
%177 = "mhlo.broadcast_in_dim"(%cst_242) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<1000xf32>) -> tensor<1x1000xf32>
%178 = mhlo.add %176, %177 : tensor<1x1000xf32>
%179 = "mhlo.reduce"(%178, %2) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%186 = mhlo.maximum %arg1, %arg2 : tensor<f32>
"mhlo.return"(%186) : (tensor<f32>) -> ()
}) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x1000xf32>, tensor<f32>) -> tensor<1xf32>
%180 = linalg.tensor_expand_shape %179 [[0, 1]] : tensor<1xf32> into tensor<1x1xf32>
%181 = chlo.broadcast_subtract %178, %180 : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32>
%182 = "mhlo.exponential"(%181) : (tensor<1x1000xf32>) -> tensor<1x1000xf32>
%183 = "mhlo.reduce"(%182, %3) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%186 = mhlo.add %arg1, %arg2 : tensor<f32>
"mhlo.return"(%186) : (tensor<f32>) -> ()
}) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x1000xf32>, tensor<f32>) -> tensor<1xf32>
%184 = linalg.tensor_expand_shape %183 [[0, 1]] : tensor<1xf32> into tensor<1x1xf32>
%185 = chlo.broadcast_divide %182, %184 : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32>
return %185 : tensor<1x1000xf32>
}
// -----// IR Dump After Canonicalizer //----- //
builtin.func private @"__inference_<lambda>_133580"(%arg0: tensor<1x224x224x3xf32> {tf._user_specified_name = "x"}) -> tensor<1x1000xf32> attributes {tf._construction_context = "kEagerRuntime", tf._input_shapes = [#tf_type.shape<1x224x224x3>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, 
#tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>], tf.signature.is_stateful} {
%0 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x960xf32>
%1 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x960xf32>
%2 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x960xf32>
%3 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x576xf32>
%4 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x576xf32>
%5 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x576xf32>
%6 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%7 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%8 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%9 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%10 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x192xf32>
%11 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x192xf32>
%12 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x192xf32>
%13 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x144xf32>
%14 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x144xf32>
%15 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x96xf32>
%16 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x32xf32>
%cst = constant opaque<"_", "0xDEADBEEF"> : tensor<1280x1000xf32>
%cst_0 = constant opaque<"_", "0xDEADBEEF"> : tensor<1000xf32>
%cst_1 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x16xf32>
%cst_2 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_3 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_4 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_5 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_6 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_7 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_8 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_9 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_10 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x320x1280xf32>
%cst_11 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_12 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_13 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_14 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_15 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x3x32xf32>
%cst_16 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_17 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_18 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_19 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_20 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_21 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_22 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_23 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_24 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_25 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_26 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_27 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_28 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_29 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_30 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_31 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_32 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_33 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_34 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_35 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_36 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_37 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_38 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_39 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_40 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_41 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_42 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_43 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_44 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_45 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_46 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_47 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_48 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_49 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_50 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_51 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_52 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_53 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_54 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_55 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_56 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_57 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_58 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_59 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_60 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_61 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_62 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x64xf32>
%cst_63 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_64 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_65 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_66 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_67 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_68 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_69 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_70 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_71 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_72 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_73 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_74 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_75 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_76 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32>
%cst_77 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_78 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_79 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_80 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_81 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_82 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_83 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_84 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_85 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_86 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_87 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_88 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_89 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_90 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32>
%cst_91 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_92 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_93 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_94 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_95 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_96 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_97 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_98 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_99 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_100 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_101 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_102 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_103 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_104 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x32xf32>
%cst_105 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_106 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_107 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_108 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_109 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32>
%cst_110 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_111 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_112 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_113 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_114 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_115 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_116 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_117 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_118 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x24xf32>
%cst_119 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_120 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_121 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_122 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_123 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32>
%cst_124 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_125 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_126 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_127 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_128 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_129 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_130 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_131 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_132 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x24xf32>
%cst_133 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_134 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_135 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_136 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_137 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x16x96xf32>
%cst_138 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_139 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_140 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_141 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_142 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_143 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_144 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_145 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_146 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x320xf32>
%cst_147 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_148 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_149 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_150 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_151 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_152 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_153 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_154 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_155 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_156 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_157 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_158 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_159 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_160 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32>
%cst_161 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_162 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_163 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_164 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_165 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_166 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_167 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_168 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_169 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_170 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_171 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_172 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_173 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_174 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32>
%cst_175 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_176 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_177 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_178 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_179 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_180 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_181 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_182 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_183 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_184 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_185 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_186 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_187 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_188 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x160xf32>
%cst_189 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_190 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_191 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_192 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_193 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_194 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_195 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_196 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_197 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_198 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_199 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_200 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_201 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_202 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32>
%cst_203 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_204 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_205 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_206 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_207 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_208 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_209 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_210 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_211 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_212 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_213 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_214 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_215 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_216 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32>
%cst_217 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_218 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_219 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_220 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_221 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_222 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_223 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_224 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_225 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_226 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_227 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_228 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_229 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_230 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x96xf32>
%cst_231 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_232 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_233 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_234 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_235 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_236 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_237 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_238 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_239 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_240 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_241 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_242 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_243 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
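// Scalar constants used throughout: 0.0 (ReLU lower bound, pad value, add-reduce init), -inf (max-reduce init), 49.0 (7*7 spatial average-pool divisor), and 6.0 (ReLU6 upper bound).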
%17 = mhlo.constant dense<0.000000e+00> : tensor<f32>
%18 = mhlo.constant dense<0xFF800000> : tensor<f32>
%19 = mhlo.constant dense<4.900000e+01> : tensor<f32>
%20 = mhlo.constant dense<6.000000e+00> : tensor<f32>
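// Stem: 3x3 stride-2 convolution (1x224x224x3 -> 1x112x112x32), batch norm, then clamp to [0, 6] (ReLU6).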
%21 = mhlo.convolution(%arg0, %cst_15) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 1], [0, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x224x224x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%22 = "mhlo.batch_norm_inference"(%21, %cst_16, %cst_17, %cst_18, %cst_19) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x112x112x32xf32>
%23 = "mhlo.clamp"(%17, %22, %20) : (tensor<f32>, tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x112x112x32xf32>
%24 = mhlo.convolution(%23, %16) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 32 : i64} : (tensor<1x112x112x32xf32>, tensor<3x3x1x32xf32>) -> tensor<1x112x112x32xf32>
%25 = "mhlo.batch_norm_inference"(%24, %cst_6, %cst_7, %cst_8, %cst_9) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x112x112x32xf32>
%26 = "mhlo.clamp"(%17, %25, %20) : (tensor<f32>, tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x112x112x32xf32>
%27 = mhlo.convolution(%26, %cst_1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x112x112x32xf32>, tensor<1x1x32x16xf32>) -> tensor<1x112x112x16xf32>
%28 = "mhlo.batch_norm_inference"(%27, %cst_2, %cst_3, %cst_4, %cst_5) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>) -> tensor<1x112x112x16xf32>
%29 = mhlo.convolution(%28, %cst_137) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x112x112x16xf32>, tensor<1x1x16x96xf32>) -> tensor<1x112x112x96xf32>
%30 = "mhlo.batch_norm_inference"(%29, %cst_138, %cst_139, %cst_140, %cst_141) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x112x112x96xf32>
%31 = "mhlo.clamp"(%17, %30, %20) : (tensor<f32>, tensor<1x112x112x96xf32>, tensor<f32>) -> tensor<1x112x112x96xf32>
%32 = "mhlo.pad"(%31, %17) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x112x112x96xf32>, tensor<f32>) -> tensor<1x113x113x96xf32>
%33 = mhlo.convolution(%32, %15) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 96 : i64} : (tensor<1x113x113x96xf32>, tensor<3x3x1x96xf32>) -> tensor<1x56x56x96xf32>
%34 = "mhlo.batch_norm_inference"(%33, %cst_142, %cst_143, %cst_144, %cst_145) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x56x56x96xf32>
%35 = "mhlo.clamp"(%17, %34, %20) : (tensor<f32>, tensor<1x56x56x96xf32>, tensor<f32>) -> tensor<1x56x56x96xf32>
%36 = mhlo.convolution(%35, %cst_132) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x96xf32>, tensor<1x1x96x24xf32>) -> tensor<1x56x56x24xf32>
%37 = "mhlo.batch_norm_inference"(%36, %cst_133, %cst_134, %cst_135, %cst_136) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>) -> tensor<1x56x56x24xf32>
%38 = mhlo.convolution(%37, %cst_123) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x24xf32>, tensor<1x1x24x144xf32>) -> tensor<1x56x56x144xf32>
%39 = "mhlo.batch_norm_inference"(%38, %cst_124, %cst_125, %cst_126, %cst_127) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x56x56x144xf32>
%40 = "mhlo.clamp"(%17, %39, %20) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%41 = mhlo.convolution(%40, %14) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 144 : i64} : (tensor<1x56x56x144xf32>, tensor<3x3x1x144xf32>) -> tensor<1x56x56x144xf32>
%42 = "mhlo.batch_norm_inference"(%41, %cst_128, %cst_129, %cst_130, %cst_131) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x56x56x144xf32>
%43 = "mhlo.clamp"(%17, %42, %20) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%44 = mhlo.convolution(%43, %cst_118) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x144xf32>, tensor<1x1x144x24xf32>) -> tensor<1x56x56x24xf32>
%45 = "mhlo.batch_norm_inference"(%44, %cst_119, %cst_120, %cst_121, %cst_122) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>) -> tensor<1x56x56x24xf32>
%46 = chlo.broadcast_add %37, %45 : (tensor<1x56x56x24xf32>, tensor<1x56x56x24xf32>) -> tensor<1x56x56x24xf32>
%47 = mhlo.convolution(%46, %cst_109) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x24xf32>, tensor<1x1x24x144xf32>) -> tensor<1x56x56x144xf32>
%48 = "mhlo.batch_norm_inference"(%47, %cst_110, %cst_111, %cst_112, %cst_113) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x56x56x144xf32>
%49 = "mhlo.clamp"(%17, %48, %20) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%50 = "mhlo.pad"(%49, %17) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x57x57x144xf32>
%51 = mhlo.convolution(%50, %13) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 144 : i64} : (tensor<1x57x57x144xf32>, tensor<3x3x1x144xf32>) -> tensor<1x28x28x144xf32>
%52 = "mhlo.batch_norm_inference"(%51, %cst_114, %cst_115, %cst_116, %cst_117) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x28x28x144xf32>
%53 = "mhlo.clamp"(%17, %52, %20) : (tensor<f32>, tensor<1x28x28x144xf32>, tensor<f32>) -> tensor<1x28x28x144xf32>
%54 = mhlo.convolution(%53, %cst_104) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x144xf32>, tensor<1x1x144x32xf32>) -> tensor<1x28x28x32xf32>
%55 = "mhlo.batch_norm_inference"(%54, %cst_105, %cst_106, %cst_107, %cst_108) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x28x28x32xf32>
%56 = mhlo.convolution(%55, %cst_95) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%57 = "mhlo.batch_norm_inference"(%56, %cst_96, %cst_97, %cst_98, %cst_99) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%58 = "mhlo.clamp"(%17, %57, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%59 = mhlo.convolution(%58, %12) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x28x28x192xf32>, tensor<3x3x1x192xf32>) -> tensor<1x28x28x192xf32>
%60 = "mhlo.batch_norm_inference"(%59, %cst_100, %cst_101, %cst_102, %cst_103) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%61 = "mhlo.clamp"(%17, %60, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%62 = mhlo.convolution(%61, %cst_90) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x192xf32>, tensor<1x1x192x32xf32>) -> tensor<1x28x28x32xf32>
%63 = "mhlo.batch_norm_inference"(%62, %cst_91, %cst_92, %cst_93, %cst_94) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x28x28x32xf32>
%64 = chlo.broadcast_add %55, %63 : (tensor<1x28x28x32xf32>, tensor<1x28x28x32xf32>) -> tensor<1x28x28x32xf32>
%65 = mhlo.convolution(%64, %cst_81) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%66 = "mhlo.batch_norm_inference"(%65, %cst_82, %cst_83, %cst_84, %cst_85) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%67 = "mhlo.clamp"(%17, %66, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%68 = mhlo.convolution(%67, %11) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x28x28x192xf32>, tensor<3x3x1x192xf32>) -> tensor<1x28x28x192xf32>
%69 = "mhlo.batch_norm_inference"(%68, %cst_86, %cst_87, %cst_88, %cst_89) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%70 = "mhlo.clamp"(%17, %69, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%71 = mhlo.convolution(%70, %cst_76) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x192xf32>, tensor<1x1x192x32xf32>) -> tensor<1x28x28x32xf32>
%72 = "mhlo.batch_norm_inference"(%71, %cst_77, %cst_78, %cst_79, %cst_80) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x28x28x32xf32>
%73 = chlo.broadcast_add %64, %72 : (tensor<1x28x28x32xf32>, tensor<1x28x28x32xf32>) -> tensor<1x28x28x32xf32>
%74 = mhlo.convolution(%73, %cst_67) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%75 = "mhlo.batch_norm_inference"(%74, %cst_68, %cst_69, %cst_70, %cst_71) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%76 = "mhlo.clamp"(%17, %75, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%77 = "mhlo.pad"(%76, %17) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x29x29x192xf32>
%78 = mhlo.convolution(%77, %10) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x29x29x192xf32>, tensor<3x3x1x192xf32>) -> tensor<1x14x14x192xf32>
%79 = "mhlo.batch_norm_inference"(%78, %cst_72, %cst_73, %cst_74, %cst_75) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x14x14x192xf32>
%80 = "mhlo.clamp"(%17, %79, %20) : (tensor<f32>, tensor<1x14x14x192xf32>, tensor<f32>) -> tensor<1x14x14x192xf32>
%81 = mhlo.convolution(%80, %cst_62) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x192xf32>, tensor<1x1x192x64xf32>) -> tensor<1x14x14x64xf32>
%82 = "mhlo.batch_norm_inference"(%81, %cst_63, %cst_64, %cst_65, %cst_66) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%83 = mhlo.convolution(%82, %cst_53) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%84 = "mhlo.batch_norm_inference"(%83, %cst_54, %cst_55, %cst_56, %cst_57) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%85 = "mhlo.clamp"(%17, %84, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%86 = mhlo.convolution(%85, %9) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%87 = "mhlo.batch_norm_inference"(%86, %cst_58, %cst_59, %cst_60, %cst_61) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%88 = "mhlo.clamp"(%17, %87, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%89 = mhlo.convolution(%88, %cst_48) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%90 = "mhlo.batch_norm_inference"(%89, %cst_49, %cst_50, %cst_51, %cst_52) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%91 = chlo.broadcast_add %82, %90 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%92 = mhlo.convolution(%91, %cst_39) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%93 = "mhlo.batch_norm_inference"(%92, %cst_40, %cst_41, %cst_42, %cst_43) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%94 = "mhlo.clamp"(%17, %93, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%95 = mhlo.convolution(%94, %8) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%96 = "mhlo.batch_norm_inference"(%95, %cst_44, %cst_45, %cst_46, %cst_47) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%97 = "mhlo.clamp"(%17, %96, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%98 = mhlo.convolution(%97, %cst_34) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%99 = "mhlo.batch_norm_inference"(%98, %cst_35, %cst_36, %cst_37, %cst_38) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%100 = chlo.broadcast_add %91, %99 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%101 = mhlo.convolution(%100, %cst_25) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%102 = "mhlo.batch_norm_inference"(%101, %cst_26, %cst_27, %cst_28, %cst_29) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%103 = "mhlo.clamp"(%17, %102, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%104 = mhlo.convolution(%103, %7) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%105 = "mhlo.batch_norm_inference"(%104, %cst_30, %cst_31, %cst_32, %cst_33) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%106 = "mhlo.clamp"(%17, %105, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%107 = mhlo.convolution(%106, %cst_20) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%108 = "mhlo.batch_norm_inference"(%107, %cst_21, %cst_22, %cst_23, %cst_24) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%109 = chlo.broadcast_add %100, %108 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%110 = mhlo.convolution(%109, %cst_235) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%111 = "mhlo.batch_norm_inference"(%110, %cst_236, %cst_237, %cst_238, %cst_239) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%112 = "mhlo.clamp"(%17, %111, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%113 = mhlo.convolution(%112, %6) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%114 = "mhlo.batch_norm_inference"(%113, %cst_240, %cst_241, %cst_242, %cst_243) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%115 = "mhlo.clamp"(%17, %114, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%116 = mhlo.convolution(%115, %cst_230) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x96xf32>) -> tensor<1x14x14x96xf32>
%117 = "mhlo.batch_norm_inference"(%116, %cst_231, %cst_232, %cst_233, %cst_234) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%118 = mhlo.convolution(%117, %cst_221) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%119 = "mhlo.batch_norm_inference"(%118, %cst_222, %cst_223, %cst_224, %cst_225) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%120 = "mhlo.clamp"(%17, %119, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%121 = mhlo.convolution(%120, %5) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x14x14x576xf32>, tensor<3x3x1x576xf32>) -> tensor<1x14x14x576xf32>
%122 = "mhlo.batch_norm_inference"(%121, %cst_226, %cst_227, %cst_228, %cst_229) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%123 = "mhlo.clamp"(%17, %122, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%124 = mhlo.convolution(%123, %cst_216) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x14x14x96xf32>
%125 = "mhlo.batch_norm_inference"(%124, %cst_217, %cst_218, %cst_219, %cst_220) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%126 = chlo.broadcast_add %117, %125 : (tensor<1x14x14x96xf32>, tensor<1x14x14x96xf32>) -> tensor<1x14x14x96xf32>
%127 = mhlo.convolution(%126, %cst_207) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%128 = "mhlo.batch_norm_inference"(%127, %cst_208, %cst_209, %cst_210, %cst_211) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%129 = "mhlo.clamp"(%17, %128, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%130 = mhlo.convolution(%129, %4) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x14x14x576xf32>, tensor<3x3x1x576xf32>) -> tensor<1x14x14x576xf32>
%131 = "mhlo.batch_norm_inference"(%130, %cst_212, %cst_213, %cst_214, %cst_215) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%132 = "mhlo.clamp"(%17, %131, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%133 = mhlo.convolution(%132, %cst_202) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x14x14x96xf32>
%134 = "mhlo.batch_norm_inference"(%133, %cst_203, %cst_204, %cst_205, %cst_206) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%135 = chlo.broadcast_add %126, %134 : (tensor<1x14x14x96xf32>, tensor<1x14x14x96xf32>) -> tensor<1x14x14x96xf32>
%136 = mhlo.convolution(%135, %cst_193) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%137 = "mhlo.batch_norm_inference"(%136, %cst_194, %cst_195, %cst_196, %cst_197) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%138 = "mhlo.clamp"(%17, %137, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%139 = "mhlo.pad"(%138, %17) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x15x15x576xf32>
%140 = mhlo.convolution(%139, %3) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x15x15x576xf32>, tensor<3x3x1x576xf32>) -> tensor<1x7x7x576xf32>
%141 = "mhlo.batch_norm_inference"(%140, %cst_198, %cst_199, %cst_200, %cst_201) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x7x7x576xf32>
%142 = "mhlo.clamp"(%17, %141, %20) : (tensor<f32>, tensor<1x7x7x576xf32>, tensor<f32>) -> tensor<1x7x7x576xf32>
%143 = mhlo.convolution(%142, %cst_188) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x576xf32>, tensor<1x1x576x160xf32>) -> tensor<1x7x7x160xf32>
%144 = "mhlo.batch_norm_inference"(%143, %cst_189, %cst_190, %cst_191, %cst_192) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>) -> tensor<1x7x7x160xf32>
%145 = mhlo.convolution(%144, %cst_179) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%146 = "mhlo.batch_norm_inference"(%145, %cst_180, %cst_181, %cst_182, %cst_183) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%147 = "mhlo.clamp"(%17, %146, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%148 = mhlo.convolution(%147, %2) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x7x7x960xf32>, tensor<3x3x1x960xf32>) -> tensor<1x7x7x960xf32>
%149 = "mhlo.batch_norm_inference"(%148, %cst_184, %cst_185, %cst_186, %cst_187) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%150 = "mhlo.clamp"(%17, %149, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%151 = mhlo.convolution(%150, %cst_174) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x160xf32>) -> tensor<1x7x7x160xf32>
%152 = "mhlo.batch_norm_inference"(%151, %cst_175, %cst_176, %cst_177, %cst_178) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>) -> tensor<1x7x7x160xf32>
%153 = chlo.broadcast_add %144, %152 : (tensor<1x7x7x160xf32>, tensor<1x7x7x160xf32>) -> tensor<1x7x7x160xf32>
%154 = mhlo.convolution(%153, %cst_165) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%155 = "mhlo.batch_norm_inference"(%154, %cst_166, %cst_167, %cst_168, %cst_169) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%156 = "mhlo.clamp"(%17, %155, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%157 = mhlo.convolution(%156, %1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x7x7x960xf32>, tensor<3x3x1x960xf32>) -> tensor<1x7x7x960xf32>
%158 = "mhlo.batch_norm_inference"(%157, %cst_170, %cst_171, %cst_172, %cst_173) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%159 = "mhlo.clamp"(%17, %158, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%160 = mhlo.convolution(%159, %cst_160) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x160xf32>) -> tensor<1x7x7x160xf32>
%161 = "mhlo.batch_norm_inference"(%160, %cst_161, %cst_162, %cst_163, %cst_164) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>) -> tensor<1x7x7x160xf32>
%162 = chlo.broadcast_add %153, %161 : (tensor<1x7x7x160xf32>, tensor<1x7x7x160xf32>) -> tensor<1x7x7x160xf32>
%163 = mhlo.convolution(%162, %cst_151) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%164 = "mhlo.batch_norm_inference"(%163, %cst_152, %cst_153, %cst_154, %cst_155) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%165 = "mhlo.clamp"(%17, %164, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%166 = mhlo.convolution(%165, %0) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x7x7x960xf32>, tensor<3x3x1x960xf32>) -> tensor<1x7x7x960xf32>
%167 = "mhlo.batch_norm_inference"(%166, %cst_156, %cst_157, %cst_158, %cst_159) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%168 = "mhlo.clamp"(%17, %167, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%169 = mhlo.convolution(%168, %cst_146) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x320xf32>) -> tensor<1x7x7x320xf32>
%170 = "mhlo.batch_norm_inference"(%169, %cst_147, %cst_148, %cst_149, %cst_150) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x320xf32>, tensor<320xf32>, tensor<320xf32>, tensor<320xf32>, tensor<320xf32>) -> tensor<1x7x7x320xf32>
%171 = mhlo.convolution(%170, %cst_10) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x320xf32>, tensor<1x1x320x1280xf32>) -> tensor<1x7x7x1280xf32>
%172 = "mhlo.batch_norm_inference"(%171, %cst_11, %cst_12, %cst_13, %cst_14) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x1280xf32>, tensor<1280xf32>, tensor<1280xf32>, tensor<1280xf32>, tensor<1280xf32>) -> tensor<1x7x7x1280xf32>
%173 = "mhlo.clamp"(%17, %172, %20) : (tensor<f32>, tensor<1x7x7x1280xf32>, tensor<f32>) -> tensor<1x7x7x1280xf32>
%174 = "mhlo.reduce"(%173, %17) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%186 = mhlo.add %arg1, %arg2 : tensor<f32>
"mhlo.return"(%186) : (tensor<f32>) -> ()
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<1x7x7x1280xf32>, tensor<f32>) -> tensor<1x1280xf32>
%175 = chlo.broadcast_divide %174, %19 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x1280xf32>, tensor<f32>) -> tensor<1x1280xf32>
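// Classifier head: 1280 -> 1000 dense layer (dot) plus broadcast bias add.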
%176 = "mhlo.dot"(%175, %cst) : (tensor<1x1280xf32>, tensor<1280x1000xf32>) -> tensor<1x1000xf32>
%177 = "mhlo.broadcast_in_dim"(%cst_0) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<1000xf32>) -> tensor<1x1000xf32>
%178 = mhlo.add %176, %177 : tensor<1x1000xf32>
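// Numerically stable softmax: subtract the row max, exponentiate, and normalize by the row sum.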
%179 = "mhlo.reduce"(%178, %18) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%186 = mhlo.maximum %arg1, %arg2 : tensor<f32>
"mhlo.return"(%186) : (tensor<f32>) -> ()
}) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x1000xf32>, tensor<f32>) -> tensor<1xf32>
%180 = linalg.tensor_expand_shape %179 [[0, 1]] : tensor<1xf32> into tensor<1x1xf32>
%181 = chlo.broadcast_subtract %178, %180 : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32>
%182 = "mhlo.exponential"(%181) : (tensor<1x1000xf32>) -> tensor<1x1000xf32>
%183 = "mhlo.reduce"(%182, %17) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%186 = mhlo.add %arg1, %arg2 : tensor<f32>
"mhlo.return"(%186) : (tensor<f32>) -> ()
}) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x1000xf32>, tensor<f32>) -> tensor<1xf32>
%184 = linalg.tensor_expand_shape %183 [[0, 1]] : tensor<1xf32> into tensor<1x1xf32>
%185 = chlo.broadcast_divide %182, %184 : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32>
return %185 : tensor<1x1000xf32>
}
// -----// IR Dump After Inliner //----- //
builtin.module {
builtin.func @call(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi = "{\22a\22:[[\22ndarray\22,\22f32\22,4,1,224,224,3]],\22r\22:[[\22ndarray\22,\22f32\22,2,1,1000]],\22v\22:1}"}} {
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x224x224x3xf32>
%1 = call @"__inference_<lambda>_133580"(%0) : (tensor<1x224x224x3xf32>) -> tensor<1x1000xf32>
%2 = hal.tensor.cast %1 : tensor<1x1000xf32> -> !hal.buffer_view
return %2 : !hal.buffer_view
}
builtin.func private @"__inference_<lambda>_133580"(%arg0: tensor<1x224x224x3xf32> {tf._user_specified_name = "x"}) -> tensor<1x1000xf32> attributes {tf._construction_context = "kEagerRuntime", tf._input_shapes = [#tf_type.shape<1x224x224x3>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, 
#tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>], tf.signature.is_stateful} {
%0 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x960xf32>
%1 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x960xf32>
%2 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x960xf32>
%3 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x576xf32>
%4 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x576xf32>
%5 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x576xf32>
%6 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%7 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%8 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%9 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x384xf32>
%10 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x192xf32>
%11 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x192xf32>
%12 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x192xf32>
%13 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x144xf32>
%14 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x144xf32>
%15 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x96xf32>
%16 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x1x32xf32>
%cst = constant opaque<"_", "0xDEADBEEF"> : tensor<1280x1000xf32>
%cst_0 = constant opaque<"_", "0xDEADBEEF"> : tensor<1000xf32>
%cst_1 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x16xf32>
%cst_2 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_3 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_4 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_5 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_6 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_7 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_8 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_9 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_10 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x320x1280xf32>
%cst_11 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_12 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_13 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_14 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_15 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x3x32xf32>
%cst_16 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_17 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_18 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_19 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_20 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_21 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_22 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_23 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_24 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_25 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_26 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_27 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_28 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_29 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_30 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_31 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_32 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_33 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_34 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_35 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_36 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_37 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_38 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_39 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_40 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_41 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_42 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_43 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_44 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_45 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_46 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_47 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_48 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_49 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_50 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_51 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_52 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_53 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_54 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_55 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_56 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_57 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_58 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_59 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_60 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_61 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_62 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x64xf32>
%cst_63 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_64 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_65 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_66 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_67 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_68 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_69 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_70 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_71 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_72 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_73 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_74 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_75 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_76 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32>
%cst_77 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_78 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_79 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_80 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_81 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_82 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_83 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_84 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_85 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_86 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_87 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_88 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_89 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_90 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32>
%cst_91 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_92 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_93 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_94 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_95 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_96 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_97 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_98 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_99 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_100 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_101 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_102 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_103 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_104 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x32xf32>
%cst_105 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_106 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_107 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_108 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_109 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32>
%cst_110 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_111 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_112 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_113 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_114 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_115 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_116 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_117 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_118 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x24xf32>
%cst_119 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_120 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_121 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_122 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_123 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32>
%cst_124 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_125 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_126 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_127 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_128 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_129 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_130 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_131 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_132 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x24xf32>
%cst_133 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_134 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_135 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_136 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_137 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x16x96xf32>
%cst_138 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_139 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_140 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_141 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_142 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_143 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_144 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_145 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_146 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x320xf32>
%cst_147 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_148 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_149 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_150 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_151 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_152 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_153 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_154 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_155 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_156 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_157 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_158 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_159 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_160 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32>
%cst_161 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_162 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_163 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_164 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_165 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_166 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_167 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_168 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_169 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_170 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_171 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_172 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_173 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_174 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32>
%cst_175 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_176 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_177 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_178 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_179 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_180 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_181 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_182 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_183 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_184 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_185 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_186 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_187 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_188 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x160xf32>
%cst_189 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_190 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_191 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_192 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_193 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_194 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_195 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_196 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_197 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_198 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_199 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_200 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_201 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_202 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32>
%cst_203 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_204 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_205 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_206 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_207 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_208 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_209 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_210 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_211 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_212 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_213 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_214 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_215 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_216 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32>
%cst_217 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_218 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_219 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_220 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_221 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_222 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_223 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_224 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_225 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_226 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_227 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_228 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_229 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_230 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x96xf32>
%cst_231 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_232 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_233 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_234 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_235 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_236 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_237 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_238 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_239 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_240 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_241 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_242 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_243 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%17 = mhlo.constant dense<0.000000e+00> : tensor<f32>
%18 = mhlo.constant dense<0xFF800000> : tensor<f32>
%19 = mhlo.constant dense<4.900000e+01> : tensor<f32>
%20 = mhlo.constant dense<6.000000e+00> : tensor<f32>
%21 = mhlo.convolution(%arg0, %cst_15) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 1], [0, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x224x224x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%22 = "mhlo.batch_norm_inference"(%21, %cst_16, %cst_17, %cst_18, %cst_19) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x112x112x32xf32>
%23 = "mhlo.clamp"(%17, %22, %20) : (tensor<f32>, tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x112x112x32xf32>
%24 = mhlo.convolution(%23, %16) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 32 : i64} : (tensor<1x112x112x32xf32>, tensor<3x3x1x32xf32>) -> tensor<1x112x112x32xf32>
%25 = "mhlo.batch_norm_inference"(%24, %cst_6, %cst_7, %cst_8, %cst_9) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x112x112x32xf32>
%26 = "mhlo.clamp"(%17, %25, %20) : (tensor<f32>, tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x112x112x32xf32>
%27 = mhlo.convolution(%26, %cst_1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x112x112x32xf32>, tensor<1x1x32x16xf32>) -> tensor<1x112x112x16xf32>
%28 = "mhlo.batch_norm_inference"(%27, %cst_2, %cst_3, %cst_4, %cst_5) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>) -> tensor<1x112x112x16xf32>
%29 = mhlo.convolution(%28, %cst_137) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x112x112x16xf32>, tensor<1x1x16x96xf32>) -> tensor<1x112x112x96xf32>
%30 = "mhlo.batch_norm_inference"(%29, %cst_138, %cst_139, %cst_140, %cst_141) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x112x112x96xf32>
%31 = "mhlo.clamp"(%17, %30, %20) : (tensor<f32>, tensor<1x112x112x96xf32>, tensor<f32>) -> tensor<1x112x112x96xf32>
%32 = "mhlo.pad"(%31, %17) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x112x112x96xf32>, tensor<f32>) -> tensor<1x113x113x96xf32>
%33 = mhlo.convolution(%32, %15) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 96 : i64} : (tensor<1x113x113x96xf32>, tensor<3x3x1x96xf32>) -> tensor<1x56x56x96xf32>
%34 = "mhlo.batch_norm_inference"(%33, %cst_142, %cst_143, %cst_144, %cst_145) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x56x56x96xf32>
%35 = "mhlo.clamp"(%17, %34, %20) : (tensor<f32>, tensor<1x56x56x96xf32>, tensor<f32>) -> tensor<1x56x56x96xf32>
%36 = mhlo.convolution(%35, %cst_132) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x96xf32>, tensor<1x1x96x24xf32>) -> tensor<1x56x56x24xf32>
%37 = "mhlo.batch_norm_inference"(%36, %cst_133, %cst_134, %cst_135, %cst_136) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>) -> tensor<1x56x56x24xf32>
%38 = mhlo.convolution(%37, %cst_123) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x24xf32>, tensor<1x1x24x144xf32>) -> tensor<1x56x56x144xf32>
%39 = "mhlo.batch_norm_inference"(%38, %cst_124, %cst_125, %cst_126, %cst_127) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x56x56x144xf32>
%40 = "mhlo.clamp"(%17, %39, %20) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%41 = mhlo.convolution(%40, %14) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 144 : i64} : (tensor<1x56x56x144xf32>, tensor<3x3x1x144xf32>) -> tensor<1x56x56x144xf32>
%42 = "mhlo.batch_norm_inference"(%41, %cst_128, %cst_129, %cst_130, %cst_131) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x56x56x144xf32>
%43 = "mhlo.clamp"(%17, %42, %20) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%44 = mhlo.convolution(%43, %cst_118) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x144xf32>, tensor<1x1x144x24xf32>) -> tensor<1x56x56x24xf32>
%45 = "mhlo.batch_norm_inference"(%44, %cst_119, %cst_120, %cst_121, %cst_122) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>) -> tensor<1x56x56x24xf32>
%46 = chlo.broadcast_add %37, %45 : (tensor<1x56x56x24xf32>, tensor<1x56x56x24xf32>) -> tensor<1x56x56x24xf32>
%47 = mhlo.convolution(%46, %cst_109) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x24xf32>, tensor<1x1x24x144xf32>) -> tensor<1x56x56x144xf32>
%48 = "mhlo.batch_norm_inference"(%47, %cst_110, %cst_111, %cst_112, %cst_113) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x56x56x144xf32>
%49 = "mhlo.clamp"(%17, %48, %20) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%50 = "mhlo.pad"(%49, %17) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x57x57x144xf32>
%51 = mhlo.convolution(%50, %13) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 144 : i64} : (tensor<1x57x57x144xf32>, tensor<3x3x1x144xf32>) -> tensor<1x28x28x144xf32>
%52 = "mhlo.batch_norm_inference"(%51, %cst_114, %cst_115, %cst_116, %cst_117) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x28x28x144xf32>
%53 = "mhlo.clamp"(%17, %52, %20) : (tensor<f32>, tensor<1x28x28x144xf32>, tensor<f32>) -> tensor<1x28x28x144xf32>
%54 = mhlo.convolution(%53, %cst_104) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x144xf32>, tensor<1x1x144x32xf32>) -> tensor<1x28x28x32xf32>
%55 = "mhlo.batch_norm_inference"(%54, %cst_105, %cst_106, %cst_107, %cst_108) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x28x28x32xf32>
%56 = mhlo.convolution(%55, %cst_95) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%57 = "mhlo.batch_norm_inference"(%56, %cst_96, %cst_97, %cst_98, %cst_99) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%58 = "mhlo.clamp"(%17, %57, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%59 = mhlo.convolution(%58, %12) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x28x28x192xf32>, tensor<3x3x1x192xf32>) -> tensor<1x28x28x192xf32>
%60 = "mhlo.batch_norm_inference"(%59, %cst_100, %cst_101, %cst_102, %cst_103) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%61 = "mhlo.clamp"(%17, %60, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%62 = mhlo.convolution(%61, %cst_90) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x192xf32>, tensor<1x1x192x32xf32>) -> tensor<1x28x28x32xf32>
%63 = "mhlo.batch_norm_inference"(%62, %cst_91, %cst_92, %cst_93, %cst_94) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x28x28x32xf32>
%64 = chlo.broadcast_add %55, %63 : (tensor<1x28x28x32xf32>, tensor<1x28x28x32xf32>) -> tensor<1x28x28x32xf32>
%65 = mhlo.convolution(%64, %cst_81) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%66 = "mhlo.batch_norm_inference"(%65, %cst_82, %cst_83, %cst_84, %cst_85) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%67 = "mhlo.clamp"(%17, %66, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%68 = mhlo.convolution(%67, %11) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x28x28x192xf32>, tensor<3x3x1x192xf32>) -> tensor<1x28x28x192xf32>
%69 = "mhlo.batch_norm_inference"(%68, %cst_86, %cst_87, %cst_88, %cst_89) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%70 = "mhlo.clamp"(%17, %69, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%71 = mhlo.convolution(%70, %cst_76) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x192xf32>, tensor<1x1x192x32xf32>) -> tensor<1x28x28x32xf32>
%72 = "mhlo.batch_norm_inference"(%71, %cst_77, %cst_78, %cst_79, %cst_80) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>, tensor<32xf32>) -> tensor<1x28x28x32xf32>
%73 = chlo.broadcast_add %64, %72 : (tensor<1x28x28x32xf32>, tensor<1x28x28x32xf32>) -> tensor<1x28x28x32xf32>
%74 = mhlo.convolution(%73, %cst_67) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%75 = "mhlo.batch_norm_inference"(%74, %cst_68, %cst_69, %cst_70, %cst_71) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x28x28x192xf32>
%76 = "mhlo.clamp"(%17, %75, %20) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%77 = "mhlo.pad"(%76, %17) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x29x29x192xf32>
%78 = mhlo.convolution(%77, %10) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x29x29x192xf32>, tensor<3x3x1x192xf32>) -> tensor<1x14x14x192xf32>
%79 = "mhlo.batch_norm_inference"(%78, %cst_72, %cst_73, %cst_74, %cst_75) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>, tensor<192xf32>) -> tensor<1x14x14x192xf32>
%80 = "mhlo.clamp"(%17, %79, %20) : (tensor<f32>, tensor<1x14x14x192xf32>, tensor<f32>) -> tensor<1x14x14x192xf32>
%81 = mhlo.convolution(%80, %cst_62) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x192xf32>, tensor<1x1x192x64xf32>) -> tensor<1x14x14x64xf32>
%82 = "mhlo.batch_norm_inference"(%81, %cst_63, %cst_64, %cst_65, %cst_66) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%83 = mhlo.convolution(%82, %cst_53) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%84 = "mhlo.batch_norm_inference"(%83, %cst_54, %cst_55, %cst_56, %cst_57) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%85 = "mhlo.clamp"(%17, %84, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%86 = mhlo.convolution(%85, %9) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%87 = "mhlo.batch_norm_inference"(%86, %cst_58, %cst_59, %cst_60, %cst_61) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%88 = "mhlo.clamp"(%17, %87, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%89 = mhlo.convolution(%88, %cst_48) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%90 = "mhlo.batch_norm_inference"(%89, %cst_49, %cst_50, %cst_51, %cst_52) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%91 = chlo.broadcast_add %82, %90 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%92 = mhlo.convolution(%91, %cst_39) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%93 = "mhlo.batch_norm_inference"(%92, %cst_40, %cst_41, %cst_42, %cst_43) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%94 = "mhlo.clamp"(%17, %93, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%95 = mhlo.convolution(%94, %8) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%96 = "mhlo.batch_norm_inference"(%95, %cst_44, %cst_45, %cst_46, %cst_47) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%97 = "mhlo.clamp"(%17, %96, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%98 = mhlo.convolution(%97, %cst_34) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%99 = "mhlo.batch_norm_inference"(%98, %cst_35, %cst_36, %cst_37, %cst_38) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%100 = chlo.broadcast_add %91, %99 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%101 = mhlo.convolution(%100, %cst_25) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%102 = "mhlo.batch_norm_inference"(%101, %cst_26, %cst_27, %cst_28, %cst_29) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%103 = "mhlo.clamp"(%17, %102, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%104 = mhlo.convolution(%103, %7) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%105 = "mhlo.batch_norm_inference"(%104, %cst_30, %cst_31, %cst_32, %cst_33) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%106 = "mhlo.clamp"(%17, %105, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%107 = mhlo.convolution(%106, %cst_20) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%108 = "mhlo.batch_norm_inference"(%107, %cst_21, %cst_22, %cst_23, %cst_24) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x14x14x64xf32>
%109 = chlo.broadcast_add %100, %108 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%110 = mhlo.convolution(%109, %cst_235) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%111 = "mhlo.batch_norm_inference"(%110, %cst_236, %cst_237, %cst_238, %cst_239) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%112 = "mhlo.clamp"(%17, %111, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%113 = mhlo.convolution(%112, %6) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x14x14x384xf32>, tensor<3x3x1x384xf32>) -> tensor<1x14x14x384xf32>
%114 = "mhlo.batch_norm_inference"(%113, %cst_240, %cst_241, %cst_242, %cst_243) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>, tensor<384xf32>) -> tensor<1x14x14x384xf32>
%115 = "mhlo.clamp"(%17, %114, %20) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%116 = mhlo.convolution(%115, %cst_230) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x96xf32>) -> tensor<1x14x14x96xf32>
%117 = "mhlo.batch_norm_inference"(%116, %cst_231, %cst_232, %cst_233, %cst_234) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%118 = mhlo.convolution(%117, %cst_221) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%119 = "mhlo.batch_norm_inference"(%118, %cst_222, %cst_223, %cst_224, %cst_225) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%120 = "mhlo.clamp"(%17, %119, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%121 = mhlo.convolution(%120, %5) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x14x14x576xf32>, tensor<3x3x1x576xf32>) -> tensor<1x14x14x576xf32>
%122 = "mhlo.batch_norm_inference"(%121, %cst_226, %cst_227, %cst_228, %cst_229) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%123 = "mhlo.clamp"(%17, %122, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%124 = mhlo.convolution(%123, %cst_216) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x14x14x96xf32>
%125 = "mhlo.batch_norm_inference"(%124, %cst_217, %cst_218, %cst_219, %cst_220) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%126 = chlo.broadcast_add %117, %125 : (tensor<1x14x14x96xf32>, tensor<1x14x14x96xf32>) -> tensor<1x14x14x96xf32>
%127 = mhlo.convolution(%126, %cst_207) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%128 = "mhlo.batch_norm_inference"(%127, %cst_208, %cst_209, %cst_210, %cst_211) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%129 = "mhlo.clamp"(%17, %128, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%130 = mhlo.convolution(%129, %4) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x14x14x576xf32>, tensor<3x3x1x576xf32>) -> tensor<1x14x14x576xf32>
%131 = "mhlo.batch_norm_inference"(%130, %cst_212, %cst_213, %cst_214, %cst_215) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%132 = "mhlo.clamp"(%17, %131, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%133 = mhlo.convolution(%132, %cst_202) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x14x14x96xf32>
%134 = "mhlo.batch_norm_inference"(%133, %cst_203, %cst_204, %cst_205, %cst_206) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%135 = chlo.broadcast_add %126, %134 : (tensor<1x14x14x96xf32>, tensor<1x14x14x96xf32>) -> tensor<1x14x14x96xf32>
%136 = mhlo.convolution(%135, %cst_193) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%137 = "mhlo.batch_norm_inference"(%136, %cst_194, %cst_195, %cst_196, %cst_197) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x14x14x576xf32>
%138 = "mhlo.clamp"(%17, %137, %20) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%139 = "mhlo.pad"(%138, %17) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x15x15x576xf32>
%140 = mhlo.convolution(%139, %3) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x15x15x576xf32>, tensor<3x3x1x576xf32>) -> tensor<1x7x7x576xf32>
%141 = "mhlo.batch_norm_inference"(%140, %cst_198, %cst_199, %cst_200, %cst_201) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x7x7x576xf32>
%142 = "mhlo.clamp"(%17, %141, %20) : (tensor<f32>, tensor<1x7x7x576xf32>, tensor<f32>) -> tensor<1x7x7x576xf32>
%143 = mhlo.convolution(%142, %cst_188) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x576xf32>, tensor<1x1x576x160xf32>) -> tensor<1x7x7x160xf32>
%144 = "mhlo.batch_norm_inference"(%143, %cst_189, %cst_190, %cst_191, %cst_192) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>) -> tensor<1x7x7x160xf32>
%145 = mhlo.convolution(%144, %cst_179) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%146 = "mhlo.batch_norm_inference"(%145, %cst_180, %cst_181, %cst_182, %cst_183) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%147 = "mhlo.clamp"(%17, %146, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%148 = mhlo.convolution(%147, %2) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x7x7x960xf32>, tensor<3x3x1x960xf32>) -> tensor<1x7x7x960xf32>
%149 = "mhlo.batch_norm_inference"(%148, %cst_184, %cst_185, %cst_186, %cst_187) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%150 = "mhlo.clamp"(%17, %149, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%151 = mhlo.convolution(%150, %cst_174) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x160xf32>) -> tensor<1x7x7x160xf32>
%152 = "mhlo.batch_norm_inference"(%151, %cst_175, %cst_176, %cst_177, %cst_178) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>) -> tensor<1x7x7x160xf32>
%153 = chlo.broadcast_add %144, %152 : (tensor<1x7x7x160xf32>, tensor<1x7x7x160xf32>) -> tensor<1x7x7x160xf32>
%154 = mhlo.convolution(%153, %cst_165) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%155 = "mhlo.batch_norm_inference"(%154, %cst_166, %cst_167, %cst_168, %cst_169) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%156 = "mhlo.clamp"(%17, %155, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%157 = mhlo.convolution(%156, %1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x7x7x960xf32>, tensor<3x3x1x960xf32>) -> tensor<1x7x7x960xf32>
%158 = "mhlo.batch_norm_inference"(%157, %cst_170, %cst_171, %cst_172, %cst_173) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%159 = "mhlo.clamp"(%17, %158, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%160 = mhlo.convolution(%159, %cst_160) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x160xf32>) -> tensor<1x7x7x160xf32>
%161 = "mhlo.batch_norm_inference"(%160, %cst_161, %cst_162, %cst_163, %cst_164) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>, tensor<160xf32>) -> tensor<1x7x7x160xf32>
%162 = chlo.broadcast_add %153, %161 : (tensor<1x7x7x160xf32>, tensor<1x7x7x160xf32>) -> tensor<1x7x7x160xf32>
%163 = mhlo.convolution(%162, %cst_151) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%164 = "mhlo.batch_norm_inference"(%163, %cst_152, %cst_153, %cst_154, %cst_155) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%165 = "mhlo.clamp"(%17, %164, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%166 = mhlo.convolution(%165, %0) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[1, 1], [1, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x7x7x960xf32>, tensor<3x3x1x960xf32>) -> tensor<1x7x7x960xf32>
%167 = "mhlo.batch_norm_inference"(%166, %cst_156, %cst_157, %cst_158, %cst_159) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>, tensor<960xf32>) -> tensor<1x7x7x960xf32>
%168 = "mhlo.clamp"(%17, %167, %20) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%169 = mhlo.convolution(%168, %cst_146) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x320xf32>) -> tensor<1x7x7x320xf32>
%170 = "mhlo.batch_norm_inference"(%169, %cst_147, %cst_148, %cst_149, %cst_150) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x320xf32>, tensor<320xf32>, tensor<320xf32>, tensor<320xf32>, tensor<320xf32>) -> tensor<1x7x7x320xf32>
%171 = mhlo.convolution(%170, %cst_10) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x320xf32>, tensor<1x1x320x1280xf32>) -> tensor<1x7x7x1280xf32>
%172 = "mhlo.batch_norm_inference"(%171, %cst_11, %cst_12, %cst_13, %cst_14) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x1280xf32>, tensor<1280xf32>, tensor<1280xf32>, tensor<1280xf32>, tensor<1280xf32>) -> tensor<1x7x7x1280xf32>
%173 = "mhlo.clamp"(%17, %172, %20) : (tensor<f32>, tensor<1x7x7x1280xf32>, tensor<f32>) -> tensor<1x7x7x1280xf32>
%174 = "mhlo.reduce"(%173, %17) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%186 = mhlo.add %arg1, %arg2 : tensor<f32>
"mhlo.return"(%186) : (tensor<f32>) -> ()
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<1x7x7x1280xf32>, tensor<f32>) -> tensor<1x1280xf32>
%175 = chlo.broadcast_divide %174, %19 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x1280xf32>, tensor<f32>) -> tensor<1x1280xf32>
%176 = "mhlo.dot"(%175, %cst) : (tensor<1x1280xf32>, tensor<1280x1000xf32>) -> tensor<1x1000xf32>
%177 = "mhlo.broadcast_in_dim"(%cst_0) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<1000xf32>) -> tensor<1x1000xf32>
%178 = mhlo.add %176, %177 : tensor<1x1000xf32>
%179 = "mhlo.reduce"(%178, %18) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%186 = mhlo.maximum %arg1, %arg2 : tensor<f32>
"mhlo.return"(%186) : (tensor<f32>) -> ()
}) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x1000xf32>, tensor<f32>) -> tensor<1xf32>
%180 = linalg.tensor_expand_shape %179 [[0, 1]] : tensor<1xf32> into tensor<1x1xf32>
%181 = chlo.broadcast_subtract %178, %180 : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32>
%182 = "mhlo.exponential"(%181) : (tensor<1x1000xf32>) -> tensor<1x1000xf32>
%183 = "mhlo.reduce"(%182, %17) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%186 = mhlo.add %arg1, %arg2 : tensor<f32>
"mhlo.return"(%186) : (tensor<f32>) -> ()
}) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x1000xf32>, tensor<f32>) -> tensor<1xf32>
%184 = linalg.tensor_expand_shape %183 [[0, 1]] : tensor<1xf32> into tensor<1x1xf32>
%185 = chlo.broadcast_divide %182, %184 : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32>
return %185 : tensor<1x1000xf32>
}
}
// -----// IR Dump After MHLOToMHLOPreprocessing //----- //
builtin.func private @"__inference_<lambda>_133580"(%arg0: tensor<1x224x224x3xf32> {tf._user_specified_name = "x"}) -> tensor<1x1000xf32> attributes {tf._construction_context = "kEagerRuntime", tf._input_shapes = [#tf_type.shape<1x224x224x3>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, 
#tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>], tf.signature.is_stateful} {
%cst = constant opaque<"_", "0xDEADBEEF"> : tensor<1280x1000xf32>
%cst_0 = constant opaque<"_", "0xDEADBEEF"> : tensor<1000xf32>
%cst_1 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x16xf32>
%cst_2 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_3 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_4 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_5 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_6 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_7 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_8 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x320x1280xf32>
%cst_9 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_10 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_11 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_12 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x3x32xf32>
%cst_13 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_14 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_15 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_16 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_17 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_18 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_19 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_20 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_21 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_22 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_23 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_24 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_25 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_26 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_27 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_28 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_29 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_30 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_31 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_32 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_33 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_34 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_35 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_36 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_37 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_38 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_39 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_40 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_41 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_42 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_43 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_44 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_45 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_46 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_47 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_48 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_49 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x64xf32>
%cst_50 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_51 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_52 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_53 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_54 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_55 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_56 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_57 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_58 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_59 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_60 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32>
%cst_61 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_62 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_63 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_64 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_65 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_66 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_67 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_68 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_69 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_70 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_71 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32>
%cst_72 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_73 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_74 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_75 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_76 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_77 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_78 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_79 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_80 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_81 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_82 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x32xf32>
%cst_83 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_84 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_85 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_86 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32>
%cst_87 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_88 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_89 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_90 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_91 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_92 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_93 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x24xf32>
%cst_94 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_95 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_96 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_97 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32>
%cst_98 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_99 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_100 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_101 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_102 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_103 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_104 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x24xf32>
%cst_105 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_106 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_107 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_108 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x16x96xf32>
%cst_109 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_110 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_111 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_112 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_113 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_114 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_115 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x320xf32>
%cst_116 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_117 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_118 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_119 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_120 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_121 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_122 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_123 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_124 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_125 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_126 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32>
%cst_127 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_128 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_129 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_130 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_131 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_132 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_133 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_134 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_135 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_136 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_137 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32>
%cst_138 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_139 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_140 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_141 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_142 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_143 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_144 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_145 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_146 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_147 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_148 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x160xf32>
%cst_149 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_150 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_151 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_152 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_153 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_154 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_155 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_156 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_157 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_158 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_159 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32>
%cst_160 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_161 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_162 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_163 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_164 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_165 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_166 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_167 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_168 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_169 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_170 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32>
%cst_171 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_172 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_173 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_174 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_175 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_176 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_177 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_178 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_179 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_180 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_181 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x96xf32>
%cst_182 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_183 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_184 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_185 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_186 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_187 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_188 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_189 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_190 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_191 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
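    // Scalar constants below: 0.0 and 6.0 bound the ReLU6 clamps that follow; 0xFF800000 (-inf) and 4.9e+01 are presumably the init value of a later max reduction and the 7x7 average-pool divisor.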
%0 = mhlo.constant dense<0.000000e+00> : tensor<f32>
%1 = mhlo.constant dense<0xFF800000> : tensor<f32>
%2 = mhlo.constant dense<4.900000e+01> : tensor<f32>
%3 = mhlo.constant dense<6.000000e+00> : tensor<f32>
%4 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%5 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x32x1xf32>
%6 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%7 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%8 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%9 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x96x1xf32>
%10 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%11 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%12 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%13 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x144x1xf32>
%14 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%15 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%16 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%17 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x144x1xf32>
%18 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%19 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%20 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%21 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x192x1xf32>
%22 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%23 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%24 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%25 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x192x1xf32>
%26 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%27 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%28 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%29 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x192x1xf32>
%30 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%31 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%32 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%33 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x384x1xf32>
%34 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%35 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%36 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%37 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x384x1xf32>
%38 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%39 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%40 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%41 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x384x1xf32>
%42 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%43 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%44 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%45 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x384x1xf32>
%46 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%47 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%48 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%49 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x576x1xf32>
%50 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%51 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%52 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%53 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x576x1xf32>
%54 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%55 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%56 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%57 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x576x1xf32>
%58 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%59 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%60 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%61 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x960x1xf32>
%62 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%63 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%64 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%65 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x960x1xf32>
%66 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%67 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%68 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%69 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x960x1xf32>
%cst_192 = constant dense<0.000000e+00> : tensor<f32>
%70 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%71 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%72 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
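    // Stem: pad the 1x224x224x3 input and apply a 3x3 stride-2 convolution to 1x112x112x32; batch normalization is written out as subtract/multiply/divide/add over per-channel broadcasts, followed by a ReLU6 clamp to [0, 6].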
%73 = "mhlo.pad"(%arg0, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x224x224x3xf32>, tensor<f32>) -> tensor<1x225x225x3xf32>
%74 = mhlo.convolution(%73, %cst_12) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%75 = "mhlo.broadcast_in_dim"(%cst_13) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x112x112x32xf32>
%76 = "mhlo.broadcast_in_dim"(%cst_14) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x112x112x32xf32>
%77 = "mhlo.broadcast_in_dim"(%cst_15) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x112x112x32xf32>
%78 = "mhlo.broadcast_in_dim"(%4) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x112x112x32xf32>
%79 = mhlo.subtract %74, %77 : tensor<1x112x112x32xf32>
%80 = mhlo.multiply %79, %75 : tensor<1x112x112x32xf32>
%81 = mhlo.divide %80, %78 : tensor<1x112x112x32xf32>
%82 = mhlo.add %81, %76 : tensor<1x112x112x32xf32>
%83 = "mhlo.clamp"(%0, %82, %3) : (tensor<f32>, tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x112x112x32xf32>
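    // Depthwise 3x3 convolution (feature_group_count = 32) with the same normalize-then-ReLU6 pattern.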
%84 = "mhlo.pad"(%83, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x114x114x32xf32>
%85 = mhlo.convolution(%84, %5) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 32 : i64} : (tensor<1x114x114x32xf32>, tensor<3x3x32x1xf32>) -> tensor<1x112x112x32xf32>
%86 = "mhlo.broadcast_in_dim"(%cst_5) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x112x112x32xf32>
%87 = "mhlo.broadcast_in_dim"(%cst_6) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x112x112x32xf32>
%88 = "mhlo.broadcast_in_dim"(%cst_7) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x112x112x32xf32>
%89 = "mhlo.broadcast_in_dim"(%6) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x112x112x32xf32>
%90 = mhlo.subtract %85, %88 : tensor<1x112x112x32xf32>
%91 = mhlo.multiply %90, %86 : tensor<1x112x112x32xf32>
%92 = mhlo.divide %91, %89 : tensor<1x112x112x32xf32>
%93 = mhlo.add %92, %87 : tensor<1x112x112x32xf32>
%94 = "mhlo.clamp"(%0, %93, %3) : (tensor<f32>, tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x112x112x32xf32>
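    // Linear bottleneck: 1x1 projection from 32 to 16 channels, normalized with no activation.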
%95 = mhlo.convolution(%94, %cst_1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x112x112x32xf32>, tensor<1x1x32x16xf32>) -> tensor<1x112x112x16xf32>
%96 = "mhlo.broadcast_in_dim"(%cst_2) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<16xf32>) -> tensor<1x112x112x16xf32>
%97 = "mhlo.broadcast_in_dim"(%cst_3) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<16xf32>) -> tensor<1x112x112x16xf32>
%98 = "mhlo.broadcast_in_dim"(%cst_4) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<16xf32>) -> tensor<1x112x112x16xf32>
%99 = "mhlo.broadcast_in_dim"(%7) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<16xf32>) -> tensor<1x112x112x16xf32>
%100 = mhlo.subtract %95, %98 : tensor<1x112x112x16xf32>
%101 = mhlo.multiply %100, %96 : tensor<1x112x112x16xf32>
%102 = mhlo.divide %101, %99 : tensor<1x112x112x16xf32>
%103 = mhlo.add %102, %97 : tensor<1x112x112x16xf32>
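    // Inverted residual block: 1x1 expansion to 96 channels, stride-2 3x3 depthwise convolution down to 56x56, then 1x1 projection to 24 channels.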
%104 = mhlo.convolution(%103, %cst_108) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x112x112x16xf32>, tensor<1x1x16x96xf32>) -> tensor<1x112x112x96xf32>
%105 = "mhlo.broadcast_in_dim"(%cst_109) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x112x112x96xf32>
%106 = "mhlo.broadcast_in_dim"(%cst_110) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x112x112x96xf32>
%107 = "mhlo.broadcast_in_dim"(%cst_111) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x112x112x96xf32>
%108 = "mhlo.broadcast_in_dim"(%8) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x112x112x96xf32>
%109 = mhlo.subtract %104, %107 : tensor<1x112x112x96xf32>
%110 = mhlo.multiply %109, %105 : tensor<1x112x112x96xf32>
%111 = mhlo.divide %110, %108 : tensor<1x112x112x96xf32>
%112 = mhlo.add %111, %106 : tensor<1x112x112x96xf32>
%113 = "mhlo.clamp"(%0, %112, %3) : (tensor<f32>, tensor<1x112x112x96xf32>, tensor<f32>) -> tensor<1x112x112x96xf32>
%114 = "mhlo.pad"(%113, %0) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x112x112x96xf32>, tensor<f32>) -> tensor<1x113x113x96xf32>
%115 = mhlo.convolution(%114, %9) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 96 : i64} : (tensor<1x113x113x96xf32>, tensor<3x3x96x1xf32>) -> tensor<1x56x56x96xf32>
%116 = "mhlo.broadcast_in_dim"(%cst_112) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x56x56x96xf32>
%117 = "mhlo.broadcast_in_dim"(%cst_113) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x56x56x96xf32>
%118 = "mhlo.broadcast_in_dim"(%cst_114) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x56x56x96xf32>
%119 = "mhlo.broadcast_in_dim"(%10) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x56x56x96xf32>
%120 = mhlo.subtract %115, %118 : tensor<1x56x56x96xf32>
%121 = mhlo.multiply %120, %116 : tensor<1x56x56x96xf32>
%122 = mhlo.divide %121, %119 : tensor<1x56x56x96xf32>
%123 = mhlo.add %122, %117 : tensor<1x56x56x96xf32>
%124 = "mhlo.clamp"(%0, %123, %3) : (tensor<f32>, tensor<1x56x56x96xf32>, tensor<f32>) -> tensor<1x56x56x96xf32>
%125 = mhlo.convolution(%124, %cst_104) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x96xf32>, tensor<1x1x96x24xf32>) -> tensor<1x56x56x24xf32>
%126 = "mhlo.broadcast_in_dim"(%cst_105) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x56x56x24xf32>
%127 = "mhlo.broadcast_in_dim"(%cst_106) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x56x56x24xf32>
%128 = "mhlo.broadcast_in_dim"(%cst_107) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x56x56x24xf32>
%129 = "mhlo.broadcast_in_dim"(%11) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x56x56x24xf32>
%130 = mhlo.subtract %125, %128 : tensor<1x56x56x24xf32>
%131 = mhlo.multiply %130, %126 : tensor<1x56x56x24xf32>
%132 = mhlo.divide %131, %129 : tensor<1x56x56x24xf32>
%133 = mhlo.add %132, %127 : tensor<1x56x56x24xf32>
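    // Second 24-channel block: expand to 144, 3x3 depthwise, project back to 24; its output is added to the block input %133 below (the first residual connection).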
%134 = mhlo.convolution(%133, %cst_97) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x24xf32>, tensor<1x1x24x144xf32>) -> tensor<1x56x56x144xf32>
%135 = "mhlo.broadcast_in_dim"(%cst_98) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%136 = "mhlo.broadcast_in_dim"(%cst_99) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%137 = "mhlo.broadcast_in_dim"(%cst_100) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%138 = "mhlo.broadcast_in_dim"(%12) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%139 = mhlo.subtract %134, %137 : tensor<1x56x56x144xf32>
%140 = mhlo.multiply %139, %135 : tensor<1x56x56x144xf32>
%141 = mhlo.divide %140, %138 : tensor<1x56x56x144xf32>
%142 = mhlo.add %141, %136 : tensor<1x56x56x144xf32>
%143 = "mhlo.clamp"(%0, %142, %3) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%144 = "mhlo.pad"(%143, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x58x58x144xf32>
%145 = mhlo.convolution(%144, %13) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 144 : i64} : (tensor<1x58x58x144xf32>, tensor<3x3x144x1xf32>) -> tensor<1x56x56x144xf32>
%146 = "mhlo.broadcast_in_dim"(%cst_101) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%147 = "mhlo.broadcast_in_dim"(%cst_102) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%148 = "mhlo.broadcast_in_dim"(%cst_103) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%149 = "mhlo.broadcast_in_dim"(%14) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%150 = mhlo.subtract %145, %148 : tensor<1x56x56x144xf32>
%151 = mhlo.multiply %150, %146 : tensor<1x56x56x144xf32>
%152 = mhlo.divide %151, %149 : tensor<1x56x56x144xf32>
%153 = mhlo.add %152, %147 : tensor<1x56x56x144xf32>
%154 = "mhlo.clamp"(%0, %153, %3) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%155 = mhlo.convolution(%154, %cst_93) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x144xf32>, tensor<1x1x144x24xf32>) -> tensor<1x56x56x24xf32>
%156 = "mhlo.broadcast_in_dim"(%cst_94) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x56x56x24xf32>
%157 = "mhlo.broadcast_in_dim"(%cst_95) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x56x56x24xf32>
%158 = "mhlo.broadcast_in_dim"(%cst_96) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x56x56x24xf32>
%159 = "mhlo.broadcast_in_dim"(%15) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x56x56x24xf32>
%160 = mhlo.subtract %155, %158 : tensor<1x56x56x24xf32>
%161 = mhlo.multiply %160, %156 : tensor<1x56x56x24xf32>
%162 = mhlo.divide %161, %159 : tensor<1x56x56x24xf32>
%163 = mhlo.add %162, %157 : tensor<1x56x56x24xf32>
%164 = chlo.broadcast_add %133, %163 : (tensor<1x56x56x24xf32>, tensor<1x56x56x24xf32>) -> tensor<1x56x56x24xf32>
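    // Downsampling block: expand to 144, stride-2 depthwise to 28x28, project to 32 channels; the two following 28x28 blocks use 192-channel expansions and end in residual adds.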
%165 = mhlo.convolution(%164, %cst_86) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x24xf32>, tensor<1x1x24x144xf32>) -> tensor<1x56x56x144xf32>
%166 = "mhlo.broadcast_in_dim"(%cst_87) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%167 = "mhlo.broadcast_in_dim"(%cst_88) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%168 = "mhlo.broadcast_in_dim"(%cst_89) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%169 = "mhlo.broadcast_in_dim"(%16) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%170 = mhlo.subtract %165, %168 : tensor<1x56x56x144xf32>
%171 = mhlo.multiply %170, %166 : tensor<1x56x56x144xf32>
%172 = mhlo.divide %171, %169 : tensor<1x56x56x144xf32>
%173 = mhlo.add %172, %167 : tensor<1x56x56x144xf32>
%174 = "mhlo.clamp"(%0, %173, %3) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%175 = "mhlo.pad"(%174, %0) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x57x57x144xf32>
%176 = mhlo.convolution(%175, %17) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 144 : i64} : (tensor<1x57x57x144xf32>, tensor<3x3x144x1xf32>) -> tensor<1x28x28x144xf32>
%177 = "mhlo.broadcast_in_dim"(%cst_90) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x28x28x144xf32>
%178 = "mhlo.broadcast_in_dim"(%cst_91) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x28x28x144xf32>
%179 = "mhlo.broadcast_in_dim"(%cst_92) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x28x28x144xf32>
%180 = "mhlo.broadcast_in_dim"(%18) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x28x28x144xf32>
%181 = mhlo.subtract %176, %179 : tensor<1x28x28x144xf32>
%182 = mhlo.multiply %181, %177 : tensor<1x28x28x144xf32>
%183 = mhlo.divide %182, %180 : tensor<1x28x28x144xf32>
%184 = mhlo.add %183, %178 : tensor<1x28x28x144xf32>
%185 = "mhlo.clamp"(%0, %184, %3) : (tensor<f32>, tensor<1x28x28x144xf32>, tensor<f32>) -> tensor<1x28x28x144xf32>
%186 = mhlo.convolution(%185, %cst_82) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x144xf32>, tensor<1x1x144x32xf32>) -> tensor<1x28x28x32xf32>
%187 = "mhlo.broadcast_in_dim"(%cst_83) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%188 = "mhlo.broadcast_in_dim"(%cst_84) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%189 = "mhlo.broadcast_in_dim"(%cst_85) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%190 = "mhlo.broadcast_in_dim"(%19) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%191 = mhlo.subtract %186, %189 : tensor<1x28x28x32xf32>
%192 = mhlo.multiply %191, %187 : tensor<1x28x28x32xf32>
%193 = mhlo.divide %192, %190 : tensor<1x28x28x32xf32>
%194 = mhlo.add %193, %188 : tensor<1x28x28x32xf32>
%195 = mhlo.convolution(%194, %cst_75) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%196 = "mhlo.broadcast_in_dim"(%cst_76) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%197 = "mhlo.broadcast_in_dim"(%cst_77) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%198 = "mhlo.broadcast_in_dim"(%cst_78) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%199 = "mhlo.broadcast_in_dim"(%20) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%200 = mhlo.subtract %195, %198 : tensor<1x28x28x192xf32>
%201 = mhlo.multiply %200, %196 : tensor<1x28x28x192xf32>
%202 = mhlo.divide %201, %199 : tensor<1x28x28x192xf32>
%203 = mhlo.add %202, %197 : tensor<1x28x28x192xf32>
%204 = "mhlo.clamp"(%0, %203, %3) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%205 = "mhlo.pad"(%204, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x30x30x192xf32>
%206 = mhlo.convolution(%205, %21) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x30x30x192xf32>, tensor<3x3x192x1xf32>) -> tensor<1x28x28x192xf32>
%207 = "mhlo.broadcast_in_dim"(%cst_79) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%208 = "mhlo.broadcast_in_dim"(%cst_80) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%209 = "mhlo.broadcast_in_dim"(%cst_81) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%210 = "mhlo.broadcast_in_dim"(%22) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%211 = mhlo.subtract %206, %209 : tensor<1x28x28x192xf32>
%212 = mhlo.multiply %211, %207 : tensor<1x28x28x192xf32>
%213 = mhlo.divide %212, %210 : tensor<1x28x28x192xf32>
%214 = mhlo.add %213, %208 : tensor<1x28x28x192xf32>
%215 = "mhlo.clamp"(%0, %214, %3) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%216 = mhlo.convolution(%215, %cst_71) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x192xf32>, tensor<1x1x192x32xf32>) -> tensor<1x28x28x32xf32>
%217 = "mhlo.broadcast_in_dim"(%cst_72) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%218 = "mhlo.broadcast_in_dim"(%cst_73) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%219 = "mhlo.broadcast_in_dim"(%cst_74) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%220 = "mhlo.broadcast_in_dim"(%23) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%221 = mhlo.subtract %216, %219 : tensor<1x28x28x32xf32>
%222 = mhlo.multiply %221, %217 : tensor<1x28x28x32xf32>
%223 = mhlo.divide %222, %220 : tensor<1x28x28x32xf32>
%224 = mhlo.add %223, %218 : tensor<1x28x28x32xf32>
%225 = chlo.broadcast_add %194, %224 : (tensor<1x28x28x32xf32>, tensor<1x28x28x32xf32>) -> tensor<1x28x28x32xf32>
%226 = mhlo.convolution(%225, %cst_64) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%227 = "mhlo.broadcast_in_dim"(%cst_65) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%228 = "mhlo.broadcast_in_dim"(%cst_66) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%229 = "mhlo.broadcast_in_dim"(%cst_67) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%230 = "mhlo.broadcast_in_dim"(%24) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%231 = mhlo.subtract %226, %229 : tensor<1x28x28x192xf32>
%232 = mhlo.multiply %231, %227 : tensor<1x28x28x192xf32>
%233 = mhlo.divide %232, %230 : tensor<1x28x28x192xf32>
%234 = mhlo.add %233, %228 : tensor<1x28x28x192xf32>
%235 = "mhlo.clamp"(%0, %234, %3) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%236 = "mhlo.pad"(%235, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x30x30x192xf32>
%237 = mhlo.convolution(%236, %25) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x30x30x192xf32>, tensor<3x3x192x1xf32>) -> tensor<1x28x28x192xf32>
%238 = "mhlo.broadcast_in_dim"(%cst_68) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%239 = "mhlo.broadcast_in_dim"(%cst_69) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%240 = "mhlo.broadcast_in_dim"(%cst_70) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%241 = "mhlo.broadcast_in_dim"(%26) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%242 = mhlo.subtract %237, %240 : tensor<1x28x28x192xf32>
%243 = mhlo.multiply %242, %238 : tensor<1x28x28x192xf32>
%244 = mhlo.divide %243, %241 : tensor<1x28x28x192xf32>
%245 = mhlo.add %244, %239 : tensor<1x28x28x192xf32>
%246 = "mhlo.clamp"(%0, %245, %3) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%247 = mhlo.convolution(%246, %cst_60) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x192xf32>, tensor<1x1x192x32xf32>) -> tensor<1x28x28x32xf32>
%248 = "mhlo.broadcast_in_dim"(%cst_61) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%249 = "mhlo.broadcast_in_dim"(%cst_62) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%250 = "mhlo.broadcast_in_dim"(%cst_63) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%251 = "mhlo.broadcast_in_dim"(%27) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%252 = mhlo.subtract %247, %250 : tensor<1x28x28x32xf32>
%253 = mhlo.multiply %252, %248 : tensor<1x28x28x32xf32>
%254 = mhlo.divide %253, %251 : tensor<1x28x28x32xf32>
%255 = mhlo.add %254, %249 : tensor<1x28x28x32xf32>
%256 = chlo.broadcast_add %225, %255 : (tensor<1x28x28x32xf32>, tensor<1x28x28x32xf32>) -> tensor<1x28x28x32xf32>
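    // Downsample to 14x14: expand to 192, stride-2 depthwise, project to 64 channels, then three 64-channel blocks with 384-channel expansions and residual adds.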
%257 = mhlo.convolution(%256, %cst_53) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%258 = "mhlo.broadcast_in_dim"(%cst_54) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%259 = "mhlo.broadcast_in_dim"(%cst_55) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%260 = "mhlo.broadcast_in_dim"(%cst_56) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%261 = "mhlo.broadcast_in_dim"(%28) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%262 = mhlo.subtract %257, %260 : tensor<1x28x28x192xf32>
%263 = mhlo.multiply %262, %258 : tensor<1x28x28x192xf32>
%264 = mhlo.divide %263, %261 : tensor<1x28x28x192xf32>
%265 = mhlo.add %264, %259 : tensor<1x28x28x192xf32>
%266 = "mhlo.clamp"(%0, %265, %3) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%267 = "mhlo.pad"(%266, %0) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x29x29x192xf32>
%268 = mhlo.convolution(%267, %29) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x29x29x192xf32>, tensor<3x3x192x1xf32>) -> tensor<1x14x14x192xf32>
%269 = "mhlo.broadcast_in_dim"(%cst_57) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x14x14x192xf32>
%270 = "mhlo.broadcast_in_dim"(%cst_58) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x14x14x192xf32>
%271 = "mhlo.broadcast_in_dim"(%cst_59) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x14x14x192xf32>
%272 = "mhlo.broadcast_in_dim"(%30) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x14x14x192xf32>
%273 = mhlo.subtract %268, %271 : tensor<1x14x14x192xf32>
%274 = mhlo.multiply %273, %269 : tensor<1x14x14x192xf32>
%275 = mhlo.divide %274, %272 : tensor<1x14x14x192xf32>
%276 = mhlo.add %275, %270 : tensor<1x14x14x192xf32>
%277 = "mhlo.clamp"(%0, %276, %3) : (tensor<f32>, tensor<1x14x14x192xf32>, tensor<f32>) -> tensor<1x14x14x192xf32>
%278 = mhlo.convolution(%277, %cst_49) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x192xf32>, tensor<1x1x192x64xf32>) -> tensor<1x14x14x64xf32>
%279 = "mhlo.broadcast_in_dim"(%cst_50) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%280 = "mhlo.broadcast_in_dim"(%cst_51) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%281 = "mhlo.broadcast_in_dim"(%cst_52) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%282 = "mhlo.broadcast_in_dim"(%31) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%283 = mhlo.subtract %278, %281 : tensor<1x14x14x64xf32>
%284 = mhlo.multiply %283, %279 : tensor<1x14x14x64xf32>
%285 = mhlo.divide %284, %282 : tensor<1x14x14x64xf32>
%286 = mhlo.add %285, %280 : tensor<1x14x14x64xf32>
%287 = mhlo.convolution(%286, %cst_42) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%288 = "mhlo.broadcast_in_dim"(%cst_43) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%289 = "mhlo.broadcast_in_dim"(%cst_44) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%290 = "mhlo.broadcast_in_dim"(%cst_45) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%291 = "mhlo.broadcast_in_dim"(%32) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%292 = mhlo.subtract %287, %290 : tensor<1x14x14x384xf32>
%293 = mhlo.multiply %292, %288 : tensor<1x14x14x384xf32>
%294 = mhlo.divide %293, %291 : tensor<1x14x14x384xf32>
%295 = mhlo.add %294, %289 : tensor<1x14x14x384xf32>
%296 = "mhlo.clamp"(%0, %295, %3) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%297 = "mhlo.pad"(%296, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x16x16x384xf32>
%298 = mhlo.convolution(%297, %33) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x16x16x384xf32>, tensor<3x3x384x1xf32>) -> tensor<1x14x14x384xf32>
%299 = "mhlo.broadcast_in_dim"(%cst_46) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%300 = "mhlo.broadcast_in_dim"(%cst_47) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%301 = "mhlo.broadcast_in_dim"(%cst_48) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%302 = "mhlo.broadcast_in_dim"(%34) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%303 = mhlo.subtract %298, %301 : tensor<1x14x14x384xf32>
%304 = mhlo.multiply %303, %299 : tensor<1x14x14x384xf32>
%305 = mhlo.divide %304, %302 : tensor<1x14x14x384xf32>
%306 = mhlo.add %305, %300 : tensor<1x14x14x384xf32>
%307 = "mhlo.clamp"(%0, %306, %3) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%308 = mhlo.convolution(%307, %cst_38) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%309 = "mhlo.broadcast_in_dim"(%cst_39) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%310 = "mhlo.broadcast_in_dim"(%cst_40) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%311 = "mhlo.broadcast_in_dim"(%cst_41) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%312 = "mhlo.broadcast_in_dim"(%35) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%313 = mhlo.subtract %308, %311 : tensor<1x14x14x64xf32>
%314 = mhlo.multiply %313, %309 : tensor<1x14x14x64xf32>
%315 = mhlo.divide %314, %312 : tensor<1x14x14x64xf32>
%316 = mhlo.add %315, %310 : tensor<1x14x14x64xf32>
%317 = chlo.broadcast_add %286, %316 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%318 = mhlo.convolution(%317, %cst_31) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%319 = "mhlo.broadcast_in_dim"(%cst_32) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%320 = "mhlo.broadcast_in_dim"(%cst_33) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%321 = "mhlo.broadcast_in_dim"(%cst_34) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%322 = "mhlo.broadcast_in_dim"(%36) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%323 = mhlo.subtract %318, %321 : tensor<1x14x14x384xf32>
%324 = mhlo.multiply %323, %319 : tensor<1x14x14x384xf32>
%325 = mhlo.divide %324, %322 : tensor<1x14x14x384xf32>
%326 = mhlo.add %325, %320 : tensor<1x14x14x384xf32>
%327 = "mhlo.clamp"(%0, %326, %3) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%328 = "mhlo.pad"(%327, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x16x16x384xf32>
%329 = mhlo.convolution(%328, %37) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x16x16x384xf32>, tensor<3x3x384x1xf32>) -> tensor<1x14x14x384xf32>
%330 = "mhlo.broadcast_in_dim"(%cst_35) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%331 = "mhlo.broadcast_in_dim"(%cst_36) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%332 = "mhlo.broadcast_in_dim"(%cst_37) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%333 = "mhlo.broadcast_in_dim"(%38) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%334 = mhlo.subtract %329, %332 : tensor<1x14x14x384xf32>
%335 = mhlo.multiply %334, %330 : tensor<1x14x14x384xf32>
%336 = mhlo.divide %335, %333 : tensor<1x14x14x384xf32>
%337 = mhlo.add %336, %331 : tensor<1x14x14x384xf32>
%338 = "mhlo.clamp"(%0, %337, %3) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%339 = mhlo.convolution(%338, %cst_27) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%340 = "mhlo.broadcast_in_dim"(%cst_28) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%341 = "mhlo.broadcast_in_dim"(%cst_29) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%342 = "mhlo.broadcast_in_dim"(%cst_30) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%343 = "mhlo.broadcast_in_dim"(%39) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%344 = mhlo.subtract %339, %342 : tensor<1x14x14x64xf32>
%345 = mhlo.multiply %344, %340 : tensor<1x14x14x64xf32>
%346 = mhlo.divide %345, %343 : tensor<1x14x14x64xf32>
%347 = mhlo.add %346, %341 : tensor<1x14x14x64xf32>
%348 = chlo.broadcast_add %317, %347 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%349 = mhlo.convolution(%348, %cst_20) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%350 = "mhlo.broadcast_in_dim"(%cst_21) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%351 = "mhlo.broadcast_in_dim"(%cst_22) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%352 = "mhlo.broadcast_in_dim"(%cst_23) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%353 = "mhlo.broadcast_in_dim"(%40) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%354 = mhlo.subtract %349, %352 : tensor<1x14x14x384xf32>
%355 = mhlo.multiply %354, %350 : tensor<1x14x14x384xf32>
%356 = mhlo.divide %355, %353 : tensor<1x14x14x384xf32>
%357 = mhlo.add %356, %351 : tensor<1x14x14x384xf32>
%358 = "mhlo.clamp"(%0, %357, %3) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%359 = "mhlo.pad"(%358, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x16x16x384xf32>
%360 = mhlo.convolution(%359, %41) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x16x16x384xf32>, tensor<3x3x384x1xf32>) -> tensor<1x14x14x384xf32>
%361 = "mhlo.broadcast_in_dim"(%cst_24) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%362 = "mhlo.broadcast_in_dim"(%cst_25) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%363 = "mhlo.broadcast_in_dim"(%cst_26) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%364 = "mhlo.broadcast_in_dim"(%42) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%365 = mhlo.subtract %360, %363 : tensor<1x14x14x384xf32>
%366 = mhlo.multiply %365, %361 : tensor<1x14x14x384xf32>
%367 = mhlo.divide %366, %364 : tensor<1x14x14x384xf32>
%368 = mhlo.add %367, %362 : tensor<1x14x14x384xf32>
%369 = "mhlo.clamp"(%0, %368, %3) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%370 = mhlo.convolution(%369, %cst_16) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%371 = "mhlo.broadcast_in_dim"(%cst_17) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%372 = "mhlo.broadcast_in_dim"(%cst_18) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%373 = "mhlo.broadcast_in_dim"(%cst_19) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%374 = "mhlo.broadcast_in_dim"(%43) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%375 = mhlo.subtract %370, %373 : tensor<1x14x14x64xf32>
%376 = mhlo.multiply %375, %371 : tensor<1x14x14x64xf32>
%377 = mhlo.divide %376, %374 : tensor<1x14x14x64xf32>
%378 = mhlo.add %377, %372 : tensor<1x14x14x64xf32>
%379 = chlo.broadcast_add %348, %378 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
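    // Transition to 96 channels (384-channel expansion, no residual), followed by two 96-channel blocks with 576-channel expansions and residual adds.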
%380 = mhlo.convolution(%379, %cst_185) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%381 = "mhlo.broadcast_in_dim"(%cst_186) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%382 = "mhlo.broadcast_in_dim"(%cst_187) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%383 = "mhlo.broadcast_in_dim"(%cst_188) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%384 = "mhlo.broadcast_in_dim"(%44) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%385 = mhlo.subtract %380, %383 : tensor<1x14x14x384xf32>
%386 = mhlo.multiply %385, %381 : tensor<1x14x14x384xf32>
%387 = mhlo.divide %386, %384 : tensor<1x14x14x384xf32>
%388 = mhlo.add %387, %382 : tensor<1x14x14x384xf32>
%389 = "mhlo.clamp"(%0, %388, %3) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%390 = "mhlo.pad"(%389, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x16x16x384xf32>
%391 = mhlo.convolution(%390, %45) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x16x16x384xf32>, tensor<3x3x384x1xf32>) -> tensor<1x14x14x384xf32>
%392 = "mhlo.broadcast_in_dim"(%cst_189) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%393 = "mhlo.broadcast_in_dim"(%cst_190) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%394 = "mhlo.broadcast_in_dim"(%cst_191) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%395 = "mhlo.broadcast_in_dim"(%46) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%396 = mhlo.subtract %391, %394 : tensor<1x14x14x384xf32>
%397 = mhlo.multiply %396, %392 : tensor<1x14x14x384xf32>
%398 = mhlo.divide %397, %395 : tensor<1x14x14x384xf32>
%399 = mhlo.add %398, %393 : tensor<1x14x14x384xf32>
%400 = "mhlo.clamp"(%0, %399, %3) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%401 = mhlo.convolution(%400, %cst_181) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x96xf32>) -> tensor<1x14x14x96xf32>
%402 = "mhlo.broadcast_in_dim"(%cst_182) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%403 = "mhlo.broadcast_in_dim"(%cst_183) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%404 = "mhlo.broadcast_in_dim"(%cst_184) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%405 = "mhlo.broadcast_in_dim"(%47) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%406 = mhlo.subtract %401, %404 : tensor<1x14x14x96xf32>
%407 = mhlo.multiply %406, %402 : tensor<1x14x14x96xf32>
%408 = mhlo.divide %407, %405 : tensor<1x14x14x96xf32>
%409 = mhlo.add %408, %403 : tensor<1x14x14x96xf32>
%410 = mhlo.convolution(%409, %cst_174) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%411 = "mhlo.broadcast_in_dim"(%cst_175) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%412 = "mhlo.broadcast_in_dim"(%cst_176) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%413 = "mhlo.broadcast_in_dim"(%cst_177) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%414 = "mhlo.broadcast_in_dim"(%48) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%415 = mhlo.subtract %410, %413 : tensor<1x14x14x576xf32>
%416 = mhlo.multiply %415, %411 : tensor<1x14x14x576xf32>
%417 = mhlo.divide %416, %414 : tensor<1x14x14x576xf32>
%418 = mhlo.add %417, %412 : tensor<1x14x14x576xf32>
%419 = "mhlo.clamp"(%0, %418, %3) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%420 = "mhlo.pad"(%419, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x16x16x576xf32>
%421 = mhlo.convolution(%420, %49) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x16x16x576xf32>, tensor<3x3x576x1xf32>) -> tensor<1x14x14x576xf32>
%422 = "mhlo.broadcast_in_dim"(%cst_178) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%423 = "mhlo.broadcast_in_dim"(%cst_179) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%424 = "mhlo.broadcast_in_dim"(%cst_180) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%425 = "mhlo.broadcast_in_dim"(%50) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%426 = mhlo.subtract %421, %424 : tensor<1x14x14x576xf32>
%427 = mhlo.multiply %426, %422 : tensor<1x14x14x576xf32>
%428 = mhlo.divide %427, %425 : tensor<1x14x14x576xf32>
%429 = mhlo.add %428, %423 : tensor<1x14x14x576xf32>
%430 = "mhlo.clamp"(%0, %429, %3) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%431 = mhlo.convolution(%430, %cst_170) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x14x14x96xf32>
%432 = "mhlo.broadcast_in_dim"(%cst_171) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%433 = "mhlo.broadcast_in_dim"(%cst_172) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%434 = "mhlo.broadcast_in_dim"(%cst_173) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%435 = "mhlo.broadcast_in_dim"(%51) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%436 = mhlo.subtract %431, %434 : tensor<1x14x14x96xf32>
%437 = mhlo.multiply %436, %432 : tensor<1x14x14x96xf32>
%438 = mhlo.divide %437, %435 : tensor<1x14x14x96xf32>
%439 = mhlo.add %438, %433 : tensor<1x14x14x96xf32>
%440 = chlo.broadcast_add %409, %439 : (tensor<1x14x14x96xf32>, tensor<1x14x14x96xf32>) -> tensor<1x14x14x96xf32>
%441 = mhlo.convolution(%440, %cst_163) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%442 = "mhlo.broadcast_in_dim"(%cst_164) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%443 = "mhlo.broadcast_in_dim"(%cst_165) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%444 = "mhlo.broadcast_in_dim"(%cst_166) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%445 = "mhlo.broadcast_in_dim"(%52) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%446 = mhlo.subtract %441, %444 : tensor<1x14x14x576xf32>
%447 = mhlo.multiply %446, %442 : tensor<1x14x14x576xf32>
%448 = mhlo.divide %447, %445 : tensor<1x14x14x576xf32>
%449 = mhlo.add %448, %443 : tensor<1x14x14x576xf32>
%450 = "mhlo.clamp"(%0, %449, %3) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%451 = "mhlo.pad"(%450, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x16x16x576xf32>
%452 = mhlo.convolution(%451, %53) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x16x16x576xf32>, tensor<3x3x576x1xf32>) -> tensor<1x14x14x576xf32>
%453 = "mhlo.broadcast_in_dim"(%cst_167) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%454 = "mhlo.broadcast_in_dim"(%cst_168) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%455 = "mhlo.broadcast_in_dim"(%cst_169) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%456 = "mhlo.broadcast_in_dim"(%54) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%457 = mhlo.subtract %452, %455 : tensor<1x14x14x576xf32>
%458 = mhlo.multiply %457, %453 : tensor<1x14x14x576xf32>
%459 = mhlo.divide %458, %456 : tensor<1x14x14x576xf32>
%460 = mhlo.add %459, %454 : tensor<1x14x14x576xf32>
%461 = "mhlo.clamp"(%0, %460, %3) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%462 = mhlo.convolution(%461, %cst_159) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x14x14x96xf32>
%463 = "mhlo.broadcast_in_dim"(%cst_160) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%464 = "mhlo.broadcast_in_dim"(%cst_161) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%465 = "mhlo.broadcast_in_dim"(%cst_162) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%466 = "mhlo.broadcast_in_dim"(%55) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%467 = mhlo.subtract %462, %465 : tensor<1x14x14x96xf32>
%468 = mhlo.multiply %467, %463 : tensor<1x14x14x96xf32>
%469 = mhlo.divide %468, %466 : tensor<1x14x14x96xf32>
%470 = mhlo.add %469, %464 : tensor<1x14x14x96xf32>
%471 = chlo.broadcast_add %440, %470 : (tensor<1x14x14x96xf32>, tensor<1x14x14x96xf32>) -> tensor<1x14x14x96xf32>
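    // Downsample to 7x7: expand to 576, stride-2 depthwise, project to 160 channels, then start the 160-channel blocks with 960-channel expansions.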
%472 = mhlo.convolution(%471, %cst_152) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%473 = "mhlo.broadcast_in_dim"(%cst_153) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%474 = "mhlo.broadcast_in_dim"(%cst_154) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%475 = "mhlo.broadcast_in_dim"(%cst_155) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%476 = "mhlo.broadcast_in_dim"(%56) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%477 = mhlo.subtract %472, %475 : tensor<1x14x14x576xf32>
%478 = mhlo.multiply %477, %473 : tensor<1x14x14x576xf32>
%479 = mhlo.divide %478, %476 : tensor<1x14x14x576xf32>
%480 = mhlo.add %479, %474 : tensor<1x14x14x576xf32>
%481 = "mhlo.clamp"(%0, %480, %3) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%482 = "mhlo.pad"(%481, %0) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x15x15x576xf32>
%483 = mhlo.convolution(%482, %57) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x15x15x576xf32>, tensor<3x3x576x1xf32>) -> tensor<1x7x7x576xf32>
%484 = "mhlo.broadcast_in_dim"(%cst_156) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x7x7x576xf32>
%485 = "mhlo.broadcast_in_dim"(%cst_157) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x7x7x576xf32>
%486 = "mhlo.broadcast_in_dim"(%cst_158) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x7x7x576xf32>
%487 = "mhlo.broadcast_in_dim"(%58) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x7x7x576xf32>
%488 = mhlo.subtract %483, %486 : tensor<1x7x7x576xf32>
%489 = mhlo.multiply %488, %484 : tensor<1x7x7x576xf32>
%490 = mhlo.divide %489, %487 : tensor<1x7x7x576xf32>
%491 = mhlo.add %490, %485 : tensor<1x7x7x576xf32>
%492 = "mhlo.clamp"(%0, %491, %3) : (tensor<f32>, tensor<1x7x7x576xf32>, tensor<f32>) -> tensor<1x7x7x576xf32>
%493 = mhlo.convolution(%492, %cst_148) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x576xf32>, tensor<1x1x576x160xf32>) -> tensor<1x7x7x160xf32>
%494 = "mhlo.broadcast_in_dim"(%cst_149) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%495 = "mhlo.broadcast_in_dim"(%cst_150) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%496 = "mhlo.broadcast_in_dim"(%cst_151) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%497 = "mhlo.broadcast_in_dim"(%59) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%498 = mhlo.subtract %493, %496 : tensor<1x7x7x160xf32>
%499 = mhlo.multiply %498, %494 : tensor<1x7x7x160xf32>
%500 = mhlo.divide %499, %497 : tensor<1x7x7x160xf32>
%501 = mhlo.add %500, %495 : tensor<1x7x7x160xf32>
%502 = mhlo.convolution(%501, %cst_141) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%503 = "mhlo.broadcast_in_dim"(%cst_142) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%504 = "mhlo.broadcast_in_dim"(%cst_143) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%505 = "mhlo.broadcast_in_dim"(%cst_144) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%506 = "mhlo.broadcast_in_dim"(%60) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%507 = mhlo.subtract %502, %505 : tensor<1x7x7x960xf32>
%508 = mhlo.multiply %507, %503 : tensor<1x7x7x960xf32>
%509 = mhlo.divide %508, %506 : tensor<1x7x7x960xf32>
%510 = mhlo.add %509, %504 : tensor<1x7x7x960xf32>
%511 = "mhlo.clamp"(%0, %510, %3) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%512 = "mhlo.pad"(%511, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x9x9x960xf32>
%513 = mhlo.convolution(%512, %61) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x9x9x960xf32>, tensor<3x3x960x1xf32>) -> tensor<1x7x7x960xf32>
%514 = "mhlo.broadcast_in_dim"(%cst_145) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%515 = "mhlo.broadcast_in_dim"(%cst_146) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%516 = "mhlo.broadcast_in_dim"(%cst_147) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%517 = "mhlo.broadcast_in_dim"(%62) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%518 = mhlo.subtract %513, %516 : tensor<1x7x7x960xf32>
%519 = mhlo.multiply %518, %514 : tensor<1x7x7x960xf32>
%520 = mhlo.divide %519, %517 : tensor<1x7x7x960xf32>
%521 = mhlo.add %520, %515 : tensor<1x7x7x960xf32>
%522 = "mhlo.clamp"(%0, %521, %3) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%523 = mhlo.convolution(%522, %cst_137) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x160xf32>) -> tensor<1x7x7x160xf32>
%524 = "mhlo.broadcast_in_dim"(%cst_138) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%525 = "mhlo.broadcast_in_dim"(%cst_139) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%526 = "mhlo.broadcast_in_dim"(%cst_140) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%527 = "mhlo.broadcast_in_dim"(%63) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%528 = mhlo.subtract %523, %526 : tensor<1x7x7x160xf32>
%529 = mhlo.multiply %528, %524 : tensor<1x7x7x160xf32>
%530 = mhlo.divide %529, %527 : tensor<1x7x7x160xf32>
%531 = mhlo.add %530, %525 : tensor<1x7x7x160xf32>
%532 = chlo.broadcast_add %501, %531 : (tensor<1x7x7x160xf32>, tensor<1x7x7x160xf32>) -> tensor<1x7x7x160xf32>
%533 = mhlo.convolution(%532, %cst_130) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%534 = "mhlo.broadcast_in_dim"(%cst_131) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%535 = "mhlo.broadcast_in_dim"(%cst_132) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%536 = "mhlo.broadcast_in_dim"(%cst_133) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%537 = "mhlo.broadcast_in_dim"(%64) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%538 = mhlo.subtract %533, %536 : tensor<1x7x7x960xf32>
%539 = mhlo.multiply %538, %534 : tensor<1x7x7x960xf32>
%540 = mhlo.divide %539, %537 : tensor<1x7x7x960xf32>
%541 = mhlo.add %540, %535 : tensor<1x7x7x960xf32>
%542 = "mhlo.clamp"(%0, %541, %3) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%543 = "mhlo.pad"(%542, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x9x9x960xf32>
%544 = mhlo.convolution(%543, %65) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x9x9x960xf32>, tensor<3x3x960x1xf32>) -> tensor<1x7x7x960xf32>
%545 = "mhlo.broadcast_in_dim"(%cst_134) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%546 = "mhlo.broadcast_in_dim"(%cst_135) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%547 = "mhlo.broadcast_in_dim"(%cst_136) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%548 = "mhlo.broadcast_in_dim"(%66) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%549 = mhlo.subtract %544, %547 : tensor<1x7x7x960xf32>
%550 = mhlo.multiply %549, %545 : tensor<1x7x7x960xf32>
%551 = mhlo.divide %550, %548 : tensor<1x7x7x960xf32>
%552 = mhlo.add %551, %546 : tensor<1x7x7x960xf32>
%553 = "mhlo.clamp"(%0, %552, %3) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%554 = mhlo.convolution(%553, %cst_126) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x160xf32>) -> tensor<1x7x7x160xf32>
%555 = "mhlo.broadcast_in_dim"(%cst_127) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%556 = "mhlo.broadcast_in_dim"(%cst_128) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%557 = "mhlo.broadcast_in_dim"(%cst_129) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%558 = "mhlo.broadcast_in_dim"(%67) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%559 = mhlo.subtract %554, %557 : tensor<1x7x7x160xf32>
%560 = mhlo.multiply %559, %555 : tensor<1x7x7x160xf32>
%561 = mhlo.divide %560, %558 : tensor<1x7x7x160xf32>
%562 = mhlo.add %561, %556 : tensor<1x7x7x160xf32>
%563 = chlo.broadcast_add %532, %562 : (tensor<1x7x7x160xf32>, tensor<1x7x7x160xf32>) -> tensor<1x7x7x160xf32>
%564 = mhlo.convolution(%563, %cst_119) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%565 = "mhlo.broadcast_in_dim"(%cst_120) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%566 = "mhlo.broadcast_in_dim"(%cst_121) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%567 = "mhlo.broadcast_in_dim"(%cst_122) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%568 = "mhlo.broadcast_in_dim"(%68) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%569 = mhlo.subtract %564, %567 : tensor<1x7x7x960xf32>
%570 = mhlo.multiply %569, %565 : tensor<1x7x7x960xf32>
%571 = mhlo.divide %570, %568 : tensor<1x7x7x960xf32>
%572 = mhlo.add %571, %566 : tensor<1x7x7x960xf32>
%573 = "mhlo.clamp"(%0, %572, %3) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%574 = "mhlo.pad"(%573, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x9x9x960xf32>
%575 = mhlo.convolution(%574, %69) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x9x9x960xf32>, tensor<3x3x960x1xf32>) -> tensor<1x7x7x960xf32>
%576 = "mhlo.broadcast_in_dim"(%cst_123) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%577 = "mhlo.broadcast_in_dim"(%cst_124) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%578 = "mhlo.broadcast_in_dim"(%cst_125) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%579 = "mhlo.broadcast_in_dim"(%70) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%580 = mhlo.subtract %575, %578 : tensor<1x7x7x960xf32>
%581 = mhlo.multiply %580, %576 : tensor<1x7x7x960xf32>
%582 = mhlo.divide %581, %579 : tensor<1x7x7x960xf32>
%583 = mhlo.add %582, %577 : tensor<1x7x7x960xf32>
%584 = "mhlo.clamp"(%0, %583, %3) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%585 = mhlo.convolution(%584, %cst_115) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x320xf32>) -> tensor<1x7x7x320xf32>
%586 = "mhlo.broadcast_in_dim"(%cst_116) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<320xf32>) -> tensor<1x7x7x320xf32>
%587 = "mhlo.broadcast_in_dim"(%cst_117) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<320xf32>) -> tensor<1x7x7x320xf32>
%588 = "mhlo.broadcast_in_dim"(%cst_118) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<320xf32>) -> tensor<1x7x7x320xf32>
%589 = "mhlo.broadcast_in_dim"(%71) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<320xf32>) -> tensor<1x7x7x320xf32>
%590 = mhlo.subtract %585, %588 : tensor<1x7x7x320xf32>
%591 = mhlo.multiply %590, %586 : tensor<1x7x7x320xf32>
%592 = mhlo.divide %591, %589 : tensor<1x7x7x320xf32>
%593 = mhlo.add %592, %587 : tensor<1x7x7x320xf32>
%594 = mhlo.convolution(%593, %cst_8) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x320xf32>, tensor<1x1x320x1280xf32>) -> tensor<1x7x7x1280xf32>
%595 = "mhlo.broadcast_in_dim"(%cst_9) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<1280xf32>) -> tensor<1x7x7x1280xf32>
%596 = "mhlo.broadcast_in_dim"(%cst_10) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<1280xf32>) -> tensor<1x7x7x1280xf32>
%597 = "mhlo.broadcast_in_dim"(%cst_11) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<1280xf32>) -> tensor<1x7x7x1280xf32>
%598 = "mhlo.broadcast_in_dim"(%72) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<1280xf32>) -> tensor<1x7x7x1280xf32>
%599 = mhlo.subtract %594, %597 : tensor<1x7x7x1280xf32>
%600 = mhlo.multiply %599, %595 : tensor<1x7x7x1280xf32>
%601 = mhlo.divide %600, %598 : tensor<1x7x7x1280xf32>
%602 = mhlo.add %601, %596 : tensor<1x7x7x1280xf32>
%603 = "mhlo.clamp"(%0, %602, %3) : (tensor<f32>, tensor<1x7x7x1280xf32>, tensor<f32>) -> tensor<1x7x7x1280xf32>
%604 = "mhlo.reduce"(%603, %0) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%616 = mhlo.add %arg1, %arg2 : tensor<f32>
"mhlo.return"(%616) : (tensor<f32>) -> ()
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<1x7x7x1280xf32>, tensor<f32>) -> tensor<1x1280xf32>
%605 = chlo.broadcast_divide %604, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x1280xf32>, tensor<f32>) -> tensor<1x1280xf32>
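    // Classifier head: the 1x1280 pooled features are multiplied by a 1280x1000 weight matrix and a 1000-element bias is added.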
%606 = "mhlo.dot"(%605, %cst) : (tensor<1x1280xf32>, tensor<1280x1000xf32>) -> tensor<1x1000xf32>
%607 = "mhlo.broadcast_in_dim"(%cst_0) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<1000xf32>) -> tensor<1x1000xf32>
%608 = mhlo.add %606, %607 : tensor<1x1000xf32>
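    // Numerically stable softmax: subtract the per-row maximum, exponentiate, then normalize by the row sum.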
%609 = "mhlo.reduce"(%608, %1) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%616 = mhlo.maximum %arg1, %arg2 : tensor<f32>
"mhlo.return"(%616) : (tensor<f32>) -> ()
}) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x1000xf32>, tensor<f32>) -> tensor<1xf32>
%610 = linalg.tensor_expand_shape %609 [[0, 1]] : tensor<1xf32> into tensor<1x1xf32>
%611 = chlo.broadcast_subtract %608, %610 : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32>
%612 = "mhlo.exponential"(%611) : (tensor<1x1000xf32>) -> tensor<1x1000xf32>
%613 = "mhlo.reduce"(%612, %0) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%616 = mhlo.add %arg1, %arg2 : tensor<f32>
"mhlo.return"(%616) : (tensor<f32>) -> ()
}) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x1000xf32>, tensor<f32>) -> tensor<1xf32>
%614 = linalg.tensor_expand_shape %613 [[0, 1]] : tensor<1xf32> into tensor<1x1xf32>
%615 = chlo.broadcast_divide %612, %614 : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32>
return %615 : tensor<1x1000xf32>
}
// -----// IR Dump After LegalizeInputTypes //----- //
builtin.module {
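  // Entry point: casts the incoming !hal.buffer_view to a tensor, calls the model function, and casts the 1x1000 result back to a buffer view.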
builtin.func @call(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi = "{\22a\22:[[\22ndarray\22,\22f32\22,4,1,224,224,3]],\22r\22:[[\22ndarray\22,\22f32\22,2,1,1000]],\22v\22:1}"}} {
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x224x224x3xf32>
%1 = call @"__inference_<lambda>_133580"(%0) : (tensor<1x224x224x3xf32>) -> tensor<1x1000xf32>
%2 = hal.tensor.cast %1 : tensor<1x1000xf32> -> !hal.buffer_view
return %2 : !hal.buffer_view
}
builtin.func private @"__inference_<lambda>_133580"(%arg0: tensor<1x224x224x3xf32> {tf._user_specified_name = "x"}) -> tensor<1x1000xf32> attributes {tf._construction_context = "kEagerRuntime", tf._input_shapes = [#tf_type.shape<1x224x224x3>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, 
#tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>], tf.signature.is_stateful} {
%cst = constant opaque<"_", "0xDEADBEEF"> : tensor<1280x1000xf32>
%cst_0 = constant opaque<"_", "0xDEADBEEF"> : tensor<1000xf32>
%cst_1 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x16xf32>
%cst_2 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_3 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_4 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_5 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_6 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_7 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_8 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x320x1280xf32>
%cst_9 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_10 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_11 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_12 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x3x32xf32>
%cst_13 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_14 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_15 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_16 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_17 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_18 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_19 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_20 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_21 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_22 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_23 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_24 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_25 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_26 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_27 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_28 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_29 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_30 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_31 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_32 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_33 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_34 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_35 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_36 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_37 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_38 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_39 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_40 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_41 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_42 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_43 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_44 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_45 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_46 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_47 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_48 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_49 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x64xf32>
%cst_50 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_51 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_52 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_53 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_54 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_55 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_56 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_57 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_58 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_59 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_60 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32>
%cst_61 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_62 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_63 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_64 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_65 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_66 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_67 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_68 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_69 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_70 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_71 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32>
%cst_72 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_73 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_74 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_75 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_76 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_77 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_78 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_79 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_80 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_81 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_82 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x32xf32>
%cst_83 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_84 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_85 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_86 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32>
%cst_87 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_88 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_89 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_90 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_91 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_92 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_93 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x24xf32>
%cst_94 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_95 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_96 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_97 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32>
%cst_98 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_99 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_100 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_101 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_102 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_103 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_104 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x24xf32>
%cst_105 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_106 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_107 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_108 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x16x96xf32>
%cst_109 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_110 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_111 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_112 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_113 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_114 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_115 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x320xf32>
%cst_116 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_117 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_118 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_119 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_120 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_121 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_122 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_123 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_124 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_125 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_126 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32>
%cst_127 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_128 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_129 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_130 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_131 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_132 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_133 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_134 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_135 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_136 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_137 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32>
%cst_138 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_139 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_140 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_141 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_142 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_143 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_144 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_145 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_146 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_147 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_148 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x160xf32>
%cst_149 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_150 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_151 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_152 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_153 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_154 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_155 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_156 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_157 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_158 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_159 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32>
%cst_160 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_161 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_162 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_163 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_164 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_165 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_166 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_167 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_168 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_169 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_170 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32>
%cst_171 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_172 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_173 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_174 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_175 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_176 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_177 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_178 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_179 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_180 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_181 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x96xf32>
%cst_182 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_183 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_184 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_185 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_186 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_187 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_188 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_189 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_190 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_191 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%0 = mhlo.constant dense<0.000000e+00> : tensor<f32>
%1 = mhlo.constant dense<0xFF800000> : tensor<f32>
%2 = mhlo.constant dense<4.900000e+01> : tensor<f32>
%3 = mhlo.constant dense<6.000000e+00> : tensor<f32>
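    // Scalar constants above: 0.0 and 6.0 are the ReLU6 clamp bounds (0.0 also serves as pad value and add-reduce init), 0xFF800000 (-inf) initializes the max reduction, and 49.0 is the 7x7 average-pool divisor.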
%4 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%5 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x32x1xf32>
%6 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%7 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%8 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%9 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x96x1xf32>
%10 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%11 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%12 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%13 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x144x1xf32>
%14 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%15 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%16 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%17 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x144x1xf32>
%18 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%19 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%20 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%21 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x192x1xf32>
%22 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%23 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%24 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%25 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x192x1xf32>
%26 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%27 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%28 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%29 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x192x1xf32>
%30 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%31 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%32 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%33 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x384x1xf32>
%34 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%35 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%36 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%37 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x384x1xf32>
%38 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%39 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%40 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%41 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x384x1xf32>
%42 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%43 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%44 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%45 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x384x1xf32>
%46 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%47 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%48 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%49 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x576x1xf32>
%50 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%51 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%52 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%53 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x576x1xf32>
%54 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%55 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%56 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%57 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x576x1xf32>
%58 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%59 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%60 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%61 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x960x1xf32>
%62 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%63 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%64 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%65 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x960x1xf32>
%66 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%67 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%68 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%69 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x960x1xf32>
%cst_192 = constant dense<0.000000e+00> : tensor<f32>
%70 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%71 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%72 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
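    // Stem: pad the 224x224 input on the high edges, run a stride-2 3x3 convolution to 32 channels, apply the folded batch-norm arithmetic (subtract/multiply/divide/add), and clamp to [0, 6] (ReLU6).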
%73 = "mhlo.pad"(%arg0, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x224x224x3xf32>, tensor<f32>) -> tensor<1x225x225x3xf32>
%74 = mhlo.convolution(%73, %cst_12) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%75 = "mhlo.broadcast_in_dim"(%cst_13) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x112x112x32xf32>
%76 = "mhlo.broadcast_in_dim"(%cst_14) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x112x112x32xf32>
%77 = "mhlo.broadcast_in_dim"(%cst_15) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x112x112x32xf32>
%78 = "mhlo.broadcast_in_dim"(%4) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x112x112x32xf32>
%79 = mhlo.subtract %74, %77 : tensor<1x112x112x32xf32>
%80 = mhlo.multiply %79, %75 : tensor<1x112x112x32xf32>
%81 = mhlo.divide %80, %78 : tensor<1x112x112x32xf32>
%82 = mhlo.add %81, %76 : tensor<1x112x112x32xf32>
%83 = "mhlo.clamp"(%0, %82, %3) : (tensor<f32>, tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x112x112x32xf32>
%84 = "mhlo.pad"(%83, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x114x114x32xf32>
%85 = mhlo.convolution(%84, %5) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 32 : i64} : (tensor<1x114x114x32xf32>, tensor<3x3x32x1xf32>) -> tensor<1x112x112x32xf32>
%86 = "mhlo.broadcast_in_dim"(%cst_5) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x112x112x32xf32>
%87 = "mhlo.broadcast_in_dim"(%cst_6) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x112x112x32xf32>
%88 = "mhlo.broadcast_in_dim"(%cst_7) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x112x112x32xf32>
%89 = "mhlo.broadcast_in_dim"(%6) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x112x112x32xf32>
%90 = mhlo.subtract %85, %88 : tensor<1x112x112x32xf32>
%91 = mhlo.multiply %90, %86 : tensor<1x112x112x32xf32>
%92 = mhlo.divide %91, %89 : tensor<1x112x112x32xf32>
%93 = mhlo.add %92, %87 : tensor<1x112x112x32xf32>
%94 = "mhlo.clamp"(%0, %93, %3) : (tensor<f32>, tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x112x112x32xf32>
%95 = mhlo.convolution(%94, %cst_1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x112x112x32xf32>, tensor<1x1x32x16xf32>) -> tensor<1x112x112x16xf32>
%96 = "mhlo.broadcast_in_dim"(%cst_2) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<16xf32>) -> tensor<1x112x112x16xf32>
%97 = "mhlo.broadcast_in_dim"(%cst_3) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<16xf32>) -> tensor<1x112x112x16xf32>
%98 = "mhlo.broadcast_in_dim"(%cst_4) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<16xf32>) -> tensor<1x112x112x16xf32>
%99 = "mhlo.broadcast_in_dim"(%7) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<16xf32>) -> tensor<1x112x112x16xf32>
%100 = mhlo.subtract %95, %98 : tensor<1x112x112x16xf32>
%101 = mhlo.multiply %100, %96 : tensor<1x112x112x16xf32>
%102 = mhlo.divide %101, %99 : tensor<1x112x112x16xf32>
%103 = mhlo.add %102, %97 : tensor<1x112x112x16xf32>
%104 = mhlo.convolution(%103, %cst_108) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x112x112x16xf32>, tensor<1x1x16x96xf32>) -> tensor<1x112x112x96xf32>
%105 = "mhlo.broadcast_in_dim"(%cst_109) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x112x112x96xf32>
%106 = "mhlo.broadcast_in_dim"(%cst_110) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x112x112x96xf32>
%107 = "mhlo.broadcast_in_dim"(%cst_111) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x112x112x96xf32>
%108 = "mhlo.broadcast_in_dim"(%8) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x112x112x96xf32>
%109 = mhlo.subtract %104, %107 : tensor<1x112x112x96xf32>
%110 = mhlo.multiply %109, %105 : tensor<1x112x112x96xf32>
%111 = mhlo.divide %110, %108 : tensor<1x112x112x96xf32>
%112 = mhlo.add %111, %106 : tensor<1x112x112x96xf32>
%113 = "mhlo.clamp"(%0, %112, %3) : (tensor<f32>, tensor<1x112x112x96xf32>, tensor<f32>) -> tensor<1x112x112x96xf32>
%114 = "mhlo.pad"(%113, %0) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x112x112x96xf32>, tensor<f32>) -> tensor<1x113x113x96xf32>
%115 = mhlo.convolution(%114, %9) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 96 : i64} : (tensor<1x113x113x96xf32>, tensor<3x3x96x1xf32>) -> tensor<1x56x56x96xf32>
%116 = "mhlo.broadcast_in_dim"(%cst_112) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x56x56x96xf32>
%117 = "mhlo.broadcast_in_dim"(%cst_113) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x56x56x96xf32>
%118 = "mhlo.broadcast_in_dim"(%cst_114) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x56x56x96xf32>
%119 = "mhlo.broadcast_in_dim"(%10) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x56x56x96xf32>
%120 = mhlo.subtract %115, %118 : tensor<1x56x56x96xf32>
%121 = mhlo.multiply %120, %116 : tensor<1x56x56x96xf32>
%122 = mhlo.divide %121, %119 : tensor<1x56x56x96xf32>
%123 = mhlo.add %122, %117 : tensor<1x56x56x96xf32>
%124 = "mhlo.clamp"(%0, %123, %3) : (tensor<f32>, tensor<1x56x56x96xf32>, tensor<f32>) -> tensor<1x56x56x96xf32>
%125 = mhlo.convolution(%124, %cst_104) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x96xf32>, tensor<1x1x96x24xf32>) -> tensor<1x56x56x24xf32>
%126 = "mhlo.broadcast_in_dim"(%cst_105) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x56x56x24xf32>
%127 = "mhlo.broadcast_in_dim"(%cst_106) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x56x56x24xf32>
%128 = "mhlo.broadcast_in_dim"(%cst_107) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x56x56x24xf32>
%129 = "mhlo.broadcast_in_dim"(%11) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x56x56x24xf32>
%130 = mhlo.subtract %125, %128 : tensor<1x56x56x24xf32>
%131 = mhlo.multiply %130, %126 : tensor<1x56x56x24xf32>
%132 = mhlo.divide %131, %129 : tensor<1x56x56x24xf32>
%133 = mhlo.add %132, %127 : tensor<1x56x56x24xf32>
%134 = mhlo.convolution(%133, %cst_97) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x24xf32>, tensor<1x1x24x144xf32>) -> tensor<1x56x56x144xf32>
%135 = "mhlo.broadcast_in_dim"(%cst_98) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%136 = "mhlo.broadcast_in_dim"(%cst_99) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%137 = "mhlo.broadcast_in_dim"(%cst_100) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%138 = "mhlo.broadcast_in_dim"(%12) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%139 = mhlo.subtract %134, %137 : tensor<1x56x56x144xf32>
%140 = mhlo.multiply %139, %135 : tensor<1x56x56x144xf32>
%141 = mhlo.divide %140, %138 : tensor<1x56x56x144xf32>
%142 = mhlo.add %141, %136 : tensor<1x56x56x144xf32>
%143 = "mhlo.clamp"(%0, %142, %3) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%144 = "mhlo.pad"(%143, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x58x58x144xf32>
%145 = mhlo.convolution(%144, %13) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 144 : i64} : (tensor<1x58x58x144xf32>, tensor<3x3x144x1xf32>) -> tensor<1x56x56x144xf32>
%146 = "mhlo.broadcast_in_dim"(%cst_101) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%147 = "mhlo.broadcast_in_dim"(%cst_102) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%148 = "mhlo.broadcast_in_dim"(%cst_103) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%149 = "mhlo.broadcast_in_dim"(%14) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%150 = mhlo.subtract %145, %148 : tensor<1x56x56x144xf32>
%151 = mhlo.multiply %150, %146 : tensor<1x56x56x144xf32>
%152 = mhlo.divide %151, %149 : tensor<1x56x56x144xf32>
%153 = mhlo.add %152, %147 : tensor<1x56x56x144xf32>
%154 = "mhlo.clamp"(%0, %153, %3) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%155 = mhlo.convolution(%154, %cst_93) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x144xf32>, tensor<1x1x144x24xf32>) -> tensor<1x56x56x24xf32>
%156 = "mhlo.broadcast_in_dim"(%cst_94) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x56x56x24xf32>
%157 = "mhlo.broadcast_in_dim"(%cst_95) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x56x56x24xf32>
%158 = "mhlo.broadcast_in_dim"(%cst_96) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x56x56x24xf32>
%159 = "mhlo.broadcast_in_dim"(%15) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x56x56x24xf32>
%160 = mhlo.subtract %155, %158 : tensor<1x56x56x24xf32>
%161 = mhlo.multiply %160, %156 : tensor<1x56x56x24xf32>
%162 = mhlo.divide %161, %159 : tensor<1x56x56x24xf32>
%163 = mhlo.add %162, %157 : tensor<1x56x56x24xf32>
%164 = chlo.broadcast_add %133, %163 : (tensor<1x56x56x24xf32>, tensor<1x56x56x24xf32>) -> tensor<1x56x56x24xf32>
%165 = mhlo.convolution(%164, %cst_86) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x24xf32>, tensor<1x1x24x144xf32>) -> tensor<1x56x56x144xf32>
%166 = "mhlo.broadcast_in_dim"(%cst_87) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%167 = "mhlo.broadcast_in_dim"(%cst_88) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%168 = "mhlo.broadcast_in_dim"(%cst_89) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%169 = "mhlo.broadcast_in_dim"(%16) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%170 = mhlo.subtract %165, %168 : tensor<1x56x56x144xf32>
%171 = mhlo.multiply %170, %166 : tensor<1x56x56x144xf32>
%172 = mhlo.divide %171, %169 : tensor<1x56x56x144xf32>
%173 = mhlo.add %172, %167 : tensor<1x56x56x144xf32>
%174 = "mhlo.clamp"(%0, %173, %3) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%175 = "mhlo.pad"(%174, %0) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x57x57x144xf32>
%176 = mhlo.convolution(%175, %17) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 144 : i64} : (tensor<1x57x57x144xf32>, tensor<3x3x144x1xf32>) -> tensor<1x28x28x144xf32>
%177 = "mhlo.broadcast_in_dim"(%cst_90) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x28x28x144xf32>
%178 = "mhlo.broadcast_in_dim"(%cst_91) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x28x28x144xf32>
%179 = "mhlo.broadcast_in_dim"(%cst_92) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x28x28x144xf32>
%180 = "mhlo.broadcast_in_dim"(%18) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x28x28x144xf32>
%181 = mhlo.subtract %176, %179 : tensor<1x28x28x144xf32>
%182 = mhlo.multiply %181, %177 : tensor<1x28x28x144xf32>
%183 = mhlo.divide %182, %180 : tensor<1x28x28x144xf32>
%184 = mhlo.add %183, %178 : tensor<1x28x28x144xf32>
%185 = "mhlo.clamp"(%0, %184, %3) : (tensor<f32>, tensor<1x28x28x144xf32>, tensor<f32>) -> tensor<1x28x28x144xf32>
%186 = mhlo.convolution(%185, %cst_82) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x144xf32>, tensor<1x1x144x32xf32>) -> tensor<1x28x28x32xf32>
%187 = "mhlo.broadcast_in_dim"(%cst_83) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%188 = "mhlo.broadcast_in_dim"(%cst_84) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%189 = "mhlo.broadcast_in_dim"(%cst_85) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%190 = "mhlo.broadcast_in_dim"(%19) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%191 = mhlo.subtract %186, %189 : tensor<1x28x28x32xf32>
%192 = mhlo.multiply %191, %187 : tensor<1x28x28x32xf32>
%193 = mhlo.divide %192, %190 : tensor<1x28x28x32xf32>
%194 = mhlo.add %193, %188 : tensor<1x28x28x32xf32>
%195 = mhlo.convolution(%194, %cst_75) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%196 = "mhlo.broadcast_in_dim"(%cst_76) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%197 = "mhlo.broadcast_in_dim"(%cst_77) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%198 = "mhlo.broadcast_in_dim"(%cst_78) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%199 = "mhlo.broadcast_in_dim"(%20) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%200 = mhlo.subtract %195, %198 : tensor<1x28x28x192xf32>
%201 = mhlo.multiply %200, %196 : tensor<1x28x28x192xf32>
%202 = mhlo.divide %201, %199 : tensor<1x28x28x192xf32>
%203 = mhlo.add %202, %197 : tensor<1x28x28x192xf32>
%204 = "mhlo.clamp"(%0, %203, %3) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%205 = "mhlo.pad"(%204, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x30x30x192xf32>
%206 = mhlo.convolution(%205, %21) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x30x30x192xf32>, tensor<3x3x192x1xf32>) -> tensor<1x28x28x192xf32>
%207 = "mhlo.broadcast_in_dim"(%cst_79) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%208 = "mhlo.broadcast_in_dim"(%cst_80) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%209 = "mhlo.broadcast_in_dim"(%cst_81) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%210 = "mhlo.broadcast_in_dim"(%22) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%211 = mhlo.subtract %206, %209 : tensor<1x28x28x192xf32>
%212 = mhlo.multiply %211, %207 : tensor<1x28x28x192xf32>
%213 = mhlo.divide %212, %210 : tensor<1x28x28x192xf32>
%214 = mhlo.add %213, %208 : tensor<1x28x28x192xf32>
%215 = "mhlo.clamp"(%0, %214, %3) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%216 = mhlo.convolution(%215, %cst_71) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x192xf32>, tensor<1x1x192x32xf32>) -> tensor<1x28x28x32xf32>
%217 = "mhlo.broadcast_in_dim"(%cst_72) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%218 = "mhlo.broadcast_in_dim"(%cst_73) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%219 = "mhlo.broadcast_in_dim"(%cst_74) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%220 = "mhlo.broadcast_in_dim"(%23) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%221 = mhlo.subtract %216, %219 : tensor<1x28x28x32xf32>
%222 = mhlo.multiply %221, %217 : tensor<1x28x28x32xf32>
%223 = mhlo.divide %222, %220 : tensor<1x28x28x32xf32>
%224 = mhlo.add %223, %218 : tensor<1x28x28x32xf32>
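    // Residual connection: the block input (%194) is added back to the projected output of the block.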
%225 = chlo.broadcast_add %194, %224 : (tensor<1x28x28x32xf32>, tensor<1x28x28x32xf32>) -> tensor<1x28x28x32xf32>
%226 = mhlo.convolution(%225, %cst_64) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%227 = "mhlo.broadcast_in_dim"(%cst_65) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%228 = "mhlo.broadcast_in_dim"(%cst_66) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%229 = "mhlo.broadcast_in_dim"(%cst_67) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%230 = "mhlo.broadcast_in_dim"(%24) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%231 = mhlo.subtract %226, %229 : tensor<1x28x28x192xf32>
%232 = mhlo.multiply %231, %227 : tensor<1x28x28x192xf32>
%233 = mhlo.divide %232, %230 : tensor<1x28x28x192xf32>
%234 = mhlo.add %233, %228 : tensor<1x28x28x192xf32>
%235 = "mhlo.clamp"(%0, %234, %3) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%236 = "mhlo.pad"(%235, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x30x30x192xf32>
%237 = mhlo.convolution(%236, %25) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x30x30x192xf32>, tensor<3x3x192x1xf32>) -> tensor<1x28x28x192xf32>
%238 = "mhlo.broadcast_in_dim"(%cst_68) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%239 = "mhlo.broadcast_in_dim"(%cst_69) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%240 = "mhlo.broadcast_in_dim"(%cst_70) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%241 = "mhlo.broadcast_in_dim"(%26) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%242 = mhlo.subtract %237, %240 : tensor<1x28x28x192xf32>
%243 = mhlo.multiply %242, %238 : tensor<1x28x28x192xf32>
%244 = mhlo.divide %243, %241 : tensor<1x28x28x192xf32>
%245 = mhlo.add %244, %239 : tensor<1x28x28x192xf32>
%246 = "mhlo.clamp"(%0, %245, %3) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%247 = mhlo.convolution(%246, %cst_60) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x192xf32>, tensor<1x1x192x32xf32>) -> tensor<1x28x28x32xf32>
%248 = "mhlo.broadcast_in_dim"(%cst_61) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%249 = "mhlo.broadcast_in_dim"(%cst_62) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%250 = "mhlo.broadcast_in_dim"(%cst_63) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%251 = "mhlo.broadcast_in_dim"(%27) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%252 = mhlo.subtract %247, %250 : tensor<1x28x28x32xf32>
%253 = mhlo.multiply %252, %248 : tensor<1x28x28x32xf32>
%254 = mhlo.divide %253, %251 : tensor<1x28x28x32xf32>
%255 = mhlo.add %254, %249 : tensor<1x28x28x32xf32>
%256 = chlo.broadcast_add %225, %255 : (tensor<1x28x28x32xf32>, tensor<1x28x28x32xf32>) -> tensor<1x28x28x32xf32>
%257 = mhlo.convolution(%256, %cst_53) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%258 = "mhlo.broadcast_in_dim"(%cst_54) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%259 = "mhlo.broadcast_in_dim"(%cst_55) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%260 = "mhlo.broadcast_in_dim"(%cst_56) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%261 = "mhlo.broadcast_in_dim"(%28) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%262 = mhlo.subtract %257, %260 : tensor<1x28x28x192xf32>
%263 = mhlo.multiply %262, %258 : tensor<1x28x28x192xf32>
%264 = mhlo.divide %263, %261 : tensor<1x28x28x192xf32>
%265 = mhlo.add %264, %259 : tensor<1x28x28x192xf32>
%266 = "mhlo.clamp"(%0, %265, %3) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%267 = "mhlo.pad"(%266, %0) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x29x29x192xf32>
%268 = mhlo.convolution(%267, %29) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x29x29x192xf32>, tensor<3x3x192x1xf32>) -> tensor<1x14x14x192xf32>
%269 = "mhlo.broadcast_in_dim"(%cst_57) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x14x14x192xf32>
%270 = "mhlo.broadcast_in_dim"(%cst_58) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x14x14x192xf32>
%271 = "mhlo.broadcast_in_dim"(%cst_59) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x14x14x192xf32>
%272 = "mhlo.broadcast_in_dim"(%30) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x14x14x192xf32>
%273 = mhlo.subtract %268, %271 : tensor<1x14x14x192xf32>
%274 = mhlo.multiply %273, %269 : tensor<1x14x14x192xf32>
%275 = mhlo.divide %274, %272 : tensor<1x14x14x192xf32>
%276 = mhlo.add %275, %270 : tensor<1x14x14x192xf32>
%277 = "mhlo.clamp"(%0, %276, %3) : (tensor<f32>, tensor<1x14x14x192xf32>, tensor<f32>) -> tensor<1x14x14x192xf32>
%278 = mhlo.convolution(%277, %cst_49) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x192xf32>, tensor<1x1x192x64xf32>) -> tensor<1x14x14x64xf32>
%279 = "mhlo.broadcast_in_dim"(%cst_50) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%280 = "mhlo.broadcast_in_dim"(%cst_51) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%281 = "mhlo.broadcast_in_dim"(%cst_52) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%282 = "mhlo.broadcast_in_dim"(%31) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%283 = mhlo.subtract %278, %281 : tensor<1x14x14x64xf32>
%284 = mhlo.multiply %283, %279 : tensor<1x14x14x64xf32>
%285 = mhlo.divide %284, %282 : tensor<1x14x14x64xf32>
%286 = mhlo.add %285, %280 : tensor<1x14x14x64xf32>
%287 = mhlo.convolution(%286, %cst_42) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%288 = "mhlo.broadcast_in_dim"(%cst_43) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%289 = "mhlo.broadcast_in_dim"(%cst_44) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%290 = "mhlo.broadcast_in_dim"(%cst_45) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%291 = "mhlo.broadcast_in_dim"(%32) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%292 = mhlo.subtract %287, %290 : tensor<1x14x14x384xf32>
%293 = mhlo.multiply %292, %288 : tensor<1x14x14x384xf32>
%294 = mhlo.divide %293, %291 : tensor<1x14x14x384xf32>
%295 = mhlo.add %294, %289 : tensor<1x14x14x384xf32>
%296 = "mhlo.clamp"(%0, %295, %3) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%297 = "mhlo.pad"(%296, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x16x16x384xf32>
%298 = mhlo.convolution(%297, %33) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x16x16x384xf32>, tensor<3x3x384x1xf32>) -> tensor<1x14x14x384xf32>
%299 = "mhlo.broadcast_in_dim"(%cst_46) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%300 = "mhlo.broadcast_in_dim"(%cst_47) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%301 = "mhlo.broadcast_in_dim"(%cst_48) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%302 = "mhlo.broadcast_in_dim"(%34) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%303 = mhlo.subtract %298, %301 : tensor<1x14x14x384xf32>
%304 = mhlo.multiply %303, %299 : tensor<1x14x14x384xf32>
%305 = mhlo.divide %304, %302 : tensor<1x14x14x384xf32>
%306 = mhlo.add %305, %300 : tensor<1x14x14x384xf32>
%307 = "mhlo.clamp"(%0, %306, %3) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%308 = mhlo.convolution(%307, %cst_38) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%309 = "mhlo.broadcast_in_dim"(%cst_39) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%310 = "mhlo.broadcast_in_dim"(%cst_40) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%311 = "mhlo.broadcast_in_dim"(%cst_41) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%312 = "mhlo.broadcast_in_dim"(%35) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%313 = mhlo.subtract %308, %311 : tensor<1x14x14x64xf32>
%314 = mhlo.multiply %313, %309 : tensor<1x14x14x64xf32>
%315 = mhlo.divide %314, %312 : tensor<1x14x14x64xf32>
%316 = mhlo.add %315, %310 : tensor<1x14x14x64xf32>
%317 = chlo.broadcast_add %286, %316 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%318 = mhlo.convolution(%317, %cst_31) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%319 = "mhlo.broadcast_in_dim"(%cst_32) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%320 = "mhlo.broadcast_in_dim"(%cst_33) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%321 = "mhlo.broadcast_in_dim"(%cst_34) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%322 = "mhlo.broadcast_in_dim"(%36) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%323 = mhlo.subtract %318, %321 : tensor<1x14x14x384xf32>
%324 = mhlo.multiply %323, %319 : tensor<1x14x14x384xf32>
%325 = mhlo.divide %324, %322 : tensor<1x14x14x384xf32>
%326 = mhlo.add %325, %320 : tensor<1x14x14x384xf32>
%327 = "mhlo.clamp"(%0, %326, %3) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%328 = "mhlo.pad"(%327, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x16x16x384xf32>
%329 = mhlo.convolution(%328, %37) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x16x16x384xf32>, tensor<3x3x384x1xf32>) -> tensor<1x14x14x384xf32>
%330 = "mhlo.broadcast_in_dim"(%cst_35) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%331 = "mhlo.broadcast_in_dim"(%cst_36) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%332 = "mhlo.broadcast_in_dim"(%cst_37) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%333 = "mhlo.broadcast_in_dim"(%38) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%334 = mhlo.subtract %329, %332 : tensor<1x14x14x384xf32>
%335 = mhlo.multiply %334, %330 : tensor<1x14x14x384xf32>
%336 = mhlo.divide %335, %333 : tensor<1x14x14x384xf32>
%337 = mhlo.add %336, %331 : tensor<1x14x14x384xf32>
%338 = "mhlo.clamp"(%0, %337, %3) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%339 = mhlo.convolution(%338, %cst_27) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%340 = "mhlo.broadcast_in_dim"(%cst_28) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%341 = "mhlo.broadcast_in_dim"(%cst_29) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%342 = "mhlo.broadcast_in_dim"(%cst_30) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%343 = "mhlo.broadcast_in_dim"(%39) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%344 = mhlo.subtract %339, %342 : tensor<1x14x14x64xf32>
%345 = mhlo.multiply %344, %340 : tensor<1x14x14x64xf32>
%346 = mhlo.divide %345, %343 : tensor<1x14x14x64xf32>
%347 = mhlo.add %346, %341 : tensor<1x14x14x64xf32>
%348 = chlo.broadcast_add %317, %347 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%349 = mhlo.convolution(%348, %cst_20) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%350 = "mhlo.broadcast_in_dim"(%cst_21) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%351 = "mhlo.broadcast_in_dim"(%cst_22) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%352 = "mhlo.broadcast_in_dim"(%cst_23) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%353 = "mhlo.broadcast_in_dim"(%40) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%354 = mhlo.subtract %349, %352 : tensor<1x14x14x384xf32>
%355 = mhlo.multiply %354, %350 : tensor<1x14x14x384xf32>
%356 = mhlo.divide %355, %353 : tensor<1x14x14x384xf32>
%357 = mhlo.add %356, %351 : tensor<1x14x14x384xf32>
%358 = "mhlo.clamp"(%0, %357, %3) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%359 = "mhlo.pad"(%358, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x16x16x384xf32>
%360 = mhlo.convolution(%359, %41) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x16x16x384xf32>, tensor<3x3x384x1xf32>) -> tensor<1x14x14x384xf32>
%361 = "mhlo.broadcast_in_dim"(%cst_24) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%362 = "mhlo.broadcast_in_dim"(%cst_25) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%363 = "mhlo.broadcast_in_dim"(%cst_26) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%364 = "mhlo.broadcast_in_dim"(%42) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%365 = mhlo.subtract %360, %363 : tensor<1x14x14x384xf32>
%366 = mhlo.multiply %365, %361 : tensor<1x14x14x384xf32>
%367 = mhlo.divide %366, %364 : tensor<1x14x14x384xf32>
%368 = mhlo.add %367, %362 : tensor<1x14x14x384xf32>
%369 = "mhlo.clamp"(%0, %368, %3) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%370 = mhlo.convolution(%369, %cst_16) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%371 = "mhlo.broadcast_in_dim"(%cst_17) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%372 = "mhlo.broadcast_in_dim"(%cst_18) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%373 = "mhlo.broadcast_in_dim"(%cst_19) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%374 = "mhlo.broadcast_in_dim"(%43) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%375 = mhlo.subtract %370, %373 : tensor<1x14x14x64xf32>
%376 = mhlo.multiply %375, %371 : tensor<1x14x14x64xf32>
%377 = mhlo.divide %376, %374 : tensor<1x14x14x64xf32>
%378 = mhlo.add %377, %372 : tensor<1x14x14x64xf32>
%379 = chlo.broadcast_add %348, %378 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%380 = mhlo.convolution(%379, %cst_185) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%381 = "mhlo.broadcast_in_dim"(%cst_186) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%382 = "mhlo.broadcast_in_dim"(%cst_187) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%383 = "mhlo.broadcast_in_dim"(%cst_188) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%384 = "mhlo.broadcast_in_dim"(%44) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%385 = mhlo.subtract %380, %383 : tensor<1x14x14x384xf32>
%386 = mhlo.multiply %385, %381 : tensor<1x14x14x384xf32>
%387 = mhlo.divide %386, %384 : tensor<1x14x14x384xf32>
%388 = mhlo.add %387, %382 : tensor<1x14x14x384xf32>
%389 = "mhlo.clamp"(%0, %388, %3) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%390 = "mhlo.pad"(%389, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x16x16x384xf32>
%391 = mhlo.convolution(%390, %45) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x16x16x384xf32>, tensor<3x3x384x1xf32>) -> tensor<1x14x14x384xf32>
%392 = "mhlo.broadcast_in_dim"(%cst_189) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%393 = "mhlo.broadcast_in_dim"(%cst_190) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%394 = "mhlo.broadcast_in_dim"(%cst_191) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%395 = "mhlo.broadcast_in_dim"(%46) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%396 = mhlo.subtract %391, %394 : tensor<1x14x14x384xf32>
%397 = mhlo.multiply %396, %392 : tensor<1x14x14x384xf32>
%398 = mhlo.divide %397, %395 : tensor<1x14x14x384xf32>
%399 = mhlo.add %398, %393 : tensor<1x14x14x384xf32>
%400 = "mhlo.clamp"(%0, %399, %3) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%401 = mhlo.convolution(%400, %cst_181) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x96xf32>) -> tensor<1x14x14x96xf32>
%402 = "mhlo.broadcast_in_dim"(%cst_182) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%403 = "mhlo.broadcast_in_dim"(%cst_183) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%404 = "mhlo.broadcast_in_dim"(%cst_184) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%405 = "mhlo.broadcast_in_dim"(%47) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%406 = mhlo.subtract %401, %404 : tensor<1x14x14x96xf32>
%407 = mhlo.multiply %406, %402 : tensor<1x14x14x96xf32>
%408 = mhlo.divide %407, %405 : tensor<1x14x14x96xf32>
%409 = mhlo.add %408, %403 : tensor<1x14x14x96xf32>
%410 = mhlo.convolution(%409, %cst_174) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%411 = "mhlo.broadcast_in_dim"(%cst_175) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%412 = "mhlo.broadcast_in_dim"(%cst_176) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%413 = "mhlo.broadcast_in_dim"(%cst_177) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%414 = "mhlo.broadcast_in_dim"(%48) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%415 = mhlo.subtract %410, %413 : tensor<1x14x14x576xf32>
%416 = mhlo.multiply %415, %411 : tensor<1x14x14x576xf32>
%417 = mhlo.divide %416, %414 : tensor<1x14x14x576xf32>
%418 = mhlo.add %417, %412 : tensor<1x14x14x576xf32>
%419 = "mhlo.clamp"(%0, %418, %3) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%420 = "mhlo.pad"(%419, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x16x16x576xf32>
%421 = mhlo.convolution(%420, %49) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x16x16x576xf32>, tensor<3x3x576x1xf32>) -> tensor<1x14x14x576xf32>
%422 = "mhlo.broadcast_in_dim"(%cst_178) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%423 = "mhlo.broadcast_in_dim"(%cst_179) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%424 = "mhlo.broadcast_in_dim"(%cst_180) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%425 = "mhlo.broadcast_in_dim"(%50) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%426 = mhlo.subtract %421, %424 : tensor<1x14x14x576xf32>
%427 = mhlo.multiply %426, %422 : tensor<1x14x14x576xf32>
%428 = mhlo.divide %427, %425 : tensor<1x14x14x576xf32>
%429 = mhlo.add %428, %423 : tensor<1x14x14x576xf32>
%430 = "mhlo.clamp"(%0, %429, %3) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%431 = mhlo.convolution(%430, %cst_170) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x14x14x96xf32>
%432 = "mhlo.broadcast_in_dim"(%cst_171) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%433 = "mhlo.broadcast_in_dim"(%cst_172) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%434 = "mhlo.broadcast_in_dim"(%cst_173) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%435 = "mhlo.broadcast_in_dim"(%51) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%436 = mhlo.subtract %431, %434 : tensor<1x14x14x96xf32>
%437 = mhlo.multiply %436, %432 : tensor<1x14x14x96xf32>
%438 = mhlo.divide %437, %435 : tensor<1x14x14x96xf32>
%439 = mhlo.add %438, %433 : tensor<1x14x14x96xf32>
%440 = chlo.broadcast_add %409, %439 : (tensor<1x14x14x96xf32>, tensor<1x14x14x96xf32>) -> tensor<1x14x14x96xf32>
%441 = mhlo.convolution(%440, %cst_163) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%442 = "mhlo.broadcast_in_dim"(%cst_164) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%443 = "mhlo.broadcast_in_dim"(%cst_165) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%444 = "mhlo.broadcast_in_dim"(%cst_166) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%445 = "mhlo.broadcast_in_dim"(%52) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%446 = mhlo.subtract %441, %444 : tensor<1x14x14x576xf32>
%447 = mhlo.multiply %446, %442 : tensor<1x14x14x576xf32>
%448 = mhlo.divide %447, %445 : tensor<1x14x14x576xf32>
%449 = mhlo.add %448, %443 : tensor<1x14x14x576xf32>
%450 = "mhlo.clamp"(%0, %449, %3) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%451 = "mhlo.pad"(%450, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x16x16x576xf32>
%452 = mhlo.convolution(%451, %53) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x16x16x576xf32>, tensor<3x3x576x1xf32>) -> tensor<1x14x14x576xf32>
%453 = "mhlo.broadcast_in_dim"(%cst_167) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%454 = "mhlo.broadcast_in_dim"(%cst_168) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%455 = "mhlo.broadcast_in_dim"(%cst_169) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%456 = "mhlo.broadcast_in_dim"(%54) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%457 = mhlo.subtract %452, %455 : tensor<1x14x14x576xf32>
%458 = mhlo.multiply %457, %453 : tensor<1x14x14x576xf32>
%459 = mhlo.divide %458, %456 : tensor<1x14x14x576xf32>
%460 = mhlo.add %459, %454 : tensor<1x14x14x576xf32>
%461 = "mhlo.clamp"(%0, %460, %3) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%462 = mhlo.convolution(%461, %cst_159) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x14x14x96xf32>
%463 = "mhlo.broadcast_in_dim"(%cst_160) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%464 = "mhlo.broadcast_in_dim"(%cst_161) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%465 = "mhlo.broadcast_in_dim"(%cst_162) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%466 = "mhlo.broadcast_in_dim"(%55) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%467 = mhlo.subtract %462, %465 : tensor<1x14x14x96xf32>
%468 = mhlo.multiply %467, %463 : tensor<1x14x14x96xf32>
%469 = mhlo.divide %468, %466 : tensor<1x14x14x96xf32>
%470 = mhlo.add %469, %464 : tensor<1x14x14x96xf32>
%471 = chlo.broadcast_add %440, %470 : (tensor<1x14x14x96xf32>, tensor<1x14x14x96xf32>) -> tensor<1x14x14x96xf32>
%472 = mhlo.convolution(%471, %cst_152) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%473 = "mhlo.broadcast_in_dim"(%cst_153) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%474 = "mhlo.broadcast_in_dim"(%cst_154) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%475 = "mhlo.broadcast_in_dim"(%cst_155) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%476 = "mhlo.broadcast_in_dim"(%56) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%477 = mhlo.subtract %472, %475 : tensor<1x14x14x576xf32>
%478 = mhlo.multiply %477, %473 : tensor<1x14x14x576xf32>
%479 = mhlo.divide %478, %476 : tensor<1x14x14x576xf32>
%480 = mhlo.add %479, %474 : tensor<1x14x14x576xf32>
%481 = "mhlo.clamp"(%0, %480, %3) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%482 = "mhlo.pad"(%481, %0) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x15x15x576xf32>
%483 = mhlo.convolution(%482, %57) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x15x15x576xf32>, tensor<3x3x576x1xf32>) -> tensor<1x7x7x576xf32>
%484 = "mhlo.broadcast_in_dim"(%cst_156) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x7x7x576xf32>
%485 = "mhlo.broadcast_in_dim"(%cst_157) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x7x7x576xf32>
%486 = "mhlo.broadcast_in_dim"(%cst_158) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x7x7x576xf32>
%487 = "mhlo.broadcast_in_dim"(%58) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x7x7x576xf32>
%488 = mhlo.subtract %483, %486 : tensor<1x7x7x576xf32>
%489 = mhlo.multiply %488, %484 : tensor<1x7x7x576xf32>
%490 = mhlo.divide %489, %487 : tensor<1x7x7x576xf32>
%491 = mhlo.add %490, %485 : tensor<1x7x7x576xf32>
%492 = "mhlo.clamp"(%0, %491, %3) : (tensor<f32>, tensor<1x7x7x576xf32>, tensor<f32>) -> tensor<1x7x7x576xf32>
%493 = mhlo.convolution(%492, %cst_148) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x576xf32>, tensor<1x1x576x160xf32>) -> tensor<1x7x7x160xf32>
%494 = "mhlo.broadcast_in_dim"(%cst_149) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%495 = "mhlo.broadcast_in_dim"(%cst_150) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%496 = "mhlo.broadcast_in_dim"(%cst_151) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%497 = "mhlo.broadcast_in_dim"(%59) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%498 = mhlo.subtract %493, %496 : tensor<1x7x7x160xf32>
%499 = mhlo.multiply %498, %494 : tensor<1x7x7x160xf32>
%500 = mhlo.divide %499, %497 : tensor<1x7x7x160xf32>
%501 = mhlo.add %500, %495 : tensor<1x7x7x160xf32>
%502 = mhlo.convolution(%501, %cst_141) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%503 = "mhlo.broadcast_in_dim"(%cst_142) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%504 = "mhlo.broadcast_in_dim"(%cst_143) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%505 = "mhlo.broadcast_in_dim"(%cst_144) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%506 = "mhlo.broadcast_in_dim"(%60) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%507 = mhlo.subtract %502, %505 : tensor<1x7x7x960xf32>
%508 = mhlo.multiply %507, %503 : tensor<1x7x7x960xf32>
%509 = mhlo.divide %508, %506 : tensor<1x7x7x960xf32>
%510 = mhlo.add %509, %504 : tensor<1x7x7x960xf32>
%511 = "mhlo.clamp"(%0, %510, %3) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%512 = "mhlo.pad"(%511, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x9x9x960xf32>
%513 = mhlo.convolution(%512, %61) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x9x9x960xf32>, tensor<3x3x960x1xf32>) -> tensor<1x7x7x960xf32>
%514 = "mhlo.broadcast_in_dim"(%cst_145) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%515 = "mhlo.broadcast_in_dim"(%cst_146) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%516 = "mhlo.broadcast_in_dim"(%cst_147) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%517 = "mhlo.broadcast_in_dim"(%62) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%518 = mhlo.subtract %513, %516 : tensor<1x7x7x960xf32>
%519 = mhlo.multiply %518, %514 : tensor<1x7x7x960xf32>
%520 = mhlo.divide %519, %517 : tensor<1x7x7x960xf32>
%521 = mhlo.add %520, %515 : tensor<1x7x7x960xf32>
%522 = "mhlo.clamp"(%0, %521, %3) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%523 = mhlo.convolution(%522, %cst_137) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x160xf32>) -> tensor<1x7x7x160xf32>
%524 = "mhlo.broadcast_in_dim"(%cst_138) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%525 = "mhlo.broadcast_in_dim"(%cst_139) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%526 = "mhlo.broadcast_in_dim"(%cst_140) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%527 = "mhlo.broadcast_in_dim"(%63) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%528 = mhlo.subtract %523, %526 : tensor<1x7x7x160xf32>
%529 = mhlo.multiply %528, %524 : tensor<1x7x7x160xf32>
%530 = mhlo.divide %529, %527 : tensor<1x7x7x160xf32>
%531 = mhlo.add %530, %525 : tensor<1x7x7x160xf32>
%532 = chlo.broadcast_add %501, %531 : (tensor<1x7x7x160xf32>, tensor<1x7x7x160xf32>) -> tensor<1x7x7x160xf32>
%533 = mhlo.convolution(%532, %cst_130) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%534 = "mhlo.broadcast_in_dim"(%cst_131) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%535 = "mhlo.broadcast_in_dim"(%cst_132) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%536 = "mhlo.broadcast_in_dim"(%cst_133) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%537 = "mhlo.broadcast_in_dim"(%64) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%538 = mhlo.subtract %533, %536 : tensor<1x7x7x960xf32>
%539 = mhlo.multiply %538, %534 : tensor<1x7x7x960xf32>
%540 = mhlo.divide %539, %537 : tensor<1x7x7x960xf32>
%541 = mhlo.add %540, %535 : tensor<1x7x7x960xf32>
%542 = "mhlo.clamp"(%0, %541, %3) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%543 = "mhlo.pad"(%542, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x9x9x960xf32>
%544 = mhlo.convolution(%543, %65) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x9x9x960xf32>, tensor<3x3x960x1xf32>) -> tensor<1x7x7x960xf32>
%545 = "mhlo.broadcast_in_dim"(%cst_134) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%546 = "mhlo.broadcast_in_dim"(%cst_135) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%547 = "mhlo.broadcast_in_dim"(%cst_136) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%548 = "mhlo.broadcast_in_dim"(%66) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%549 = mhlo.subtract %544, %547 : tensor<1x7x7x960xf32>
%550 = mhlo.multiply %549, %545 : tensor<1x7x7x960xf32>
%551 = mhlo.divide %550, %548 : tensor<1x7x7x960xf32>
%552 = mhlo.add %551, %546 : tensor<1x7x7x960xf32>
%553 = "mhlo.clamp"(%0, %552, %3) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%554 = mhlo.convolution(%553, %cst_126) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x160xf32>) -> tensor<1x7x7x160xf32>
%555 = "mhlo.broadcast_in_dim"(%cst_127) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%556 = "mhlo.broadcast_in_dim"(%cst_128) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%557 = "mhlo.broadcast_in_dim"(%cst_129) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%558 = "mhlo.broadcast_in_dim"(%67) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%559 = mhlo.subtract %554, %557 : tensor<1x7x7x160xf32>
%560 = mhlo.multiply %559, %555 : tensor<1x7x7x160xf32>
%561 = mhlo.divide %560, %558 : tensor<1x7x7x160xf32>
%562 = mhlo.add %561, %556 : tensor<1x7x7x160xf32>
%563 = chlo.broadcast_add %532, %562 : (tensor<1x7x7x160xf32>, tensor<1x7x7x160xf32>) -> tensor<1x7x7x160xf32>
%564 = mhlo.convolution(%563, %cst_119) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%565 = "mhlo.broadcast_in_dim"(%cst_120) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%566 = "mhlo.broadcast_in_dim"(%cst_121) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%567 = "mhlo.broadcast_in_dim"(%cst_122) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%568 = "mhlo.broadcast_in_dim"(%68) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%569 = mhlo.subtract %564, %567 : tensor<1x7x7x960xf32>
%570 = mhlo.multiply %569, %565 : tensor<1x7x7x960xf32>
%571 = mhlo.divide %570, %568 : tensor<1x7x7x960xf32>
%572 = mhlo.add %571, %566 : tensor<1x7x7x960xf32>
%573 = "mhlo.clamp"(%0, %572, %3) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%574 = "mhlo.pad"(%573, %cst_192) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x9x9x960xf32>
%575 = mhlo.convolution(%574, %69) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x9x9x960xf32>, tensor<3x3x960x1xf32>) -> tensor<1x7x7x960xf32>
%576 = "mhlo.broadcast_in_dim"(%cst_123) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%577 = "mhlo.broadcast_in_dim"(%cst_124) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%578 = "mhlo.broadcast_in_dim"(%cst_125) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%579 = "mhlo.broadcast_in_dim"(%70) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%580 = mhlo.subtract %575, %578 : tensor<1x7x7x960xf32>
%581 = mhlo.multiply %580, %576 : tensor<1x7x7x960xf32>
%582 = mhlo.divide %581, %579 : tensor<1x7x7x960xf32>
%583 = mhlo.add %582, %577 : tensor<1x7x7x960xf32>
%584 = "mhlo.clamp"(%0, %583, %3) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%585 = mhlo.convolution(%584, %cst_115) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x320xf32>) -> tensor<1x7x7x320xf32>
%586 = "mhlo.broadcast_in_dim"(%cst_116) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<320xf32>) -> tensor<1x7x7x320xf32>
%587 = "mhlo.broadcast_in_dim"(%cst_117) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<320xf32>) -> tensor<1x7x7x320xf32>
%588 = "mhlo.broadcast_in_dim"(%cst_118) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<320xf32>) -> tensor<1x7x7x320xf32>
%589 = "mhlo.broadcast_in_dim"(%71) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<320xf32>) -> tensor<1x7x7x320xf32>
%590 = mhlo.subtract %585, %588 : tensor<1x7x7x320xf32>
%591 = mhlo.multiply %590, %586 : tensor<1x7x7x320xf32>
%592 = mhlo.divide %591, %589 : tensor<1x7x7x320xf32>
%593 = mhlo.add %592, %587 : tensor<1x7x7x320xf32>
%594 = mhlo.convolution(%593, %cst_8) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x320xf32>, tensor<1x1x320x1280xf32>) -> tensor<1x7x7x1280xf32>
%595 = "mhlo.broadcast_in_dim"(%cst_9) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<1280xf32>) -> tensor<1x7x7x1280xf32>
%596 = "mhlo.broadcast_in_dim"(%cst_10) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<1280xf32>) -> tensor<1x7x7x1280xf32>
%597 = "mhlo.broadcast_in_dim"(%cst_11) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<1280xf32>) -> tensor<1x7x7x1280xf32>
%598 = "mhlo.broadcast_in_dim"(%72) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<1280xf32>) -> tensor<1x7x7x1280xf32>
%599 = mhlo.subtract %594, %597 : tensor<1x7x7x1280xf32>
%600 = mhlo.multiply %599, %595 : tensor<1x7x7x1280xf32>
%601 = mhlo.divide %600, %598 : tensor<1x7x7x1280xf32>
%602 = mhlo.add %601, %596 : tensor<1x7x7x1280xf32>
%603 = "mhlo.clamp"(%0, %602, %3) : (tensor<f32>, tensor<1x7x7x1280xf32>, tensor<f32>) -> tensor<1x7x7x1280xf32>
%604 = "mhlo.reduce"(%603, %0) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%616 = mhlo.add %arg1, %arg2 : tensor<f32>
"mhlo.return"(%616) : (tensor<f32>) -> ()
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<1x7x7x1280xf32>, tensor<f32>) -> tensor<1x1280xf32>
%605 = chlo.broadcast_divide %604, %2 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x1280xf32>, tensor<f32>) -> tensor<1x1280xf32>
%606 = "mhlo.dot"(%605, %cst) : (tensor<1x1280xf32>, tensor<1280x1000xf32>) -> tensor<1x1000xf32>
%607 = "mhlo.broadcast_in_dim"(%cst_0) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<1000xf32>) -> tensor<1x1000xf32>
%608 = mhlo.add %606, %607 : tensor<1x1000xf32>
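    // Numerically stable softmax: subtract the per-row maximum, exponentiate, and normalize by the row sum.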
%609 = "mhlo.reduce"(%608, %1) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%616 = mhlo.maximum %arg1, %arg2 : tensor<f32>
"mhlo.return"(%616) : (tensor<f32>) -> ()
}) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x1000xf32>, tensor<f32>) -> tensor<1xf32>
%610 = linalg.tensor_expand_shape %609 [[0, 1]] : tensor<1xf32> into tensor<1x1xf32>
%611 = chlo.broadcast_subtract %608, %610 : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32>
%612 = "mhlo.exponential"(%611) : (tensor<1x1000xf32>) -> tensor<1x1000xf32>
%613 = "mhlo.reduce"(%612, %0) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%616 = mhlo.add %arg1, %arg2 : tensor<f32>
"mhlo.return"(%616) : (tensor<f32>) -> ()
}) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x1000xf32>, tensor<f32>) -> tensor<1xf32>
%614 = linalg.tensor_expand_shape %613 [[0, 1]] : tensor<1xf32> into tensor<1x1xf32>
%615 = chlo.broadcast_divide %612, %614 : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32>
return %615 : tensor<1x1000xf32>
}
}
// -----// IR Dump After Canonicalizer //----- //
builtin.func private @"__inference_<lambda>_133580"(%arg0: tensor<1x224x224x3xf32> {tf._user_specified_name = "x"}) -> tensor<1x1000xf32> attributes {tf._construction_context = "kEagerRuntime", tf._input_shapes = [#tf_type.shape<1x224x224x3>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, 
#tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>], tf.signature.is_stateful} {
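    // The opaque<"_", "0xDEADBEEF"> payloads below appear to be elided constant data: the dump replaces the
    // actual weight tensors with a placeholder while keeping their types and shapes.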
%0 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%1 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%2 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst = constant dense<0.000000e+00> : tensor<f32>
%3 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x960x1xf32>
%4 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%5 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%6 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%7 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x960x1xf32>
%8 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%9 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%10 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%11 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x960x1xf32>
%12 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%13 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%14 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%15 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x576x1xf32>
%16 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%17 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%18 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%19 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x576x1xf32>
%20 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%21 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%22 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%23 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x576x1xf32>
%24 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%25 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%26 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%27 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x384x1xf32>
%28 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%29 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%30 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%31 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x384x1xf32>
%32 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%33 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%34 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%35 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x384x1xf32>
%36 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%37 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%38 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%39 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x384x1xf32>
%40 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%41 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%42 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%43 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x192x1xf32>
%44 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%45 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%46 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%47 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x192x1xf32>
%48 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%49 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%50 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%51 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x192x1xf32>
%52 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%53 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%54 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%55 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x144x1xf32>
%56 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%57 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%58 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%59 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x144x1xf32>
%60 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%61 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%62 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%63 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x96x1xf32>
%64 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%65 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%66 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%67 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x32x1xf32>
%68 = mhlo.constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%69 = mhlo.constant dense<6.000000e+00> : tensor<f32>
%70 = mhlo.constant dense<4.900000e+01> : tensor<f32>
%71 = mhlo.constant dense<0xFF800000> : tensor<f32>
%72 = mhlo.constant dense<0.000000e+00> : tensor<f32>
%cst_0 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_1 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_2 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_3 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_4 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_5 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_6 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_7 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_8 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_9 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_10 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x96xf32>
%cst_11 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_12 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_13 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_14 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_15 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_16 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_17 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_18 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_19 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_20 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_21 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32>
%cst_22 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_23 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_24 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_25 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_26 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_27 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_28 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_29 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_30 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_31 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_32 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32>
%cst_33 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_34 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_35 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_36 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_37 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_38 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_39 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_40 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_41 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_42 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_43 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x160xf32>
%cst_44 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_45 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_46 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_47 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_48 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_49 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_50 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_51 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_52 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_53 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_54 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32>
%cst_55 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_56 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_57 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_58 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_59 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_60 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_61 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_62 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_63 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_64 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_65 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32>
%cst_66 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_67 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_68 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_69 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_70 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_71 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_72 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_73 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_74 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_75 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_76 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x320xf32>
%cst_77 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_78 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_79 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_80 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_81 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_82 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_83 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x16x96xf32>
%cst_84 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_85 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_86 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_87 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x24xf32>
%cst_88 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_89 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_90 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_91 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_92 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_93 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_94 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32>
%cst_95 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_96 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_97 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_98 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x24xf32>
%cst_99 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_100 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_101 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_102 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_103 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_104 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_105 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32>
%cst_106 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_107 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_108 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_109 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x32xf32>
%cst_110 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_111 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_112 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_113 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_114 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_115 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_116 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_117 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_118 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_119 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_120 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32>
%cst_121 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_122 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_123 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_124 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_125 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_126 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_127 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_128 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_129 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_130 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_131 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32>
%cst_132 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_133 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_134 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_135 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_136 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_137 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_138 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_139 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_140 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_141 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_142 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x64xf32>
%cst_143 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_144 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_145 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_146 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_147 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_148 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_149 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_150 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_151 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_152 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_153 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_154 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_155 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_156 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_157 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_158 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_159 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_160 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_161 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_162 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_163 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_164 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_165 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_166 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_167 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_168 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_169 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_170 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_171 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_172 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_173 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_174 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_175 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_176 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_177 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_178 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_179 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x3x32xf32>
%cst_180 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_181 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_182 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_183 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x320x1280xf32>
%cst_184 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_185 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_186 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_187 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_188 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_189 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_190 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x16xf32>
%cst_191 = constant opaque<"_", "0xDEADBEEF"> : tensor<1000xf32>
%cst_192 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280x1000xf32>
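    // Inference body below. The op sequence is consistent with a MobileNetV2-style image
    // classifier (1x224x224x3 input, inverted-residual blocks built from 1x1 expansions,
    // 3x3 depthwise convolutions and 1x1 projections, a 1280-wide head and 1000 logits);
    // the weight constants above are elided by the dump as opaque "0xDEADBEEF" payloads.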
%73 = "mhlo.pad"(%arg0, %cst) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x224x224x3xf32>, tensor<f32>) -> tensor<1x225x225x3xf32>
%74 = mhlo.convolution(%73, %cst_179) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
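    // Stem: 3x3 stride-2 convolution to 1x112x112x32. The recurring broadcast / subtract /
    // multiply / divide / add pattern that follows looks like batch normalization folded for
    // inference, and each clamp against %72 (0.0) and %69 (6.0) is a ReLU6 activation.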
%75 = "mhlo.broadcast_in_dim"(%cst_178) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x112x112x32xf32>
%76 = "mhlo.broadcast_in_dim"(%cst_177) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x112x112x32xf32>
%77 = "mhlo.broadcast_in_dim"(%cst_176) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x112x112x32xf32>
%78 = "mhlo.broadcast_in_dim"(%68) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x112x112x32xf32>
%79 = mhlo.subtract %74, %77 : tensor<1x112x112x32xf32>
%80 = mhlo.multiply %79, %75 : tensor<1x112x112x32xf32>
%81 = mhlo.divide %80, %78 : tensor<1x112x112x32xf32>
%82 = mhlo.add %81, %76 : tensor<1x112x112x32xf32>
%83 = "mhlo.clamp"(%72, %82, %69) : (tensor<f32>, tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x112x112x32xf32>
%84 = "mhlo.pad"(%83, %cst) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x114x114x32xf32>
%85 = mhlo.convolution(%84, %67) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 32 : i64} : (tensor<1x114x114x32xf32>, tensor<3x3x32x1xf32>) -> tensor<1x112x112x32xf32>
%86 = "mhlo.broadcast_in_dim"(%cst_186) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x112x112x32xf32>
%87 = "mhlo.broadcast_in_dim"(%cst_185) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x112x112x32xf32>
%88 = "mhlo.broadcast_in_dim"(%cst_184) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x112x112x32xf32>
%89 = "mhlo.broadcast_in_dim"(%66) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x112x112x32xf32>
%90 = mhlo.subtract %85, %88 : tensor<1x112x112x32xf32>
%91 = mhlo.multiply %90, %86 : tensor<1x112x112x32xf32>
%92 = mhlo.divide %91, %89 : tensor<1x112x112x32xf32>
%93 = mhlo.add %92, %87 : tensor<1x112x112x32xf32>
%94 = "mhlo.clamp"(%72, %93, %69) : (tensor<f32>, tensor<1x112x112x32xf32>, tensor<f32>) -> tensor<1x112x112x32xf32>
%95 = mhlo.convolution(%94, %cst_190) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x112x112x32xf32>, tensor<1x1x32x16xf32>) -> tensor<1x112x112x16xf32>
%96 = "mhlo.broadcast_in_dim"(%cst_189) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<16xf32>) -> tensor<1x112x112x16xf32>
%97 = "mhlo.broadcast_in_dim"(%cst_188) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<16xf32>) -> tensor<1x112x112x16xf32>
%98 = "mhlo.broadcast_in_dim"(%cst_187) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<16xf32>) -> tensor<1x112x112x16xf32>
%99 = "mhlo.broadcast_in_dim"(%65) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<16xf32>) -> tensor<1x112x112x16xf32>
%100 = mhlo.subtract %95, %98 : tensor<1x112x112x16xf32>
%101 = mhlo.multiply %100, %96 : tensor<1x112x112x16xf32>
%102 = mhlo.divide %101, %99 : tensor<1x112x112x16xf32>
%103 = mhlo.add %102, %97 : tensor<1x112x112x16xf32>
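    // Next block: 1x1 expansion from 16 to 96 channels, then a stride-2 depthwise convolution
    // that downsamples 112x112 -> 56x56.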
%104 = mhlo.convolution(%103, %cst_83) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x112x112x16xf32>, tensor<1x1x16x96xf32>) -> tensor<1x112x112x96xf32>
%105 = "mhlo.broadcast_in_dim"(%cst_82) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x112x112x96xf32>
%106 = "mhlo.broadcast_in_dim"(%cst_81) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x112x112x96xf32>
%107 = "mhlo.broadcast_in_dim"(%cst_80) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x112x112x96xf32>
%108 = "mhlo.broadcast_in_dim"(%64) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x112x112x96xf32>
%109 = mhlo.subtract %104, %107 : tensor<1x112x112x96xf32>
%110 = mhlo.multiply %109, %105 : tensor<1x112x112x96xf32>
%111 = mhlo.divide %110, %108 : tensor<1x112x112x96xf32>
%112 = mhlo.add %111, %106 : tensor<1x112x112x96xf32>
%113 = "mhlo.clamp"(%72, %112, %69) : (tensor<f32>, tensor<1x112x112x96xf32>, tensor<f32>) -> tensor<1x112x112x96xf32>
%114 = "mhlo.pad"(%113, %72) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x112x112x96xf32>, tensor<f32>) -> tensor<1x113x113x96xf32>
%115 = mhlo.convolution(%114, %63) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 96 : i64} : (tensor<1x113x113x96xf32>, tensor<3x3x96x1xf32>) -> tensor<1x56x56x96xf32>
%116 = "mhlo.broadcast_in_dim"(%cst_79) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x56x56x96xf32>
%117 = "mhlo.broadcast_in_dim"(%cst_78) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x56x56x96xf32>
%118 = "mhlo.broadcast_in_dim"(%cst_77) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x56x56x96xf32>
%119 = "mhlo.broadcast_in_dim"(%62) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x56x56x96xf32>
%120 = mhlo.subtract %115, %118 : tensor<1x56x56x96xf32>
%121 = mhlo.multiply %120, %116 : tensor<1x56x56x96xf32>
%122 = mhlo.divide %121, %119 : tensor<1x56x56x96xf32>
%123 = mhlo.add %122, %117 : tensor<1x56x56x96xf32>
%124 = "mhlo.clamp"(%72, %123, %69) : (tensor<f32>, tensor<1x56x56x96xf32>, tensor<f32>) -> tensor<1x56x56x96xf32>
%125 = mhlo.convolution(%124, %cst_87) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x96xf32>, tensor<1x1x96x24xf32>) -> tensor<1x56x56x24xf32>
%126 = "mhlo.broadcast_in_dim"(%cst_86) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x56x56x24xf32>
%127 = "mhlo.broadcast_in_dim"(%cst_85) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x56x56x24xf32>
%128 = "mhlo.broadcast_in_dim"(%cst_84) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x56x56x24xf32>
%129 = "mhlo.broadcast_in_dim"(%61) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x56x56x24xf32>
%130 = mhlo.subtract %125, %128 : tensor<1x56x56x24xf32>
%131 = mhlo.multiply %130, %126 : tensor<1x56x56x24xf32>
%132 = mhlo.divide %131, %129 : tensor<1x56x56x24xf32>
%133 = mhlo.add %132, %127 : tensor<1x56x56x24xf32>
%134 = mhlo.convolution(%133, %cst_94) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x24xf32>, tensor<1x1x24x144xf32>) -> tensor<1x56x56x144xf32>
%135 = "mhlo.broadcast_in_dim"(%cst_93) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%136 = "mhlo.broadcast_in_dim"(%cst_92) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%137 = "mhlo.broadcast_in_dim"(%cst_91) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%138 = "mhlo.broadcast_in_dim"(%60) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%139 = mhlo.subtract %134, %137 : tensor<1x56x56x144xf32>
%140 = mhlo.multiply %139, %135 : tensor<1x56x56x144xf32>
%141 = mhlo.divide %140, %138 : tensor<1x56x56x144xf32>
%142 = mhlo.add %141, %136 : tensor<1x56x56x144xf32>
%143 = "mhlo.clamp"(%72, %142, %69) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%144 = "mhlo.pad"(%143, %cst) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x58x58x144xf32>
%145 = mhlo.convolution(%144, %59) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 144 : i64} : (tensor<1x58x58x144xf32>, tensor<3x3x144x1xf32>) -> tensor<1x56x56x144xf32>
%146 = "mhlo.broadcast_in_dim"(%cst_90) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%147 = "mhlo.broadcast_in_dim"(%cst_89) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%148 = "mhlo.broadcast_in_dim"(%cst_88) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%149 = "mhlo.broadcast_in_dim"(%58) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%150 = mhlo.subtract %145, %148 : tensor<1x56x56x144xf32>
%151 = mhlo.multiply %150, %146 : tensor<1x56x56x144xf32>
%152 = mhlo.divide %151, %149 : tensor<1x56x56x144xf32>
%153 = mhlo.add %152, %147 : tensor<1x56x56x144xf32>
%154 = "mhlo.clamp"(%72, %153, %69) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%155 = mhlo.convolution(%154, %cst_98) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x144xf32>, tensor<1x1x144x24xf32>) -> tensor<1x56x56x24xf32>
%156 = "mhlo.broadcast_in_dim"(%cst_97) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x56x56x24xf32>
%157 = "mhlo.broadcast_in_dim"(%cst_96) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x56x56x24xf32>
%158 = "mhlo.broadcast_in_dim"(%cst_95) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x56x56x24xf32>
%159 = "mhlo.broadcast_in_dim"(%57) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x56x56x24xf32>
%160 = mhlo.subtract %155, %158 : tensor<1x56x56x24xf32>
%161 = mhlo.multiply %160, %156 : tensor<1x56x56x24xf32>
%162 = mhlo.divide %161, %159 : tensor<1x56x56x24xf32>
%163 = mhlo.add %162, %157 : tensor<1x56x56x24xf32>
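    // Residual connection: add the projected 24-channel output back onto the previous block's output.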
%164 = chlo.broadcast_add %133, %163 : (tensor<1x56x56x24xf32>, tensor<1x56x56x24xf32>) -> tensor<1x56x56x24xf32>
%165 = mhlo.convolution(%164, %cst_105) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x56x56x24xf32>, tensor<1x1x24x144xf32>) -> tensor<1x56x56x144xf32>
%166 = "mhlo.broadcast_in_dim"(%cst_104) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%167 = "mhlo.broadcast_in_dim"(%cst_103) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%168 = "mhlo.broadcast_in_dim"(%cst_102) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%169 = "mhlo.broadcast_in_dim"(%56) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x56x56x144xf32>
%170 = mhlo.subtract %165, %168 : tensor<1x56x56x144xf32>
%171 = mhlo.multiply %170, %166 : tensor<1x56x56x144xf32>
%172 = mhlo.divide %171, %169 : tensor<1x56x56x144xf32>
%173 = mhlo.add %172, %167 : tensor<1x56x56x144xf32>
%174 = "mhlo.clamp"(%72, %173, %69) : (tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x56x56x144xf32>
%175 = "mhlo.pad"(%174, %72) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x56x56x144xf32>, tensor<f32>) -> tensor<1x57x57x144xf32>
%176 = mhlo.convolution(%175, %55) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 144 : i64} : (tensor<1x57x57x144xf32>, tensor<3x3x144x1xf32>) -> tensor<1x28x28x144xf32>
%177 = "mhlo.broadcast_in_dim"(%cst_101) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x28x28x144xf32>
%178 = "mhlo.broadcast_in_dim"(%cst_100) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x28x28x144xf32>
%179 = "mhlo.broadcast_in_dim"(%cst_99) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x28x28x144xf32>
%180 = "mhlo.broadcast_in_dim"(%54) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x28x28x144xf32>
%181 = mhlo.subtract %176, %179 : tensor<1x28x28x144xf32>
%182 = mhlo.multiply %181, %177 : tensor<1x28x28x144xf32>
%183 = mhlo.divide %182, %180 : tensor<1x28x28x144xf32>
%184 = mhlo.add %183, %178 : tensor<1x28x28x144xf32>
%185 = "mhlo.clamp"(%72, %184, %69) : (tensor<f32>, tensor<1x28x28x144xf32>, tensor<f32>) -> tensor<1x28x28x144xf32>
%186 = mhlo.convolution(%185, %cst_109) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x144xf32>, tensor<1x1x144x32xf32>) -> tensor<1x28x28x32xf32>
%187 = "mhlo.broadcast_in_dim"(%cst_108) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%188 = "mhlo.broadcast_in_dim"(%cst_107) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%189 = "mhlo.broadcast_in_dim"(%cst_106) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%190 = "mhlo.broadcast_in_dim"(%53) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%191 = mhlo.subtract %186, %189 : tensor<1x28x28x32xf32>
%192 = mhlo.multiply %191, %187 : tensor<1x28x28x32xf32>
%193 = mhlo.divide %192, %190 : tensor<1x28x28x32xf32>
%194 = mhlo.add %193, %188 : tensor<1x28x28x32xf32>
%195 = mhlo.convolution(%194, %cst_116) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%196 = "mhlo.broadcast_in_dim"(%cst_115) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%197 = "mhlo.broadcast_in_dim"(%cst_114) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%198 = "mhlo.broadcast_in_dim"(%cst_113) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%199 = "mhlo.broadcast_in_dim"(%52) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%200 = mhlo.subtract %195, %198 : tensor<1x28x28x192xf32>
%201 = mhlo.multiply %200, %196 : tensor<1x28x28x192xf32>
%202 = mhlo.divide %201, %199 : tensor<1x28x28x192xf32>
%203 = mhlo.add %202, %197 : tensor<1x28x28x192xf32>
%204 = "mhlo.clamp"(%72, %203, %69) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%205 = "mhlo.pad"(%204, %cst) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x30x30x192xf32>
%206 = mhlo.convolution(%205, %51) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x30x30x192xf32>, tensor<3x3x192x1xf32>) -> tensor<1x28x28x192xf32>
%207 = "mhlo.broadcast_in_dim"(%cst_112) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%208 = "mhlo.broadcast_in_dim"(%cst_111) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%209 = "mhlo.broadcast_in_dim"(%cst_110) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%210 = "mhlo.broadcast_in_dim"(%50) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%211 = mhlo.subtract %206, %209 : tensor<1x28x28x192xf32>
%212 = mhlo.multiply %211, %207 : tensor<1x28x28x192xf32>
%213 = mhlo.divide %212, %210 : tensor<1x28x28x192xf32>
%214 = mhlo.add %213, %208 : tensor<1x28x28x192xf32>
%215 = "mhlo.clamp"(%72, %214, %69) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%216 = mhlo.convolution(%215, %cst_120) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x192xf32>, tensor<1x1x192x32xf32>) -> tensor<1x28x28x32xf32>
%217 = "mhlo.broadcast_in_dim"(%cst_119) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%218 = "mhlo.broadcast_in_dim"(%cst_118) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%219 = "mhlo.broadcast_in_dim"(%cst_117) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%220 = "mhlo.broadcast_in_dim"(%49) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%221 = mhlo.subtract %216, %219 : tensor<1x28x28x32xf32>
%222 = mhlo.multiply %221, %217 : tensor<1x28x28x32xf32>
%223 = mhlo.divide %222, %220 : tensor<1x28x28x32xf32>
%224 = mhlo.add %223, %218 : tensor<1x28x28x32xf32>
%225 = chlo.broadcast_add %194, %224 : (tensor<1x28x28x32xf32>, tensor<1x28x28x32xf32>) -> tensor<1x28x28x32xf32>
%226 = mhlo.convolution(%225, %cst_127) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%227 = "mhlo.broadcast_in_dim"(%cst_126) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%228 = "mhlo.broadcast_in_dim"(%cst_125) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%229 = "mhlo.broadcast_in_dim"(%cst_124) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%230 = "mhlo.broadcast_in_dim"(%48) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%231 = mhlo.subtract %226, %229 : tensor<1x28x28x192xf32>
%232 = mhlo.multiply %231, %227 : tensor<1x28x28x192xf32>
%233 = mhlo.divide %232, %230 : tensor<1x28x28x192xf32>
%234 = mhlo.add %233, %228 : tensor<1x28x28x192xf32>
%235 = "mhlo.clamp"(%72, %234, %69) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%236 = "mhlo.pad"(%235, %cst) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x30x30x192xf32>
%237 = mhlo.convolution(%236, %47) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x30x30x192xf32>, tensor<3x3x192x1xf32>) -> tensor<1x28x28x192xf32>
%238 = "mhlo.broadcast_in_dim"(%cst_123) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%239 = "mhlo.broadcast_in_dim"(%cst_122) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%240 = "mhlo.broadcast_in_dim"(%cst_121) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%241 = "mhlo.broadcast_in_dim"(%46) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%242 = mhlo.subtract %237, %240 : tensor<1x28x28x192xf32>
%243 = mhlo.multiply %242, %238 : tensor<1x28x28x192xf32>
%244 = mhlo.divide %243, %241 : tensor<1x28x28x192xf32>
%245 = mhlo.add %244, %239 : tensor<1x28x28x192xf32>
%246 = "mhlo.clamp"(%72, %245, %69) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%247 = mhlo.convolution(%246, %cst_131) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x192xf32>, tensor<1x1x192x32xf32>) -> tensor<1x28x28x32xf32>
%248 = "mhlo.broadcast_in_dim"(%cst_130) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%249 = "mhlo.broadcast_in_dim"(%cst_129) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%250 = "mhlo.broadcast_in_dim"(%cst_128) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%251 = "mhlo.broadcast_in_dim"(%45) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x28x28x32xf32>
%252 = mhlo.subtract %247, %250 : tensor<1x28x28x32xf32>
%253 = mhlo.multiply %252, %248 : tensor<1x28x28x32xf32>
%254 = mhlo.divide %253, %251 : tensor<1x28x28x32xf32>
%255 = mhlo.add %254, %249 : tensor<1x28x28x32xf32>
%256 = chlo.broadcast_add %225, %255 : (tensor<1x28x28x32xf32>, tensor<1x28x28x32xf32>) -> tensor<1x28x28x32xf32>
%257 = mhlo.convolution(%256, %cst_138) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x28x28x32xf32>, tensor<1x1x32x192xf32>) -> tensor<1x28x28x192xf32>
%258 = "mhlo.broadcast_in_dim"(%cst_137) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%259 = "mhlo.broadcast_in_dim"(%cst_136) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%260 = "mhlo.broadcast_in_dim"(%cst_135) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%261 = "mhlo.broadcast_in_dim"(%44) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x28x28x192xf32>
%262 = mhlo.subtract %257, %260 : tensor<1x28x28x192xf32>
%263 = mhlo.multiply %262, %258 : tensor<1x28x28x192xf32>
%264 = mhlo.divide %263, %261 : tensor<1x28x28x192xf32>
%265 = mhlo.add %264, %259 : tensor<1x28x28x192xf32>
%266 = "mhlo.clamp"(%72, %265, %69) : (tensor<f32>, tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x28x28x192xf32>
%267 = "mhlo.pad"(%266, %72) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x28x28x192xf32>, tensor<f32>) -> tensor<1x29x29x192xf32>
%268 = mhlo.convolution(%267, %43) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 192 : i64} : (tensor<1x29x29x192xf32>, tensor<3x3x192x1xf32>) -> tensor<1x14x14x192xf32>
%269 = "mhlo.broadcast_in_dim"(%cst_134) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x14x14x192xf32>
%270 = "mhlo.broadcast_in_dim"(%cst_133) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x14x14x192xf32>
%271 = "mhlo.broadcast_in_dim"(%cst_132) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x14x14x192xf32>
%272 = "mhlo.broadcast_in_dim"(%42) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<192xf32>) -> tensor<1x14x14x192xf32>
%273 = mhlo.subtract %268, %271 : tensor<1x14x14x192xf32>
%274 = mhlo.multiply %273, %269 : tensor<1x14x14x192xf32>
%275 = mhlo.divide %274, %272 : tensor<1x14x14x192xf32>
%276 = mhlo.add %275, %270 : tensor<1x14x14x192xf32>
%277 = "mhlo.clamp"(%72, %276, %69) : (tensor<f32>, tensor<1x14x14x192xf32>, tensor<f32>) -> tensor<1x14x14x192xf32>
%278 = mhlo.convolution(%277, %cst_142) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x192xf32>, tensor<1x1x192x64xf32>) -> tensor<1x14x14x64xf32>
%279 = "mhlo.broadcast_in_dim"(%cst_141) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%280 = "mhlo.broadcast_in_dim"(%cst_140) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%281 = "mhlo.broadcast_in_dim"(%cst_139) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%282 = "mhlo.broadcast_in_dim"(%41) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%283 = mhlo.subtract %278, %281 : tensor<1x14x14x64xf32>
%284 = mhlo.multiply %283, %279 : tensor<1x14x14x64xf32>
%285 = mhlo.divide %284, %282 : tensor<1x14x14x64xf32>
%286 = mhlo.add %285, %280 : tensor<1x14x14x64xf32>
%287 = mhlo.convolution(%286, %cst_149) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%288 = "mhlo.broadcast_in_dim"(%cst_148) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%289 = "mhlo.broadcast_in_dim"(%cst_147) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%290 = "mhlo.broadcast_in_dim"(%cst_146) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%291 = "mhlo.broadcast_in_dim"(%40) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%292 = mhlo.subtract %287, %290 : tensor<1x14x14x384xf32>
%293 = mhlo.multiply %292, %288 : tensor<1x14x14x384xf32>
%294 = mhlo.divide %293, %291 : tensor<1x14x14x384xf32>
%295 = mhlo.add %294, %289 : tensor<1x14x14x384xf32>
%296 = "mhlo.clamp"(%72, %295, %69) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%297 = "mhlo.pad"(%296, %cst) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x16x16x384xf32>
%298 = mhlo.convolution(%297, %39) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x16x16x384xf32>, tensor<3x3x384x1xf32>) -> tensor<1x14x14x384xf32>
%299 = "mhlo.broadcast_in_dim"(%cst_145) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%300 = "mhlo.broadcast_in_dim"(%cst_144) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%301 = "mhlo.broadcast_in_dim"(%cst_143) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%302 = "mhlo.broadcast_in_dim"(%38) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%303 = mhlo.subtract %298, %301 : tensor<1x14x14x384xf32>
%304 = mhlo.multiply %303, %299 : tensor<1x14x14x384xf32>
%305 = mhlo.divide %304, %302 : tensor<1x14x14x384xf32>
%306 = mhlo.add %305, %300 : tensor<1x14x14x384xf32>
%307 = "mhlo.clamp"(%72, %306, %69) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%308 = mhlo.convolution(%307, %cst_153) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%309 = "mhlo.broadcast_in_dim"(%cst_152) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%310 = "mhlo.broadcast_in_dim"(%cst_151) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%311 = "mhlo.broadcast_in_dim"(%cst_150) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%312 = "mhlo.broadcast_in_dim"(%37) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%313 = mhlo.subtract %308, %311 : tensor<1x14x14x64xf32>
%314 = mhlo.multiply %313, %309 : tensor<1x14x14x64xf32>
%315 = mhlo.divide %314, %312 : tensor<1x14x14x64xf32>
%316 = mhlo.add %315, %310 : tensor<1x14x14x64xf32>
%317 = chlo.broadcast_add %286, %316 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%318 = mhlo.convolution(%317, %cst_160) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%319 = "mhlo.broadcast_in_dim"(%cst_159) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%320 = "mhlo.broadcast_in_dim"(%cst_158) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%321 = "mhlo.broadcast_in_dim"(%cst_157) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%322 = "mhlo.broadcast_in_dim"(%36) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%323 = mhlo.subtract %318, %321 : tensor<1x14x14x384xf32>
%324 = mhlo.multiply %323, %319 : tensor<1x14x14x384xf32>
%325 = mhlo.divide %324, %322 : tensor<1x14x14x384xf32>
%326 = mhlo.add %325, %320 : tensor<1x14x14x384xf32>
%327 = "mhlo.clamp"(%72, %326, %69) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%328 = "mhlo.pad"(%327, %cst) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x16x16x384xf32>
%329 = mhlo.convolution(%328, %35) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x16x16x384xf32>, tensor<3x3x384x1xf32>) -> tensor<1x14x14x384xf32>
%330 = "mhlo.broadcast_in_dim"(%cst_156) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%331 = "mhlo.broadcast_in_dim"(%cst_155) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%332 = "mhlo.broadcast_in_dim"(%cst_154) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%333 = "mhlo.broadcast_in_dim"(%34) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%334 = mhlo.subtract %329, %332 : tensor<1x14x14x384xf32>
%335 = mhlo.multiply %334, %330 : tensor<1x14x14x384xf32>
%336 = mhlo.divide %335, %333 : tensor<1x14x14x384xf32>
%337 = mhlo.add %336, %331 : tensor<1x14x14x384xf32>
%338 = "mhlo.clamp"(%72, %337, %69) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%339 = mhlo.convolution(%338, %cst_164) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%340 = "mhlo.broadcast_in_dim"(%cst_163) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%341 = "mhlo.broadcast_in_dim"(%cst_162) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%342 = "mhlo.broadcast_in_dim"(%cst_161) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%343 = "mhlo.broadcast_in_dim"(%33) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%344 = mhlo.subtract %339, %342 : tensor<1x14x14x64xf32>
%345 = mhlo.multiply %344, %340 : tensor<1x14x14x64xf32>
%346 = mhlo.divide %345, %343 : tensor<1x14x14x64xf32>
%347 = mhlo.add %346, %341 : tensor<1x14x14x64xf32>
%348 = chlo.broadcast_add %317, %347 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
%349 = mhlo.convolution(%348, %cst_171) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%350 = "mhlo.broadcast_in_dim"(%cst_170) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%351 = "mhlo.broadcast_in_dim"(%cst_169) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%352 = "mhlo.broadcast_in_dim"(%cst_168) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%353 = "mhlo.broadcast_in_dim"(%32) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%354 = mhlo.subtract %349, %352 : tensor<1x14x14x384xf32>
%355 = mhlo.multiply %354, %350 : tensor<1x14x14x384xf32>
%356 = mhlo.divide %355, %353 : tensor<1x14x14x384xf32>
%357 = mhlo.add %356, %351 : tensor<1x14x14x384xf32>
%358 = "mhlo.clamp"(%72, %357, %69) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%359 = "mhlo.pad"(%358, %cst) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x16x16x384xf32>
%360 = mhlo.convolution(%359, %31) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x16x16x384xf32>, tensor<3x3x384x1xf32>) -> tensor<1x14x14x384xf32>
%361 = "mhlo.broadcast_in_dim"(%cst_167) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%362 = "mhlo.broadcast_in_dim"(%cst_166) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%363 = "mhlo.broadcast_in_dim"(%cst_165) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%364 = "mhlo.broadcast_in_dim"(%30) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%365 = mhlo.subtract %360, %363 : tensor<1x14x14x384xf32>
%366 = mhlo.multiply %365, %361 : tensor<1x14x14x384xf32>
%367 = mhlo.divide %366, %364 : tensor<1x14x14x384xf32>
%368 = mhlo.add %367, %362 : tensor<1x14x14x384xf32>
%369 = "mhlo.clamp"(%72, %368, %69) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%370 = mhlo.convolution(%369, %cst_175) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x64xf32>) -> tensor<1x14x14x64xf32>
%371 = "mhlo.broadcast_in_dim"(%cst_174) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%372 = "mhlo.broadcast_in_dim"(%cst_173) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%373 = "mhlo.broadcast_in_dim"(%cst_172) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%374 = "mhlo.broadcast_in_dim"(%29) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
%375 = mhlo.subtract %370, %373 : tensor<1x14x14x64xf32>
%376 = mhlo.multiply %375, %371 : tensor<1x14x14x64xf32>
%377 = mhlo.divide %376, %374 : tensor<1x14x14x64xf32>
%378 = mhlo.add %377, %372 : tensor<1x14x14x64xf32>
%379 = chlo.broadcast_add %348, %378 : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
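    // End of the 64-channel residual blocks; the following expansion (64 -> 384) projects down
    // to 96 channels instead of 64, widening the network.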
%380 = mhlo.convolution(%379, %cst_6) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x64xf32>, tensor<1x1x64x384xf32>) -> tensor<1x14x14x384xf32>
%381 = "mhlo.broadcast_in_dim"(%cst_5) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%382 = "mhlo.broadcast_in_dim"(%cst_4) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%383 = "mhlo.broadcast_in_dim"(%cst_3) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%384 = "mhlo.broadcast_in_dim"(%28) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%385 = mhlo.subtract %380, %383 : tensor<1x14x14x384xf32>
%386 = mhlo.multiply %385, %381 : tensor<1x14x14x384xf32>
%387 = mhlo.divide %386, %384 : tensor<1x14x14x384xf32>
%388 = mhlo.add %387, %382 : tensor<1x14x14x384xf32>
%389 = "mhlo.clamp"(%72, %388, %69) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%390 = "mhlo.pad"(%389, %cst) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x16x16x384xf32>
%391 = mhlo.convolution(%390, %27) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 384 : i64} : (tensor<1x16x16x384xf32>, tensor<3x3x384x1xf32>) -> tensor<1x14x14x384xf32>
%392 = "mhlo.broadcast_in_dim"(%cst_2) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%393 = "mhlo.broadcast_in_dim"(%cst_1) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%394 = "mhlo.broadcast_in_dim"(%cst_0) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%395 = "mhlo.broadcast_in_dim"(%26) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<384xf32>) -> tensor<1x14x14x384xf32>
%396 = mhlo.subtract %391, %394 : tensor<1x14x14x384xf32>
%397 = mhlo.multiply %396, %392 : tensor<1x14x14x384xf32>
%398 = mhlo.divide %397, %395 : tensor<1x14x14x384xf32>
%399 = mhlo.add %398, %393 : tensor<1x14x14x384xf32>
%400 = "mhlo.clamp"(%72, %399, %69) : (tensor<f32>, tensor<1x14x14x384xf32>, tensor<f32>) -> tensor<1x14x14x384xf32>
%401 = mhlo.convolution(%400, %cst_10) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x384xf32>, tensor<1x1x384x96xf32>) -> tensor<1x14x14x96xf32>
%402 = "mhlo.broadcast_in_dim"(%cst_9) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%403 = "mhlo.broadcast_in_dim"(%cst_8) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%404 = "mhlo.broadcast_in_dim"(%cst_7) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%405 = "mhlo.broadcast_in_dim"(%25) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%406 = mhlo.subtract %401, %404 : tensor<1x14x14x96xf32>
%407 = mhlo.multiply %406, %402 : tensor<1x14x14x96xf32>
%408 = mhlo.divide %407, %405 : tensor<1x14x14x96xf32>
%409 = mhlo.add %408, %403 : tensor<1x14x14x96xf32>
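    // 96-channel stage: each block expands to 576 channels, runs a 3x3 depthwise convolution,
    // and projects back to 96 with a residual add.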
%410 = mhlo.convolution(%409, %cst_17) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%411 = "mhlo.broadcast_in_dim"(%cst_16) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%412 = "mhlo.broadcast_in_dim"(%cst_15) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%413 = "mhlo.broadcast_in_dim"(%cst_14) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%414 = "mhlo.broadcast_in_dim"(%24) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%415 = mhlo.subtract %410, %413 : tensor<1x14x14x576xf32>
%416 = mhlo.multiply %415, %411 : tensor<1x14x14x576xf32>
%417 = mhlo.divide %416, %414 : tensor<1x14x14x576xf32>
%418 = mhlo.add %417, %412 : tensor<1x14x14x576xf32>
%419 = "mhlo.clamp"(%72, %418, %69) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%420 = "mhlo.pad"(%419, %cst) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x16x16x576xf32>
%421 = mhlo.convolution(%420, %23) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x16x16x576xf32>, tensor<3x3x576x1xf32>) -> tensor<1x14x14x576xf32>
%422 = "mhlo.broadcast_in_dim"(%cst_13) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%423 = "mhlo.broadcast_in_dim"(%cst_12) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%424 = "mhlo.broadcast_in_dim"(%cst_11) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%425 = "mhlo.broadcast_in_dim"(%22) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%426 = mhlo.subtract %421, %424 : tensor<1x14x14x576xf32>
%427 = mhlo.multiply %426, %422 : tensor<1x14x14x576xf32>
%428 = mhlo.divide %427, %425 : tensor<1x14x14x576xf32>
%429 = mhlo.add %428, %423 : tensor<1x14x14x576xf32>
%430 = "mhlo.clamp"(%72, %429, %69) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%431 = mhlo.convolution(%430, %cst_21) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x14x14x96xf32>
%432 = "mhlo.broadcast_in_dim"(%cst_20) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%433 = "mhlo.broadcast_in_dim"(%cst_19) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%434 = "mhlo.broadcast_in_dim"(%cst_18) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%435 = "mhlo.broadcast_in_dim"(%21) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%436 = mhlo.subtract %431, %434 : tensor<1x14x14x96xf32>
%437 = mhlo.multiply %436, %432 : tensor<1x14x14x96xf32>
%438 = mhlo.divide %437, %435 : tensor<1x14x14x96xf32>
%439 = mhlo.add %438, %433 : tensor<1x14x14x96xf32>
%440 = chlo.broadcast_add %409, %439 : (tensor<1x14x14x96xf32>, tensor<1x14x14x96xf32>) -> tensor<1x14x14x96xf32>
%441 = mhlo.convolution(%440, %cst_28) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%442 = "mhlo.broadcast_in_dim"(%cst_27) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%443 = "mhlo.broadcast_in_dim"(%cst_26) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%444 = "mhlo.broadcast_in_dim"(%cst_25) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%445 = "mhlo.broadcast_in_dim"(%20) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%446 = mhlo.subtract %441, %444 : tensor<1x14x14x576xf32>
%447 = mhlo.multiply %446, %442 : tensor<1x14x14x576xf32>
%448 = mhlo.divide %447, %445 : tensor<1x14x14x576xf32>
%449 = mhlo.add %448, %443 : tensor<1x14x14x576xf32>
%450 = "mhlo.clamp"(%72, %449, %69) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%451 = "mhlo.pad"(%450, %cst) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x16x16x576xf32>
%452 = mhlo.convolution(%451, %19) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x16x16x576xf32>, tensor<3x3x576x1xf32>) -> tensor<1x14x14x576xf32>
%453 = "mhlo.broadcast_in_dim"(%cst_24) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%454 = "mhlo.broadcast_in_dim"(%cst_23) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%455 = "mhlo.broadcast_in_dim"(%cst_22) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%456 = "mhlo.broadcast_in_dim"(%18) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%457 = mhlo.subtract %452, %455 : tensor<1x14x14x576xf32>
%458 = mhlo.multiply %457, %453 : tensor<1x14x14x576xf32>
%459 = mhlo.divide %458, %456 : tensor<1x14x14x576xf32>
%460 = mhlo.add %459, %454 : tensor<1x14x14x576xf32>
%461 = "mhlo.clamp"(%72, %460, %69) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%462 = mhlo.convolution(%461, %cst_32) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x14x14x96xf32>
%463 = "mhlo.broadcast_in_dim"(%cst_31) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%464 = "mhlo.broadcast_in_dim"(%cst_30) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%465 = "mhlo.broadcast_in_dim"(%cst_29) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%466 = "mhlo.broadcast_in_dim"(%17) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x14x14x96xf32>
%467 = mhlo.subtract %462, %465 : tensor<1x14x14x96xf32>
%468 = mhlo.multiply %467, %463 : tensor<1x14x14x96xf32>
%469 = mhlo.divide %468, %466 : tensor<1x14x14x96xf32>
%470 = mhlo.add %469, %464 : tensor<1x14x14x96xf32>
%471 = chlo.broadcast_add %440, %470 : (tensor<1x14x14x96xf32>, tensor<1x14x14x96xf32>) -> tensor<1x14x14x96xf32>
%472 = mhlo.convolution(%471, %cst_39) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x14x14x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x14x14x576xf32>
%473 = "mhlo.broadcast_in_dim"(%cst_38) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%474 = "mhlo.broadcast_in_dim"(%cst_37) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%475 = "mhlo.broadcast_in_dim"(%cst_36) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%476 = "mhlo.broadcast_in_dim"(%16) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x14x14x576xf32>
%477 = mhlo.subtract %472, %475 : tensor<1x14x14x576xf32>
%478 = mhlo.multiply %477, %473 : tensor<1x14x14x576xf32>
%479 = mhlo.divide %478, %476 : tensor<1x14x14x576xf32>
%480 = mhlo.add %479, %474 : tensor<1x14x14x576xf32>
%481 = "mhlo.clamp"(%72, %480, %69) : (tensor<f32>, tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x14x14x576xf32>
%482 = "mhlo.pad"(%481, %72) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x576xf32>, tensor<f32>) -> tensor<1x15x15x576xf32>
%483 = mhlo.convolution(%482, %15) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 576 : i64} : (tensor<1x15x15x576xf32>, tensor<3x3x576x1xf32>) -> tensor<1x7x7x576xf32>
%484 = "mhlo.broadcast_in_dim"(%cst_35) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x7x7x576xf32>
%485 = "mhlo.broadcast_in_dim"(%cst_34) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x7x7x576xf32>
%486 = "mhlo.broadcast_in_dim"(%cst_33) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x7x7x576xf32>
%487 = "mhlo.broadcast_in_dim"(%14) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x7x7x576xf32>
%488 = mhlo.subtract %483, %486 : tensor<1x7x7x576xf32>
%489 = mhlo.multiply %488, %484 : tensor<1x7x7x576xf32>
%490 = mhlo.divide %489, %487 : tensor<1x7x7x576xf32>
%491 = mhlo.add %490, %485 : tensor<1x7x7x576xf32>
%492 = "mhlo.clamp"(%72, %491, %69) : (tensor<f32>, tensor<1x7x7x576xf32>, tensor<f32>) -> tensor<1x7x7x576xf32>
%493 = mhlo.convolution(%492, %cst_43) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x576xf32>, tensor<1x1x576x160xf32>) -> tensor<1x7x7x160xf32>
%494 = "mhlo.broadcast_in_dim"(%cst_42) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%495 = "mhlo.broadcast_in_dim"(%cst_41) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%496 = "mhlo.broadcast_in_dim"(%cst_40) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%497 = "mhlo.broadcast_in_dim"(%13) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%498 = mhlo.subtract %493, %496 : tensor<1x7x7x160xf32>
%499 = mhlo.multiply %498, %494 : tensor<1x7x7x160xf32>
%500 = mhlo.divide %499, %497 : tensor<1x7x7x160xf32>
%501 = mhlo.add %500, %495 : tensor<1x7x7x160xf32>
%502 = mhlo.convolution(%501, %cst_50) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%503 = "mhlo.broadcast_in_dim"(%cst_49) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%504 = "mhlo.broadcast_in_dim"(%cst_48) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%505 = "mhlo.broadcast_in_dim"(%cst_47) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%506 = "mhlo.broadcast_in_dim"(%12) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%507 = mhlo.subtract %502, %505 : tensor<1x7x7x960xf32>
%508 = mhlo.multiply %507, %503 : tensor<1x7x7x960xf32>
%509 = mhlo.divide %508, %506 : tensor<1x7x7x960xf32>
%510 = mhlo.add %509, %504 : tensor<1x7x7x960xf32>
%511 = "mhlo.clamp"(%72, %510, %69) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%512 = "mhlo.pad"(%511, %cst) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x9x9x960xf32>
%513 = mhlo.convolution(%512, %11) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x9x9x960xf32>, tensor<3x3x960x1xf32>) -> tensor<1x7x7x960xf32>
%514 = "mhlo.broadcast_in_dim"(%cst_46) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%515 = "mhlo.broadcast_in_dim"(%cst_45) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%516 = "mhlo.broadcast_in_dim"(%cst_44) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%517 = "mhlo.broadcast_in_dim"(%10) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%518 = mhlo.subtract %513, %516 : tensor<1x7x7x960xf32>
%519 = mhlo.multiply %518, %514 : tensor<1x7x7x960xf32>
%520 = mhlo.divide %519, %517 : tensor<1x7x7x960xf32>
%521 = mhlo.add %520, %515 : tensor<1x7x7x960xf32>
%522 = "mhlo.clamp"(%72, %521, %69) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%523 = mhlo.convolution(%522, %cst_54) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x160xf32>) -> tensor<1x7x7x160xf32>
%524 = "mhlo.broadcast_in_dim"(%cst_53) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%525 = "mhlo.broadcast_in_dim"(%cst_52) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%526 = "mhlo.broadcast_in_dim"(%cst_51) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%527 = "mhlo.broadcast_in_dim"(%9) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%528 = mhlo.subtract %523, %526 : tensor<1x7x7x160xf32>
%529 = mhlo.multiply %528, %524 : tensor<1x7x7x160xf32>
%530 = mhlo.divide %529, %527 : tensor<1x7x7x160xf32>
%531 = mhlo.add %530, %525 : tensor<1x7x7x160xf32>
%532 = chlo.broadcast_add %501, %531 : (tensor<1x7x7x160xf32>, tensor<1x7x7x160xf32>) -> tensor<1x7x7x160xf32>
%533 = mhlo.convolution(%532, %cst_61) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%534 = "mhlo.broadcast_in_dim"(%cst_60) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%535 = "mhlo.broadcast_in_dim"(%cst_59) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%536 = "mhlo.broadcast_in_dim"(%cst_58) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%537 = "mhlo.broadcast_in_dim"(%8) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%538 = mhlo.subtract %533, %536 : tensor<1x7x7x960xf32>
%539 = mhlo.multiply %538, %534 : tensor<1x7x7x960xf32>
%540 = mhlo.divide %539, %537 : tensor<1x7x7x960xf32>
%541 = mhlo.add %540, %535 : tensor<1x7x7x960xf32>
%542 = "mhlo.clamp"(%72, %541, %69) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%543 = "mhlo.pad"(%542, %cst) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x9x9x960xf32>
%544 = mhlo.convolution(%543, %7) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x9x9x960xf32>, tensor<3x3x960x1xf32>) -> tensor<1x7x7x960xf32>
%545 = "mhlo.broadcast_in_dim"(%cst_57) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%546 = "mhlo.broadcast_in_dim"(%cst_56) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%547 = "mhlo.broadcast_in_dim"(%cst_55) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%548 = "mhlo.broadcast_in_dim"(%6) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%549 = mhlo.subtract %544, %547 : tensor<1x7x7x960xf32>
%550 = mhlo.multiply %549, %545 : tensor<1x7x7x960xf32>
%551 = mhlo.divide %550, %548 : tensor<1x7x7x960xf32>
%552 = mhlo.add %551, %546 : tensor<1x7x7x960xf32>
%553 = "mhlo.clamp"(%72, %552, %69) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%554 = mhlo.convolution(%553, %cst_65) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x160xf32>) -> tensor<1x7x7x160xf32>
%555 = "mhlo.broadcast_in_dim"(%cst_64) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%556 = "mhlo.broadcast_in_dim"(%cst_63) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%557 = "mhlo.broadcast_in_dim"(%cst_62) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%558 = "mhlo.broadcast_in_dim"(%5) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<160xf32>) -> tensor<1x7x7x160xf32>
%559 = mhlo.subtract %554, %557 : tensor<1x7x7x160xf32>
%560 = mhlo.multiply %559, %555 : tensor<1x7x7x160xf32>
%561 = mhlo.divide %560, %558 : tensor<1x7x7x160xf32>
%562 = mhlo.add %561, %556 : tensor<1x7x7x160xf32>
%563 = chlo.broadcast_add %532, %562 : (tensor<1x7x7x160xf32>, tensor<1x7x7x160xf32>) -> tensor<1x7x7x160xf32>
%564 = mhlo.convolution(%563, %cst_72) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x160xf32>, tensor<1x1x160x960xf32>) -> tensor<1x7x7x960xf32>
%565 = "mhlo.broadcast_in_dim"(%cst_71) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%566 = "mhlo.broadcast_in_dim"(%cst_70) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%567 = "mhlo.broadcast_in_dim"(%cst_69) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%568 = "mhlo.broadcast_in_dim"(%4) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%569 = mhlo.subtract %564, %567 : tensor<1x7x7x960xf32>
%570 = mhlo.multiply %569, %565 : tensor<1x7x7x960xf32>
%571 = mhlo.divide %570, %568 : tensor<1x7x7x960xf32>
%572 = mhlo.add %571, %566 : tensor<1x7x7x960xf32>
%573 = "mhlo.clamp"(%72, %572, %69) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%574 = "mhlo.pad"(%573, %cst) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x9x9x960xf32>
%575 = mhlo.convolution(%574, %3) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 960 : i64} : (tensor<1x9x9x960xf32>, tensor<3x3x960x1xf32>) -> tensor<1x7x7x960xf32>
%576 = "mhlo.broadcast_in_dim"(%cst_68) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%577 = "mhlo.broadcast_in_dim"(%cst_67) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%578 = "mhlo.broadcast_in_dim"(%cst_66) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%579 = "mhlo.broadcast_in_dim"(%2) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<960xf32>) -> tensor<1x7x7x960xf32>
%580 = mhlo.subtract %575, %578 : tensor<1x7x7x960xf32>
%581 = mhlo.multiply %580, %576 : tensor<1x7x7x960xf32>
%582 = mhlo.divide %581, %579 : tensor<1x7x7x960xf32>
%583 = mhlo.add %582, %577 : tensor<1x7x7x960xf32>
%584 = "mhlo.clamp"(%72, %583, %69) : (tensor<f32>, tensor<1x7x7x960xf32>, tensor<f32>) -> tensor<1x7x7x960xf32>
%585 = mhlo.convolution(%584, %cst_76) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x960xf32>, tensor<1x1x960x320xf32>) -> tensor<1x7x7x320xf32>
%586 = "mhlo.broadcast_in_dim"(%cst_75) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<320xf32>) -> tensor<1x7x7x320xf32>
%587 = "mhlo.broadcast_in_dim"(%cst_74) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<320xf32>) -> tensor<1x7x7x320xf32>
%588 = "mhlo.broadcast_in_dim"(%cst_73) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<320xf32>) -> tensor<1x7x7x320xf32>
%589 = "mhlo.broadcast_in_dim"(%1) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<320xf32>) -> tensor<1x7x7x320xf32>
%590 = mhlo.subtract %585, %588 : tensor<1x7x7x320xf32>
%591 = mhlo.multiply %590, %586 : tensor<1x7x7x320xf32>
%592 = mhlo.divide %591, %589 : tensor<1x7x7x320xf32>
%593 = mhlo.add %592, %587 : tensor<1x7x7x320xf32>
%594 = mhlo.convolution(%593, %cst_183) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x7x7x320xf32>, tensor<1x1x320x1280xf32>) -> tensor<1x7x7x1280xf32>
%595 = "mhlo.broadcast_in_dim"(%cst_182) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<1280xf32>) -> tensor<1x7x7x1280xf32>
%596 = "mhlo.broadcast_in_dim"(%cst_181) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<1280xf32>) -> tensor<1x7x7x1280xf32>
%597 = "mhlo.broadcast_in_dim"(%cst_180) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<1280xf32>) -> tensor<1x7x7x1280xf32>
%598 = "mhlo.broadcast_in_dim"(%0) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<1280xf32>) -> tensor<1x7x7x1280xf32>
%599 = mhlo.subtract %594, %597 : tensor<1x7x7x1280xf32>
%600 = mhlo.multiply %599, %595 : tensor<1x7x7x1280xf32>
%601 = mhlo.divide %600, %598 : tensor<1x7x7x1280xf32>
%602 = mhlo.add %601, %596 : tensor<1x7x7x1280xf32>
%603 = "mhlo.clamp"(%72, %602, %69) : (tensor<f32>, tensor<1x7x7x1280xf32>, tensor<f32>) -> tensor<1x7x7x1280xf32>
%604 = "mhlo.reduce"(%603, %72) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%616 = mhlo.add %arg1, %arg2 : tensor<f32>
"mhlo.return"(%616) : (tensor<f32>) -> ()
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<1x7x7x1280xf32>, tensor<f32>) -> tensor<1x1280xf32>
%605 = chlo.broadcast_divide %604, %70 {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<1x1280xf32>, tensor<f32>) -> tensor<1x1280xf32>
%606 = "mhlo.dot"(%605, %cst_192) : (tensor<1x1280xf32>, tensor<1280x1000xf32>) -> tensor<1x1000xf32>
%607 = "mhlo.broadcast_in_dim"(%cst_191) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<1000xf32>) -> tensor<1x1000xf32>
%608 = mhlo.add %606, %607 : tensor<1x1000xf32>
%609 = "mhlo.reduce"(%608, %71) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%616 = mhlo.maximum %arg1, %arg2 : tensor<f32>
"mhlo.return"(%616) : (tensor<f32>) -> ()
}) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x1000xf32>, tensor<f32>) -> tensor<1xf32>
%610 = linalg.tensor_expand_shape %609 [[0, 1]] : tensor<1xf32> into tensor<1x1xf32>
%611 = chlo.broadcast_subtract %608, %610 : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32>
%612 = "mhlo.exponential"(%611) : (tensor<1x1000xf32>) -> tensor<1x1000xf32>
%613 = "mhlo.reduce"(%612, %72) ( {
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
%616 = mhlo.add %arg1, %arg2 : tensor<f32>
"mhlo.return"(%616) : (tensor<f32>) -> ()
}) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x1000xf32>, tensor<f32>) -> tensor<1xf32>
%614 = linalg.tensor_expand_shape %613 [[0, 1]] : tensor<1xf32> into tensor<1x1xf32>
%615 = chlo.broadcast_divide %612, %614 : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32>
return %615 : tensor<1x1000xf32>
}
// -----// IR Dump After ConvertMHLOToLinalgOnTensors //----- //
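// Reading note (a sketch of the patterns visible in this dump, not additional compiler output):
// the recurring subtract / multiply / divide / add chains over broadcast per-channel constants
// in the MHLO function above appear to be the folded inference-time batch normalization,
// roughly y = gamma * (x - mean) / denom + beta, and each "mhlo.clamp"(%zero, ..., %six)
// is a ReLU6 (clamp to [0, 6]). In the linalg-on-tensors form below, each convolution becomes a
// linalg.init_tensor + linalg.fill + linalg.conv_2d_input_nhwc_filter_hwcf
// (or linalg.depthwise_conv_2d_input_nhwc_filter_hwc) sequence, the per-channel constants are
// broadcast with linalg.generic using a (d3) -> (d0, d1, d2, d3) indexing map, and the clamps
// lower to cmpf/select sequences inside linalg.generic regions.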
builtin.func private @"__inference_<lambda>_133580"(%arg0: tensor<1x224x224x3xf32> {tf._user_specified_name = "x"}) -> tensor<1x1000xf32> attributes {tf._construction_context = "kEagerRuntime", tf._input_shapes = [#tf_type.shape<1x224x224x3>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, 
#tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>], tf.signature.is_stateful} {
%cst = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_0 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_1 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_2 = constant dense<0.000000e+00> : tensor<f32>
%cst_3 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x960x1xf32>
%cst_4 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_5 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_6 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_7 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x960x1xf32>
%cst_8 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_9 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_10 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_11 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x960x1xf32>
%cst_12 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_13 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_14 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_15 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x576x1xf32>
%cst_16 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_17 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_18 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_19 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x576x1xf32>
%cst_20 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_21 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_22 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_23 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x576x1xf32>
%cst_24 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_25 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_26 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_27 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x384x1xf32>
%cst_28 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_29 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_30 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_31 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x384x1xf32>
%cst_32 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_33 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_34 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_35 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x384x1xf32>
%cst_36 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_37 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_38 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_39 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x384x1xf32>
%cst_40 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_41 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_42 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_43 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x192x1xf32>
%cst_44 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_45 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_46 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_47 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x192x1xf32>
%cst_48 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_49 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_50 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_51 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x192x1xf32>
%cst_52 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_53 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_54 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_55 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x144x1xf32>
%cst_56 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_57 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_58 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_59 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x144x1xf32>
%cst_60 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_61 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_62 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_63 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x96x1xf32>
%cst_64 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_65 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_66 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_67 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x32x1xf32>
%cst_68 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_69 = constant dense<6.000000e+00> : tensor<f32>
%cst_70 = constant dense<4.900000e+01> : tensor<f32>
%cst_71 = constant dense<0xFF800000> : tensor<f32>
%cst_72 = constant dense<0.000000e+00> : tensor<f32>
%cst_73 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_74 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_75 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_76 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_77 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_78 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_79 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_80 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_81 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_82 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_83 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x96xf32>
%cst_84 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_85 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_86 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_87 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_88 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_89 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_90 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_91 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_92 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_93 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_94 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32>
%cst_95 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_96 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_97 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_98 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_99 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_100 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_101 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_102 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_103 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_104 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_105 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x96xf32>
%cst_106 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_107 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_108 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_109 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_110 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_111 = constant opaque<"_", "0xDEADBEEF"> : tensor<576xf32>
%cst_112 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x576xf32>
%cst_113 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_114 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_115 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_116 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x576x160xf32>
%cst_117 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_118 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_119 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_120 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_121 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_122 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_123 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_124 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_125 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_126 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_127 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32>
%cst_128 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_129 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_130 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_131 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_132 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_133 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_134 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_135 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_136 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_137 = constant opaque<"_", "0xDEADBEEF"> : tensor<160xf32>
%cst_138 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x160xf32>
%cst_139 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_140 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_141 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_142 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_143 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_144 = constant opaque<"_", "0xDEADBEEF"> : tensor<960xf32>
%cst_145 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x160x960xf32>
%cst_146 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_147 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_148 = constant opaque<"_", "0xDEADBEEF"> : tensor<320xf32>
%cst_149 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x960x320xf32>
%cst_150 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_151 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_152 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_153 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_154 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_155 = constant opaque<"_", "0xDEADBEEF"> : tensor<96xf32>
%cst_156 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x16x96xf32>
%cst_157 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_158 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_159 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_160 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x96x24xf32>
%cst_161 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_162 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_163 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_164 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_165 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_166 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_167 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32>
%cst_168 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_169 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_170 = constant opaque<"_", "0xDEADBEEF"> : tensor<24xf32>
%cst_171 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x24xf32>
%cst_172 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_173 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_174 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_175 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_176 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_177 = constant opaque<"_", "0xDEADBEEF"> : tensor<144xf32>
%cst_178 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x24x144xf32>
%cst_179 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_180 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_181 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_182 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x144x32xf32>
%cst_183 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_184 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_185 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_186 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_187 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_188 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_189 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_190 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_191 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_192 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_193 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32>
%cst_194 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_195 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_196 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_197 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_198 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_199 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_200 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_201 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_202 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_203 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_204 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x32xf32>
%cst_205 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_206 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_207 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_208 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_209 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_210 = constant opaque<"_", "0xDEADBEEF"> : tensor<192xf32>
%cst_211 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x192xf32>
%cst_212 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_213 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_214 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_215 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x192x64xf32>
%cst_216 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_217 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_218 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_219 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_220 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_221 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_222 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_223 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_224 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_225 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_226 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_227 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_228 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_229 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_230 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_231 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_232 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_233 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_234 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_235 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_236 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_237 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_238 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_239 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_240 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_241 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_242 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_243 = constant opaque<"_", "0xDEADBEEF"> : tensor<384xf32>
%cst_244 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x64x384xf32>
%cst_245 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_246 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_247 = constant opaque<"_", "0xDEADBEEF"> : tensor<64xf32>
%cst_248 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x384x64xf32>
%cst_249 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_250 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_251 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_252 = constant opaque<"_", "0xDEADBEEF"> : tensor<3x3x3x32xf32>
%cst_253 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_254 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_255 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280xf32>
%cst_256 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x320x1280xf32>
%cst_257 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_258 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_259 = constant opaque<"_", "0xDEADBEEF"> : tensor<32xf32>
%cst_260 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_261 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_262 = constant opaque<"_", "0xDEADBEEF"> : tensor<16xf32>
%cst_263 = constant opaque<"_", "0xDEADBEEF"> : tensor<1x1x32x16xf32>
%cst_264 = constant opaque<"_", "0xDEADBEEF"> : tensor<1000xf32>
%cst_265 = constant opaque<"_", "0xDEADBEEF"> : tensor<1280x1000xf32>
%cst_266 = constant 0.000000e+00 : f32
%c0 = constant 0 : index
%c0_267 = constant 0 : index
%c0_268 = constant 0 : index
%c1 = constant 1 : index
%c0_269 = constant 0 : index
%c1_270 = constant 1 : index
%c0_271 = constant 0 : index
%c0_272 = constant 0 : index
%0 = linalg.pad_tensor %arg0 low[%c0, %c0_268, %c0_269, %c0_271] high[%c0_267, %c1, %c1_270, %c0_272] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): // no predecessors
linalg.yield %cst_266 : f32
} : tensor<1x224x224x3xf32> to tensor<1x225x225x3xf32>
%1 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%cst_273 = constant 0.000000e+00 : f32
%2 = linalg.fill(%cst_273, %1) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
%3 = linalg.conv_2d_input_nhwc_filter_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%0, %cst_252 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%2 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%4 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%5 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_251 : tensor<32xf32>) outs(%4 : tensor<1x112x112x32xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x112x112x32xf32>
%6 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_250 : tensor<32xf32>) outs(%6 : tensor<1x112x112x32xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x112x112x32xf32>
%8 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%9 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_249 : tensor<32xf32>) outs(%8 : tensor<1x112x112x32xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x112x112x32xf32>
%10 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%11 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_68 : tensor<32xf32>) outs(%10 : tensor<1x112x112x32xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x112x112x32xf32>
%12 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%13 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%3, %9 : tensor<1x112x112x32xf32>, tensor<1x112x112x32xf32>) outs(%12 : tensor<1x112x112x32xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = subf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x112x112x32xf32>
%14 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%15 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%13, %5 : tensor<1x112x112x32xf32>, tensor<1x112x112x32xf32>) outs(%14 : tensor<1x112x112x32xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = mulf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x112x112x32xf32>
%16 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%17 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%15, %11 : tensor<1x112x112x32xf32>, tensor<1x112x112x32xf32>) outs(%16 : tensor<1x112x112x32xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = divf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x112x112x32xf32>
%18 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%19 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%17, %7 : tensor<1x112x112x32xf32>, tensor<1x112x112x32xf32>) outs(%18 : tensor<1x112x112x32xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = addf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x112x112x32xf32>
%20 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%21 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> ()>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> ()>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_72, %19, %cst_69 : tensor<f32>, tensor<1x112x112x32xf32>, tensor<f32>) outs(%20 : tensor<1x112x112x32xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32): // no predecessors
%1145 = cmpf olt, %arg2, %arg3 : f32
%1146 = select %1145, %arg2, %arg3 : f32
%1147 = cmpf uno, %arg2, %arg3 : f32
%cst_482 = constant 0x7FC00000 : f32
%1148 = select %1147, %cst_482, %1146 : f32
%1149 = cmpf ogt, %1148, %arg1 : f32
%1150 = select %1149, %1148, %arg1 : f32
%1151 = cmpf uno, %1148, %arg1 : f32
%cst_483 = constant 0x7FC00000 : f32
%1152 = select %1151, %cst_483, %1150 : f32
linalg.yield %1152 : f32
} -> tensor<1x112x112x32xf32>
%cst_274 = constant 0.000000e+00 : f32
%c0_275 = constant 0 : index
%c0_276 = constant 0 : index
%c1_277 = constant 1 : index
%c1_278 = constant 1 : index
%c1_279 = constant 1 : index
%c1_280 = constant 1 : index
%c0_281 = constant 0 : index
%c0_282 = constant 0 : index
%22 = linalg.pad_tensor %21 low[%c0_275, %c1_277, %c1_279, %c0_281] high[%c0_276, %c1_278, %c1_280, %c0_282] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): // no predecessors
linalg.yield %cst_274 : f32
} : tensor<1x112x112x32xf32> to tensor<1x114x114x32xf32>
%23 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%cst_283 = constant 0.000000e+00 : f32
%24 = linalg.fill(%cst_283, %23) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
%25 = linalg.tensor_collapse_shape %cst_67 [[0], [1], [2, 3]] : tensor<3x3x32x1xf32> into tensor<3x3x32xf32>
%26 = linalg.depthwise_conv_2d_input_nhwc_filter_hwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%22, %25 : tensor<1x114x114x32xf32>, tensor<3x3x32xf32>) outs(%24 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%27 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%28 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_259 : tensor<32xf32>) outs(%27 : tensor<1x112x112x32xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x112x112x32xf32>
%29 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%30 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_258 : tensor<32xf32>) outs(%29 : tensor<1x112x112x32xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x112x112x32xf32>
%31 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%32 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_257 : tensor<32xf32>) outs(%31 : tensor<1x112x112x32xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x112x112x32xf32>
%33 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%34 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_66 : tensor<32xf32>) outs(%33 : tensor<1x112x112x32xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x112x112x32xf32>
%35 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%36 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%26, %32 : tensor<1x112x112x32xf32>, tensor<1x112x112x32xf32>) outs(%35 : tensor<1x112x112x32xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = subf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x112x112x32xf32>
%37 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%38 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%36, %28 : tensor<1x112x112x32xf32>, tensor<1x112x112x32xf32>) outs(%37 : tensor<1x112x112x32xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = mulf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x112x112x32xf32>
%39 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%40 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%38, %34 : tensor<1x112x112x32xf32>, tensor<1x112x112x32xf32>) outs(%39 : tensor<1x112x112x32xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = divf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x112x112x32xf32>
%41 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%42 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%40, %30 : tensor<1x112x112x32xf32>, tensor<1x112x112x32xf32>) outs(%41 : tensor<1x112x112x32xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = addf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x112x112x32xf32>
%43 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%44 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> ()>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> ()>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_72, %42, %cst_69 : tensor<f32>, tensor<1x112x112x32xf32>, tensor<f32>) outs(%43 : tensor<1x112x112x32xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32): // no predecessors
%1145 = cmpf olt, %arg2, %arg3 : f32
%1146 = select %1145, %arg2, %arg3 : f32
%1147 = cmpf uno, %arg2, %arg3 : f32
%cst_482 = constant 0x7FC00000 : f32
%1148 = select %1147, %cst_482, %1146 : f32
%1149 = cmpf ogt, %1148, %arg1 : f32
%1150 = select %1149, %1148, %arg1 : f32
%1151 = cmpf uno, %1148, %arg1 : f32
%cst_483 = constant 0x7FC00000 : f32
%1152 = select %1151, %cst_483, %1150 : f32
linalg.yield %1152 : f32
} -> tensor<1x112x112x32xf32>
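// The broadcast-subf-mulf-divf-addf sequence above applies per-channel 32-element constants to the
// convolution result, which is consistent with an inference-time batch normalization expanded into
// elementwise ops; the final generic clamps each element between the scalars %cst_72 and %cst_69
// with NaN handling, presumably the ReLU6 activation of this MobileNet-style model (an inference
// from the op pattern only, since the constant values are not shown in this excerpt).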
%45 = linalg.init_tensor [1, 112, 112, 16] : tensor<1x112x112x16xf32>
%cst_284 = constant 0.000000e+00 : f32
%46 = linalg.fill(%cst_284, %45) : f32, tensor<1x112x112x16xf32> -> tensor<1x112x112x16xf32>
%47 = linalg.conv_2d_input_nhwc_filter_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%44, %cst_263 : tensor<1x112x112x32xf32>, tensor<1x1x32x16xf32>) outs(%46 : tensor<1x112x112x16xf32>) -> tensor<1x112x112x16xf32>
%48 = linalg.init_tensor [1, 112, 112, 16] : tensor<1x112x112x16xf32>
%49 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_262 : tensor<16xf32>) outs(%48 : tensor<1x112x112x16xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x112x112x16xf32>
%50 = linalg.init_tensor [1, 112, 112, 16] : tensor<1x112x112x16xf32>
%51 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_261 : tensor<16xf32>) outs(%50 : tensor<1x112x112x16xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x112x112x16xf32>
%52 = linalg.init_tensor [1, 112, 112, 16] : tensor<1x112x112x16xf32>
%53 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_260 : tensor<16xf32>) outs(%52 : tensor<1x112x112x16xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x112x112x16xf32>
%54 = linalg.init_tensor [1, 112, 112, 16] : tensor<1x112x112x16xf32>
%55 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_65 : tensor<16xf32>) outs(%54 : tensor<1x112x112x16xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x112x112x16xf32>
%56 = linalg.init_tensor [1, 112, 112, 16] : tensor<1x112x112x16xf32>
%57 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%47, %53 : tensor<1x112x112x16xf32>, tensor<1x112x112x16xf32>) outs(%56 : tensor<1x112x112x16xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = subf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x112x112x16xf32>
%58 = linalg.init_tensor [1, 112, 112, 16] : tensor<1x112x112x16xf32>
%59 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%57, %49 : tensor<1x112x112x16xf32>, tensor<1x112x112x16xf32>) outs(%58 : tensor<1x112x112x16xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = mulf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x112x112x16xf32>
%60 = linalg.init_tensor [1, 112, 112, 16] : tensor<1x112x112x16xf32>
%61 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%59, %55 : tensor<1x112x112x16xf32>, tensor<1x112x112x16xf32>) outs(%60 : tensor<1x112x112x16xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = divf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x112x112x16xf32>
%62 = linalg.init_tensor [1, 112, 112, 16] : tensor<1x112x112x16xf32>
%63 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%61, %51 : tensor<1x112x112x16xf32>, tensor<1x112x112x16xf32>) outs(%62 : tensor<1x112x112x16xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = addf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x112x112x16xf32>
%64 = linalg.init_tensor [1, 112, 112, 96] : tensor<1x112x112x96xf32>
%cst_285 = constant 0.000000e+00 : f32
%65 = linalg.fill(%cst_285, %64) : f32, tensor<1x112x112x96xf32> -> tensor<1x112x112x96xf32>
%66 = linalg.conv_2d_input_nhwc_filter_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%63, %cst_156 : tensor<1x112x112x16xf32>, tensor<1x1x16x96xf32>) outs(%65 : tensor<1x112x112x96xf32>) -> tensor<1x112x112x96xf32>
%67 = linalg.init_tensor [1, 112, 112, 96] : tensor<1x112x112x96xf32>
%68 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_155 : tensor<96xf32>) outs(%67 : tensor<1x112x112x96xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x112x112x96xf32>
%69 = linalg.init_tensor [1, 112, 112, 96] : tensor<1x112x112x96xf32>
%70 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_154 : tensor<96xf32>) outs(%69 : tensor<1x112x112x96xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x112x112x96xf32>
%71 = linalg.init_tensor [1, 112, 112, 96] : tensor<1x112x112x96xf32>
%72 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_153 : tensor<96xf32>) outs(%71 : tensor<1x112x112x96xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x112x112x96xf32>
%73 = linalg.init_tensor [1, 112, 112, 96] : tensor<1x112x112x96xf32>
%74 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_64 : tensor<96xf32>) outs(%73 : tensor<1x112x112x96xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x112x112x96xf32>
%75 = linalg.init_tensor [1, 112, 112, 96] : tensor<1x112x112x96xf32>
%76 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%66, %72 : tensor<1x112x112x96xf32>, tensor<1x112x112x96xf32>) outs(%75 : tensor<1x112x112x96xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = subf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x112x112x96xf32>
%77 = linalg.init_tensor [1, 112, 112, 96] : tensor<1x112x112x96xf32>
%78 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%76, %68 : tensor<1x112x112x96xf32>, tensor<1x112x112x96xf32>) outs(%77 : tensor<1x112x112x96xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = mulf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x112x112x96xf32>
%79 = linalg.init_tensor [1, 112, 112, 96] : tensor<1x112x112x96xf32>
%80 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%78, %74 : tensor<1x112x112x96xf32>, tensor<1x112x112x96xf32>) outs(%79 : tensor<1x112x112x96xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = divf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x112x112x96xf32>
%81 = linalg.init_tensor [1, 112, 112, 96] : tensor<1x112x112x96xf32>
%82 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%80, %70 : tensor<1x112x112x96xf32>, tensor<1x112x112x96xf32>) outs(%81 : tensor<1x112x112x96xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = addf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x112x112x96xf32>
%83 = linalg.init_tensor [1, 112, 112, 96] : tensor<1x112x112x96xf32>
%84 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> ()>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> ()>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_72, %82, %cst_69 : tensor<f32>, tensor<1x112x112x96xf32>, tensor<f32>) outs(%83 : tensor<1x112x112x96xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32): // no predecessors
%1145 = cmpf olt, %arg2, %arg3 : f32
%1146 = select %1145, %arg2, %arg3 : f32
%1147 = cmpf uno, %arg2, %arg3 : f32
%cst_482 = constant 0x7FC00000 : f32
%1148 = select %1147, %cst_482, %1146 : f32
%1149 = cmpf ogt, %1148, %arg1 : f32
%1150 = select %1149, %1148, %arg1 : f32
%1151 = cmpf uno, %1148, %arg1 : f32
%cst_483 = constant 0x7FC00000 : f32
%1152 = select %1151, %cst_483, %1150 : f32
linalg.yield %1152 : f32
} -> tensor<1x112x112x96xf32>
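// The constants and pad below zero-pad the 1x112x112x96 feature map to 1x113x113x96 (one extra
// row/column on the high side of the spatial dims only) so that the following stride-2 3x3
// depthwise convolution produces a 1x56x56x96 result; asymmetric high-side padding like this is
// what SAME padding typically lowers to for an even input size (an assumption based on the shapes).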
%cst_286 = constant 0.000000e+00 : f32
%c0_287 = constant 0 : index
%c0_288 = constant 0 : index
%c0_289 = constant 0 : index
%c1_290 = constant 1 : index
%c0_291 = constant 0 : index
%c1_292 = constant 1 : index
%c0_293 = constant 0 : index
%c0_294 = constant 0 : index
%85 = linalg.pad_tensor %84 low[%c0_287, %c0_289, %c0_291, %c0_293] high[%c0_288, %c1_290, %c1_292, %c0_294] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): // no predecessors
linalg.yield %cst_286 : f32
} : tensor<1x112x112x96xf32> to tensor<1x113x113x96xf32>
%86 = linalg.init_tensor [1, 56, 56, 96] : tensor<1x56x56x96xf32>
%cst_295 = constant 0.000000e+00 : f32
%87 = linalg.fill(%cst_295, %86) : f32, tensor<1x56x56x96xf32> -> tensor<1x56x56x96xf32>
%88 = linalg.tensor_collapse_shape %cst_63 [[0], [1], [2, 3]] : tensor<3x3x96x1xf32> into tensor<3x3x96xf32>
%89 = linalg.depthwise_conv_2d_input_nhwc_filter_hwc {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%85, %88 : tensor<1x113x113x96xf32>, tensor<3x3x96xf32>) outs(%87 : tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32>
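// linalg.tensor_collapse_shape above folds the trailing unit channel-multiplier dimension of the
// 3x3x96x1 depthwise filter into a 3x3x96 tensor so it matches the HWC filter layout expected by
// linalg.depthwise_conv_2d_input_nhwc_filter_hwc.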
%90 = linalg.init_tensor [1, 56, 56, 96] : tensor<1x56x56x96xf32>
%91 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_152 : tensor<96xf32>) outs(%90 : tensor<1x56x56x96xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x56x56x96xf32>
%92 = linalg.init_tensor [1, 56, 56, 96] : tensor<1x56x56x96xf32>
%93 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_151 : tensor<96xf32>) outs(%92 : tensor<1x56x56x96xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x56x56x96xf32>
%94 = linalg.init_tensor [1, 56, 56, 96] : tensor<1x56x56x96xf32>
%95 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_150 : tensor<96xf32>) outs(%94 : tensor<1x56x56x96xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x56x56x96xf32>
%96 = linalg.init_tensor [1, 56, 56, 96] : tensor<1x56x56x96xf32>
%97 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_62 : tensor<96xf32>) outs(%96 : tensor<1x56x56x96xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x56x56x96xf32>
%98 = linalg.init_tensor [1, 56, 56, 96] : tensor<1x56x56x96xf32>
%99 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%89, %95 : tensor<1x56x56x96xf32>, tensor<1x56x56x96xf32>) outs(%98 : tensor<1x56x56x96xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = subf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x96xf32>
%100 = linalg.init_tensor [1, 56, 56, 96] : tensor<1x56x56x96xf32>
%101 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%99, %91 : tensor<1x56x56x96xf32>, tensor<1x56x56x96xf32>) outs(%100 : tensor<1x56x56x96xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = mulf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x96xf32>
%102 = linalg.init_tensor [1, 56, 56, 96] : tensor<1x56x56x96xf32>
%103 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%101, %97 : tensor<1x56x56x96xf32>, tensor<1x56x56x96xf32>) outs(%102 : tensor<1x56x56x96xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = divf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x96xf32>
%104 = linalg.init_tensor [1, 56, 56, 96] : tensor<1x56x56x96xf32>
%105 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%103, %93 : tensor<1x56x56x96xf32>, tensor<1x56x56x96xf32>) outs(%104 : tensor<1x56x56x96xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = addf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x96xf32>
%106 = linalg.init_tensor [1, 56, 56, 96] : tensor<1x56x56x96xf32>
%107 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> ()>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> ()>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_72, %105, %cst_69 : tensor<f32>, tensor<1x56x56x96xf32>, tensor<f32>) outs(%106 : tensor<1x56x56x96xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32): // no predecessors
%1145 = cmpf olt, %arg2, %arg3 : f32
%1146 = select %1145, %arg2, %arg3 : f32
%1147 = cmpf uno, %arg2, %arg3 : f32
%cst_482 = constant 0x7FC00000 : f32
%1148 = select %1147, %cst_482, %1146 : f32
%1149 = cmpf ogt, %1148, %arg1 : f32
%1150 = select %1149, %1148, %arg1 : f32
%1151 = cmpf uno, %1148, %arg1 : f32
%cst_483 = constant 0x7FC00000 : f32
%1152 = select %1151, %cst_483, %1150 : f32
linalg.yield %1152 : f32
} -> tensor<1x56x56x96xf32>
%108 = linalg.init_tensor [1, 56, 56, 24] : tensor<1x56x56x24xf32>
%cst_296 = constant 0.000000e+00 : f32
%109 = linalg.fill(%cst_296, %108) : f32, tensor<1x56x56x24xf32> -> tensor<1x56x56x24xf32>
%110 = linalg.conv_2d_input_nhwc_filter_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%107, %cst_160 : tensor<1x56x56x96xf32>, tensor<1x1x96x24xf32>) outs(%109 : tensor<1x56x56x24xf32>) -> tensor<1x56x56x24xf32>
%111 = linalg.init_tensor [1, 56, 56, 24] : tensor<1x56x56x24xf32>
%112 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_159 : tensor<24xf32>) outs(%111 : tensor<1x56x56x24xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x56x56x24xf32>
%113 = linalg.init_tensor [1, 56, 56, 24] : tensor<1x56x56x24xf32>
%114 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_158 : tensor<24xf32>) outs(%113 : tensor<1x56x56x24xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x56x56x24xf32>
%115 = linalg.init_tensor [1, 56, 56, 24] : tensor<1x56x56x24xf32>
%116 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_157 : tensor<24xf32>) outs(%115 : tensor<1x56x56x24xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x56x56x24xf32>
%117 = linalg.init_tensor [1, 56, 56, 24] : tensor<1x56x56x24xf32>
%118 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_61 : tensor<24xf32>) outs(%117 : tensor<1x56x56x24xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x56x56x24xf32>
%119 = linalg.init_tensor [1, 56, 56, 24] : tensor<1x56x56x24xf32>
%120 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%110, %116 : tensor<1x56x56x24xf32>, tensor<1x56x56x24xf32>) outs(%119 : tensor<1x56x56x24xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = subf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x24xf32>
%121 = linalg.init_tensor [1, 56, 56, 24] : tensor<1x56x56x24xf32>
%122 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%120, %112 : tensor<1x56x56x24xf32>, tensor<1x56x56x24xf32>) outs(%121 : tensor<1x56x56x24xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = mulf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x24xf32>
%123 = linalg.init_tensor [1, 56, 56, 24] : tensor<1x56x56x24xf32>
%124 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%122, %118 : tensor<1x56x56x24xf32>, tensor<1x56x56x24xf32>) outs(%123 : tensor<1x56x56x24xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = divf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x24xf32>
%125 = linalg.init_tensor [1, 56, 56, 24] : tensor<1x56x56x24xf32>
%126 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%124, %114 : tensor<1x56x56x24xf32>, tensor<1x56x56x24xf32>) outs(%125 : tensor<1x56x56x24xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = addf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x24xf32>
%127 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%cst_297 = constant 0.000000e+00 : f32
%128 = linalg.fill(%cst_297, %127) : f32, tensor<1x56x56x144xf32> -> tensor<1x56x56x144xf32>
%129 = linalg.conv_2d_input_nhwc_filter_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%126, %cst_167 : tensor<1x56x56x24xf32>, tensor<1x1x24x144xf32>) outs(%128 : tensor<1x56x56x144xf32>) -> tensor<1x56x56x144xf32>
%130 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%131 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_166 : tensor<144xf32>) outs(%130 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x56x56x144xf32>
%132 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%133 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_165 : tensor<144xf32>) outs(%132 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x56x56x144xf32>
%134 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%135 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_164 : tensor<144xf32>) outs(%134 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x56x56x144xf32>
%136 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%137 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_60 : tensor<144xf32>) outs(%136 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x56x56x144xf32>
%138 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%139 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%129, %135 : tensor<1x56x56x144xf32>, tensor<1x56x56x144xf32>) outs(%138 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = subf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x144xf32>
%140 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%141 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%139, %131 : tensor<1x56x56x144xf32>, tensor<1x56x56x144xf32>) outs(%140 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = mulf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x144xf32>
%142 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%143 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%141, %137 : tensor<1x56x56x144xf32>, tensor<1x56x56x144xf32>) outs(%142 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = divf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x144xf32>
%144 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%145 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%143, %133 : tensor<1x56x56x144xf32>, tensor<1x56x56x144xf32>) outs(%144 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = addf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x144xf32>
%146 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%147 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> ()>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> ()>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_72, %145, %cst_69 : tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) outs(%146 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32): // no predecessors
%1145 = cmpf olt, %arg2, %arg3 : f32
%1146 = select %1145, %arg2, %arg3 : f32
%1147 = cmpf uno, %arg2, %arg3 : f32
%cst_482 = constant 0x7FC00000 : f32
%1148 = select %1147, %cst_482, %1146 : f32
%1149 = cmpf ogt, %1148, %arg1 : f32
%1150 = select %1149, %1148, %arg1 : f32
%1151 = cmpf uno, %1148, %arg1 : f32
%cst_483 = constant 0x7FC00000 : f32
%1152 = select %1151, %cst_483, %1150 : f32
linalg.yield %1152 : f32
} -> tensor<1x56x56x144xf32>
%cst_298 = constant 0.000000e+00 : f32
%c0_299 = constant 0 : index
%c0_300 = constant 0 : index
%c1_301 = constant 1 : index
%c1_302 = constant 1 : index
%c1_303 = constant 1 : index
%c1_304 = constant 1 : index
%c0_305 = constant 0 : index
%c0_306 = constant 0 : index
%148 = linalg.pad_tensor %147 low[%c0_299, %c1_301, %c1_303, %c0_305] high[%c0_300, %c1_302, %c1_304, %c0_306] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): // no predecessors
linalg.yield %cst_298 : f32
} : tensor<1x56x56x144xf32> to tensor<1x58x58x144xf32>
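// Here the padding is symmetric (one row/column on each side, 56 -> 58) because the following 3x3
// depthwise convolution uses stride 1, so the spatial size stays at 56x56 (again inferred from the
// shapes rather than stated in the dump).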
%149 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%cst_307 = constant 0.000000e+00 : f32
%150 = linalg.fill(%cst_307, %149) : f32, tensor<1x56x56x144xf32> -> tensor<1x56x56x144xf32>
%151 = linalg.tensor_collapse_shape %cst_59 [[0], [1], [2, 3]] : tensor<3x3x144x1xf32> into tensor<3x3x144xf32>
%152 = linalg.depthwise_conv_2d_input_nhwc_filter_hwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%148, %151 : tensor<1x58x58x144xf32>, tensor<3x3x144xf32>) outs(%150 : tensor<1x56x56x144xf32>) -> tensor<1x56x56x144xf32>
%153 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%154 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_163 : tensor<144xf32>) outs(%153 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x56x56x144xf32>
%155 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%156 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_162 : tensor<144xf32>) outs(%155 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x56x56x144xf32>
%157 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%158 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_161 : tensor<144xf32>) outs(%157 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x56x56x144xf32>
%159 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%160 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_58 : tensor<144xf32>) outs(%159 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x56x56x144xf32>
%161 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%162 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%152, %158 : tensor<1x56x56x144xf32>, tensor<1x56x56x144xf32>) outs(%161 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = subf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x144xf32>
%163 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%164 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%162, %154 : tensor<1x56x56x144xf32>, tensor<1x56x56x144xf32>) outs(%163 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = mulf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x144xf32>
%165 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%166 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%164, %160 : tensor<1x56x56x144xf32>, tensor<1x56x56x144xf32>) outs(%165 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = divf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x144xf32>
%167 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%168 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%166, %156 : tensor<1x56x56x144xf32>, tensor<1x56x56x144xf32>) outs(%167 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = addf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x144xf32>
%169 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%170 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> ()>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> ()>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_72, %168, %cst_69 : tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) outs(%169 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32): // no predecessors
%1145 = cmpf olt, %arg2, %arg3 : f32
%1146 = select %1145, %arg2, %arg3 : f32
%1147 = cmpf uno, %arg2, %arg3 : f32
%cst_482 = constant 0x7FC00000 : f32
%1148 = select %1147, %cst_482, %1146 : f32
%1149 = cmpf ogt, %1148, %arg1 : f32
%1150 = select %1149, %1148, %arg1 : f32
%1151 = cmpf uno, %1148, %arg1 : f32
%cst_483 = constant 0x7FC00000 : f32
%1152 = select %1151, %cst_483, %1150 : f32
linalg.yield %1152 : f32
} -> tensor<1x56x56x144xf32>
%171 = linalg.init_tensor [1, 56, 56, 24] : tensor<1x56x56x24xf32>
%cst_308 = constant 0.000000e+00 : f32
%172 = linalg.fill(%cst_308, %171) : f32, tensor<1x56x56x24xf32> -> tensor<1x56x56x24xf32>
%173 = linalg.conv_2d_input_nhwc_filter_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%170, %cst_171 : tensor<1x56x56x144xf32>, tensor<1x1x144x24xf32>) outs(%172 : tensor<1x56x56x24xf32>) -> tensor<1x56x56x24xf32>
%174 = linalg.init_tensor [1, 56, 56, 24] : tensor<1x56x56x24xf32>
%175 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_170 : tensor<24xf32>) outs(%174 : tensor<1x56x56x24xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x56x56x24xf32>
%176 = linalg.init_tensor [1, 56, 56, 24] : tensor<1x56x56x24xf32>
%177 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_169 : tensor<24xf32>) outs(%176 : tensor<1x56x56x24xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x56x56x24xf32>
%178 = linalg.init_tensor [1, 56, 56, 24] : tensor<1x56x56x24xf32>
%179 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_168 : tensor<24xf32>) outs(%178 : tensor<1x56x56x24xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x56x56x24xf32>
%180 = linalg.init_tensor [1, 56, 56, 24] : tensor<1x56x56x24xf32>
%181 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_57 : tensor<24xf32>) outs(%180 : tensor<1x56x56x24xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x56x56x24xf32>
%182 = linalg.init_tensor [1, 56, 56, 24] : tensor<1x56x56x24xf32>
%183 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%173, %179 : tensor<1x56x56x24xf32>, tensor<1x56x56x24xf32>) outs(%182 : tensor<1x56x56x24xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = subf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x24xf32>
%184 = linalg.init_tensor [1, 56, 56, 24] : tensor<1x56x56x24xf32>
%185 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%183, %175 : tensor<1x56x56x24xf32>, tensor<1x56x56x24xf32>) outs(%184 : tensor<1x56x56x24xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = mulf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x24xf32>
%186 = linalg.init_tensor [1, 56, 56, 24] : tensor<1x56x56x24xf32>
%187 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%185, %181 : tensor<1x56x56x24xf32>, tensor<1x56x56x24xf32>) outs(%186 : tensor<1x56x56x24xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = divf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x24xf32>
%188 = linalg.init_tensor [1, 56, 56, 24] : tensor<1x56x56x24xf32>
%189 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%187, %177 : tensor<1x56x56x24xf32>, tensor<1x56x56x24xf32>) outs(%188 : tensor<1x56x56x24xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = addf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x24xf32>
%190 = linalg.init_tensor [1, 56, 56, 24] : tensor<1x56x56x24xf32>
%191 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%126, %189 : tensor<1x56x56x24xf32>, tensor<1x56x56x24xf32>) outs(%190 : tensor<1x56x56x24xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = addf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x24xf32>
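// %191 adds the previous block's output %126 back onto the projected result %189: the skip
// connection of an inverted-residual block whose input and output both have 24 channels.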
%192 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%cst_309 = constant 0.000000e+00 : f32
%193 = linalg.fill(%cst_309, %192) : f32, tensor<1x56x56x144xf32> -> tensor<1x56x56x144xf32>
%194 = linalg.conv_2d_input_nhwc_filter_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%191, %cst_178 : tensor<1x56x56x24xf32>, tensor<1x1x24x144xf32>) outs(%193 : tensor<1x56x56x144xf32>) -> tensor<1x56x56x144xf32>
%195 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%196 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_177 : tensor<144xf32>) outs(%195 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x56x56x144xf32>
%197 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%198 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_176 : tensor<144xf32>) outs(%197 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x56x56x144xf32>
%199 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%200 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_175 : tensor<144xf32>) outs(%199 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x56x56x144xf32>
%201 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%202 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_56 : tensor<144xf32>) outs(%201 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x56x56x144xf32>
%203 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%204 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%194, %200 : tensor<1x56x56x144xf32>, tensor<1x56x56x144xf32>) outs(%203 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = subf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x144xf32>
%205 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%206 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%204, %196 : tensor<1x56x56x144xf32>, tensor<1x56x56x144xf32>) outs(%205 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = mulf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x144xf32>
%207 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%208 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%206, %202 : tensor<1x56x56x144xf32>, tensor<1x56x56x144xf32>) outs(%207 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = divf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x144xf32>
%209 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%210 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%208, %198 : tensor<1x56x56x144xf32>, tensor<1x56x56x144xf32>) outs(%209 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
%1145 = addf %arg1, %arg2 : f32
linalg.yield %1145 : f32
} -> tensor<1x56x56x144xf32>
%211 = linalg.init_tensor [1, 56, 56, 144] : tensor<1x56x56x144xf32>
%212 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> ()>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> ()>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_72, %210, %cst_69 : tensor<f32>, tensor<1x56x56x144xf32>, tensor<f32>) outs(%211 : tensor<1x56x56x144xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32): // no predecessors
%1145 = cmpf olt, %arg2, %arg3 : f32
%1146 = select %1145, %arg2, %arg3 : f32
%1147 = cmpf uno, %arg2, %arg3 : f32
%cst_482 = constant 0x7FC00000 : f32
%1148 = select %1147, %cst_482, %1146 : f32
%1149 = cmpf ogt, %1148, %arg1 : f32
%1150 = select %1149, %1148, %arg1 : f32
%1151 = cmpf uno, %1148, %arg1 : f32
%cst_483 = constant 0x7FC00000 : f32
%1152 = select %1151, %cst_483, %1150 : f32
linalg.yield %1152 : f32
} -> tensor<1x56x56x144xf32>
%cst_310 = constant 0.000000e+00 : f32
%c0_311 = constant 0 : index
%c0_312 = constant 0 : index
%c0_313 = constant 0 : index
%c1_314 = constant 1 : index
%c0_315 = constant 0 : index
%c1_316 = constant 1 : index
%c0_317 = constant 0 : index
%c0_318 = constant 0 : index
%213 = linalg.pad_tensor %212 low[%c0_311, %c0_313, %c0_315, %c0_317] high[%c0_312, %c1_314, %c1_316, %c0_318] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): // no predecessors
linalg.yield %cst_310 : f32
} : tensor<1x56x56x144xf32> to tensor<1x57x57x144xf32>
%214 = linalg.init_tensor [1, 28, 28, 144] : tensor<1x28x28x144xf32>
%cst_319 = constant 0.000000e+00 : f32
%215 = linalg.fill(%cst_319, %214) : f32, tensor<1x28x28x144xf32> -> tensor<1x28x28x144xf32>
%216 = linalg.tensor_collapse_shape %cst_55 [[0], [1], [2, 3]] : tensor<3x3x144x1xf32> into tensor<3x3x144xf32>
%217 = linalg.depthwise_conv_2d_input_nhwc_filter_hwc {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%213, %216 : tensor<1x57x57x144xf32>, tensor<3x3x144xf32>) outs(%215 : tensor<1x28x28x144xf32>) -> tensor<1x28x28x144xf32>
%218 = linalg.init_tensor [1, 28, 28, 144] : tensor<1x28x28x144xf32>
%219 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_174 : tensor<144xf32>) outs(%218 : tensor<1x28x28x144xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x28x28x144xf32>
%220 = linalg.init_tensor [1, 28, 28, 144] : tensor<1x28x28x144xf32>
%221 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_173 : tensor<144xf32>) outs(%220 : tensor<1x28x28x144xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x28x28x144xf32>
%222 = linalg.init_tensor [1, 28, 28, 144] : tensor<1x28x28x144xf32>
%223 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_172 : tensor<144xf32>) outs(%222 : tensor<1x28x28x144xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x28x28x144xf32>
%224 = linalg.init_tensor [1, 28, 28, 144] : tensor<1x28x28x144xf32>
%225 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_54 : tensor<144xf32>) outs(%224 : tensor<1x28x28x144xf32>) {
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<1x28x28x144xf32>