Created
August 11, 2022 05:24
-
-
Save pashu123/274e2d6cf3b01040faff2814333a93b0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Affine-map aliases used by the linalg.generic ops below.
// (Fixed: removed trailing " | |" table-scrape artifacts that made these
// lines invalid MLIR.)
// #map0: 4-D identity — used as the elementwise input/output map in the
//        batch-norm-style generics (e.g. on tensor<1x64x112x112xf32>).
// #map1: broadcast a 1-D per-channel tensor (scale/shift/mean/var, e.g.
//        tensor<64xf32>) across dim d1 of a 4-D iteration space.
// #map2: 4-D map with the leading (batch) dim pinned to 0 — used as the
//        input map of the ReLU and residual-add generics, where batch is 1.
// #map3: broadcast a 1-D tensor across dim d1 of a 2-D iteration space
//        (not referenced in the visible portion; presumably the final
//        fully-connected bias add — confirm against the rest of the file).
// #map4: 2-D identity (not referenced in the visible portion).
// #map5: 2-D transpose (d0, d1) -> (d1, d0) (not referenced in the visible
//        portion; presumably the FC weight transpose — confirm).
#map0 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d1)>
#map2 = affine_map<(d0, d1, d2, d3) -> (0, d1, d2, d3)>
#map3 = affine_map<(d0, d1) -> (d1)>
#map4 = affine_map<(d0, d1) -> (d0, d1)>
#map5 = affine_map<(d0, d1) -> (d1, d0)>
module attributes {torch.debug_module_name = "VisionModule"} { | |
func.func @forward(%arg0: tensor<1x3x224x224xf32>) -> tensor<1x1000xf32> { | |
%false = arith.constant false | |
%cst = arith.constant dense_resource<__elided__> : tensor<1000xf32> | |
%cst_0 = arith.constant dense_resource<__elided__> : tensor<1000x2048xf32> | |
%cst_1 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_2 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_3 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_4 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_5 = arith.constant dense_resource<__elided__> : tensor<2048x512x1x1xf32> | |
%cst_6 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_7 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_8 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_9 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_10 = arith.constant dense_resource<__elided__> : tensor<512x512x3x3xf32> | |
%cst_11 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_12 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_13 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_14 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_15 = arith.constant dense_resource<__elided__> : tensor<512x2048x1x1xf32> | |
%cst_16 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_17 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_18 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_19 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_20 = arith.constant dense_resource<__elided__> : tensor<2048x512x1x1xf32> | |
%cst_21 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_22 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_23 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_24 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_25 = arith.constant dense_resource<__elided__> : tensor<512x512x3x3xf32> | |
%cst_26 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_27 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_28 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_29 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_30 = arith.constant dense_resource<__elided__> : tensor<512x2048x1x1xf32> | |
%cst_31 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_32 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_33 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_34 = arith.constant dense_resource<__elided__> : tensor<2048x1024x1x1xf32> | |
%cst_35 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_36 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_37 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_38 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_39 = arith.constant dense_resource<__elided__> : tensor<2048x512x1x1xf32> | |
%cst_40 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_41 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_42 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_43 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_44 = arith.constant dense_resource<__elided__> : tensor<512x512x3x3xf32> | |
%cst_45 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_46 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_47 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_48 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_49 = arith.constant dense_resource<__elided__> : tensor<512x1024x1x1xf32> | |
%cst_50 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_51 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_52 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_53 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_54 = arith.constant dense_resource<__elided__> : tensor<1024x256x1x1xf32> | |
%cst_55 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_56 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_57 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_58 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_59 = arith.constant dense_resource<__elided__> : tensor<256x256x3x3xf32> | |
%cst_60 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_61 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_62 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_63 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_64 = arith.constant dense_resource<__elided__> : tensor<256x1024x1x1xf32> | |
%cst_65 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_66 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_67 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_68 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_69 = arith.constant dense_resource<__elided__> : tensor<1024x256x1x1xf32> | |
%cst_70 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_71 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_72 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_73 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_74 = arith.constant dense_resource<__elided__> : tensor<256x256x3x3xf32> | |
%cst_75 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_76 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_77 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_78 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_79 = arith.constant dense_resource<__elided__> : tensor<256x1024x1x1xf32> | |
%cst_80 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_81 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_82 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_83 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_84 = arith.constant dense_resource<__elided__> : tensor<1024x256x1x1xf32> | |
%cst_85 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_86 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_87 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_88 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_89 = arith.constant dense_resource<__elided__> : tensor<256x256x3x3xf32> | |
%cst_90 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_91 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_92 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_93 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_94 = arith.constant dense_resource<__elided__> : tensor<256x1024x1x1xf32> | |
%cst_95 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_96 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_97 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_98 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_99 = arith.constant dense_resource<__elided__> : tensor<1024x256x1x1xf32> | |
%cst_100 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_101 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_102 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_103 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_104 = arith.constant dense_resource<__elided__> : tensor<256x256x3x3xf32> | |
%cst_105 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_106 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_107 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_108 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_109 = arith.constant dense_resource<__elided__> : tensor<256x1024x1x1xf32> | |
%cst_110 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_111 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_112 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_113 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_114 = arith.constant dense_resource<__elided__> : tensor<1024x256x1x1xf32> | |
%cst_115 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_116 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_117 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_118 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_119 = arith.constant dense_resource<__elided__> : tensor<256x256x3x3xf32> | |
%cst_120 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_121 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_122 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_123 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_124 = arith.constant dense_resource<__elided__> : tensor<256x1024x1x1xf32> | |
%cst_125 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_126 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_127 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_128 = arith.constant dense_resource<__elided__> : tensor<1024x512x1x1xf32> | |
%cst_129 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_130 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_131 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_132 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_133 = arith.constant dense_resource<__elided__> : tensor<1024x256x1x1xf32> | |
%cst_134 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_135 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_136 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_137 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_138 = arith.constant dense_resource<__elided__> : tensor<256x256x3x3xf32> | |
%cst_139 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_140 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_141 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_142 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_143 = arith.constant dense_resource<__elided__> : tensor<256x512x1x1xf32> | |
%cst_144 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_145 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_146 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_147 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_148 = arith.constant dense_resource<__elided__> : tensor<512x128x1x1xf32> | |
%cst_149 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_150 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_151 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_152 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_153 = arith.constant dense_resource<__elided__> : tensor<128x128x3x3xf32> | |
%cst_154 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_155 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_156 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_157 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_158 = arith.constant dense_resource<__elided__> : tensor<128x512x1x1xf32> | |
%cst_159 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_160 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_161 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_162 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_163 = arith.constant dense_resource<__elided__> : tensor<512x128x1x1xf32> | |
%cst_164 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_165 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_166 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_167 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_168 = arith.constant dense_resource<__elided__> : tensor<128x128x3x3xf32> | |
%cst_169 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_170 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_171 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_172 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_173 = arith.constant dense_resource<__elided__> : tensor<128x512x1x1xf32> | |
%cst_174 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_175 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_176 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_177 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_178 = arith.constant dense_resource<__elided__> : tensor<512x128x1x1xf32> | |
%cst_179 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_180 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_181 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_182 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_183 = arith.constant dense_resource<__elided__> : tensor<128x128x3x3xf32> | |
%cst_184 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_185 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_186 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_187 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_188 = arith.constant dense_resource<__elided__> : tensor<128x512x1x1xf32> | |
%cst_189 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_190 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_191 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_192 = arith.constant dense_resource<__elided__> : tensor<512x256x1x1xf32> | |
%cst_193 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_194 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_195 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_196 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_197 = arith.constant dense_resource<__elided__> : tensor<512x128x1x1xf32> | |
%cst_198 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_199 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_200 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_201 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_202 = arith.constant dense_resource<__elided__> : tensor<128x128x3x3xf32> | |
%cst_203 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_204 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_205 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_206 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_207 = arith.constant dense_resource<__elided__> : tensor<128x256x1x1xf32> | |
%cst_208 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_209 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_210 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_211 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_212 = arith.constant dense_resource<__elided__> : tensor<256x64x1x1xf32> | |
%cst_213 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_214 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_215 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_216 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_217 = arith.constant dense_resource<__elided__> : tensor<64x64x3x3xf32> | |
%cst_218 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_219 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_220 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_221 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_222 = arith.constant dense_resource<__elided__> : tensor<64x256x1x1xf32> | |
%cst_223 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_224 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_225 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_226 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_227 = arith.constant dense_resource<__elided__> : tensor<256x64x1x1xf32> | |
%cst_228 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_229 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_230 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_231 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_232 = arith.constant dense_resource<__elided__> : tensor<64x64x3x3xf32> | |
%cst_233 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_234 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_235 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_236 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_237 = arith.constant dense_resource<__elided__> : tensor<64x256x1x1xf32> | |
%cst_238 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_239 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_240 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_241 = arith.constant dense_resource<__elided__> : tensor<256x64x1x1xf32> | |
%cst_242 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_243 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_244 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_245 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_246 = arith.constant dense_resource<__elided__> : tensor<256x64x1x1xf32> | |
%cst_247 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_248 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_249 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_250 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_251 = arith.constant dense_resource<__elided__> : tensor<64x64x3x3xf32> | |
%cst_252 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_253 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_254 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_255 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_256 = arith.constant dense_resource<__elided__> : tensor<64x64x1x1xf32> | |
%cst_257 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_258 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_259 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_260 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_261 = arith.constant dense_resource<__elided__> : tensor<64x3x7x7xf32> | |
%cst_262 = arith.constant 1.000000e-05 : f64 | |
%cst_263 = arith.constant 0.000000e+00 : f32 | |
%cst_264 = arith.constant -3.40282347E+38 : f32 | |
%cst_265 = arith.constant 4.900000e+01 : f32 | |
%0 = tensor.pad %arg0 low[0, 0, 3, 3] high[0, 0, 3, 3] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x3x224x224xf32> to tensor<1x3x230x230xf32> | |
%1 = linalg.init_tensor [1, 64, 112, 112] : tensor<1x64x112x112xf32> | |
%2 = linalg.fill ins(%cst_263 : f32) outs(%1 : tensor<1x64x112x112xf32>) -> tensor<1x64x112x112xf32> | |
%3 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%0, %cst_261 : tensor<1x3x230x230xf32>, tensor<64x3x7x7xf32>) outs(%2 : tensor<1x64x112x112xf32>) -> tensor<1x64x112x112xf32> | |
%4 = arith.cmpi eq, %false, %false : i1 | |
cf.assert %4, "training is not supported for now" | |
%5 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%3, %cst_258, %cst_257, %cst_260, %cst_259 : tensor<1x64x112x112xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) outs(%3 : tensor<1x64x112x112xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x64x112x112xf32> | |
%6 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%5 : tensor<1x64x112x112xf32>) outs(%1 : tensor<1x64x112x112xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x64x112x112xf32> | |
%7 = tensor.pad %6 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_264 : f32 | |
} : tensor<1x64x112x112xf32> to tensor<1x64x114x114xf32> | |
%8 = linalg.init_tensor [1, 64, 56, 56] : tensor<1x64x56x56xf32> | |
%9 = linalg.fill ins(%cst_264 : f32) outs(%8 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
%10 = linalg.init_tensor [3, 3] : tensor<3x3xf32> | |
%11 = linalg.pooling_nchw_max {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%7, %10 : tensor<1x64x114x114xf32>, tensor<3x3xf32>) outs(%9 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
%12 = linalg.fill ins(%cst_263 : f32) outs(%8 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
%13 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%11, %cst_256 : tensor<1x64x56x56xf32>, tensor<64x64x1x1xf32>) outs(%12 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
cf.assert %4, "training is not supported for now" | |
%14 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%13, %cst_253, %cst_252, %cst_255, %cst_254 : tensor<1x64x56x56xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) outs(%13 : tensor<1x64x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x64x56x56xf32> | |
%15 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%14 : tensor<1x64x56x56xf32>) outs(%8 : tensor<1x64x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x64x56x56xf32> | |
%16 = tensor.pad %15 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32> | |
%17 = linalg.fill ins(%cst_263 : f32) outs(%8 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
%18 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%16, %cst_251 : tensor<1x64x58x58xf32>, tensor<64x64x3x3xf32>) outs(%17 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
cf.assert %4, "training is not supported for now" | |
%19 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%18, %cst_248, %cst_247, %cst_250, %cst_249 : tensor<1x64x56x56xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) outs(%18 : tensor<1x64x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x64x56x56xf32> | |
%20 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%19 : tensor<1x64x56x56xf32>) outs(%8 : tensor<1x64x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x64x56x56xf32> | |
%21 = linalg.init_tensor [1, 256, 56, 56] : tensor<1x256x56x56xf32> | |
%22 = linalg.fill ins(%cst_263 : f32) outs(%21 : tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> | |
%23 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%20, %cst_246 : tensor<1x64x56x56xf32>, tensor<256x64x1x1xf32>) outs(%22 : tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> | |
cf.assert %4, "training is not supported for now" | |
%24 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%23, %cst_243, %cst_242, %cst_245, %cst_244 : tensor<1x256x56x56xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%23 : tensor<1x256x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x56x56xf32> | |
%25 = linalg.fill ins(%cst_263 : f32) outs(%21 : tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> | |
%26 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%11, %cst_241 : tensor<1x64x56x56xf32>, tensor<256x64x1x1xf32>) outs(%25 : tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> | |
cf.assert %4, "training is not supported for now" | |
%27 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%26, %cst_238, %cst_242, %cst_240, %cst_239 : tensor<1x256x56x56xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%26 : tensor<1x256x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x56x56xf32> | |
%28 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%24, %27 : tensor<1x256x56x56xf32>, tensor<1x256x56x56xf32>) outs(%21 : tensor<1x256x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x256x56x56xf32> | |
%29 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%28 : tensor<1x256x56x56xf32>) outs(%21 : tensor<1x256x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x56x56xf32> | |
%30 = linalg.fill ins(%cst_263 : f32) outs(%8 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
%31 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%29, %cst_237 : tensor<1x256x56x56xf32>, tensor<64x256x1x1xf32>) outs(%30 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
cf.assert %4, "training is not supported for now" | |
%32 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%31, %cst_234, %cst_233, %cst_236, %cst_235 : tensor<1x64x56x56xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) outs(%31 : tensor<1x64x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x64x56x56xf32> | |
%33 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%32 : tensor<1x64x56x56xf32>) outs(%8 : tensor<1x64x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x64x56x56xf32> | |
%34 = tensor.pad %33 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32> | |
%35 = linalg.fill ins(%cst_263 : f32) outs(%8 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
%36 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%34, %cst_232 : tensor<1x64x58x58xf32>, tensor<64x64x3x3xf32>) outs(%35 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
cf.assert %4, "training is not supported for now" | |
%37 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%36, %cst_229, %cst_228, %cst_231, %cst_230 : tensor<1x64x56x56xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) outs(%36 : tensor<1x64x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x64x56x56xf32> | |
%38 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%37 : tensor<1x64x56x56xf32>) outs(%8 : tensor<1x64x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x64x56x56xf32> | |
%39 = linalg.fill ins(%cst_263 : f32) outs(%21 : tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> | |
%40 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%38, %cst_227 : tensor<1x64x56x56xf32>, tensor<256x64x1x1xf32>) outs(%39 : tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> | |
cf.assert %4, "training is not supported for now" | |
%41 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%40, %cst_224, %cst_223, %cst_226, %cst_225 : tensor<1x256x56x56xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%40 : tensor<1x256x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x56x56xf32> | |
%42 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%41, %29 : tensor<1x256x56x56xf32>, tensor<1x256x56x56xf32>) outs(%21 : tensor<1x256x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x256x56x56xf32> | |
%43 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%42 : tensor<1x256x56x56xf32>) outs(%21 : tensor<1x256x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x56x56xf32> | |
%44 = linalg.fill ins(%cst_263 : f32) outs(%8 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
%45 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%43, %cst_222 : tensor<1x256x56x56xf32>, tensor<64x256x1x1xf32>) outs(%44 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
cf.assert %4, "training is not supported for now" | |
%46 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%45, %cst_219, %cst_218, %cst_221, %cst_220 : tensor<1x64x56x56xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) outs(%45 : tensor<1x64x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x64x56x56xf32> | |
%47 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%46 : tensor<1x64x56x56xf32>) outs(%8 : tensor<1x64x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x64x56x56xf32> | |
%48 = tensor.pad %47 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32> | |
%49 = linalg.fill ins(%cst_263 : f32) outs(%8 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
%50 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%48, %cst_217 : tensor<1x64x58x58xf32>, tensor<64x64x3x3xf32>) outs(%49 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
cf.assert %4, "training is not supported for now" | |
%51 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%50, %cst_214, %cst_213, %cst_216, %cst_215 : tensor<1x64x56x56xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) outs(%50 : tensor<1x64x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x64x56x56xf32> | |
%52 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%51 : tensor<1x64x56x56xf32>) outs(%8 : tensor<1x64x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x64x56x56xf32> | |
%53 = linalg.fill ins(%cst_263 : f32) outs(%21 : tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> | |
%54 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%52, %cst_212 : tensor<1x64x56x56xf32>, tensor<256x64x1x1xf32>) outs(%53 : tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> | |
cf.assert %4, "training is not supported for now" | |
%55 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%54, %cst_209, %cst_208, %cst_211, %cst_210 : tensor<1x256x56x56xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%54 : tensor<1x256x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x56x56xf32> | |
%56 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%55, %43 : tensor<1x256x56x56xf32>, tensor<1x256x56x56xf32>) outs(%21 : tensor<1x256x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x256x56x56xf32> | |
%57 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%56 : tensor<1x256x56x56xf32>) outs(%21 : tensor<1x256x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x56x56xf32> | |
%58 = linalg.init_tensor [1, 128, 56, 56] : tensor<1x128x56x56xf32> | |
%59 = linalg.fill ins(%cst_263 : f32) outs(%58 : tensor<1x128x56x56xf32>) -> tensor<1x128x56x56xf32> | |
%60 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%57, %cst_207 : tensor<1x256x56x56xf32>, tensor<128x256x1x1xf32>) outs(%59 : tensor<1x128x56x56xf32>) -> tensor<1x128x56x56xf32> | |
cf.assert %4, "training is not supported for now" | |
%61 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%60, %cst_204, %cst_203, %cst_206, %cst_205 : tensor<1x128x56x56xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%60 : tensor<1x128x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x128x56x56xf32> | |
%62 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%61 : tensor<1x128x56x56xf32>) outs(%58 : tensor<1x128x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x128x56x56xf32> | |
%63 = tensor.pad %62 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x128x56x56xf32> to tensor<1x128x58x58xf32> | |
%64 = linalg.init_tensor [1, 128, 28, 28] : tensor<1x128x28x28xf32> | |
%65 = linalg.fill ins(%cst_263 : f32) outs(%64 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
%66 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%63, %cst_202 : tensor<1x128x58x58xf32>, tensor<128x128x3x3xf32>) outs(%65 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%67 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%66, %cst_199, %cst_198, %cst_201, %cst_200 : tensor<1x128x28x28xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%66 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%68 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%67 : tensor<1x128x28x28xf32>) outs(%64 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%69 = linalg.init_tensor [1, 512, 28, 28] : tensor<1x512x28x28xf32> | |
%70 = linalg.fill ins(%cst_263 : f32) outs(%69 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
%71 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%68, %cst_197 : tensor<1x128x28x28xf32>, tensor<512x128x1x1xf32>) outs(%70 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%72 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%71, %cst_194, %cst_193, %cst_196, %cst_195 : tensor<1x512x28x28xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%71 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%73 = linalg.fill ins(%cst_263 : f32) outs(%69 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
%74 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%57, %cst_192 : tensor<1x256x56x56xf32>, tensor<512x256x1x1xf32>) outs(%73 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%75 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%74, %cst_189, %cst_193, %cst_191, %cst_190 : tensor<1x512x28x28xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%74 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%76 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%72, %75 : tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) outs(%69 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%77 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%76 : tensor<1x512x28x28xf32>) outs(%69 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%78 = linalg.fill ins(%cst_263 : f32) outs(%64 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
%79 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%77, %cst_188 : tensor<1x512x28x28xf32>, tensor<128x512x1x1xf32>) outs(%78 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%80 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%79, %cst_185, %cst_184, %cst_187, %cst_186 : tensor<1x128x28x28xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%79 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%81 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%80 : tensor<1x128x28x28xf32>) outs(%64 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%82 = tensor.pad %81 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x128x28x28xf32> to tensor<1x128x30x30xf32> | |
%83 = linalg.fill ins(%cst_263 : f32) outs(%64 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
%84 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%82, %cst_183 : tensor<1x128x30x30xf32>, tensor<128x128x3x3xf32>) outs(%83 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%85 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%84, %cst_180, %cst_179, %cst_182, %cst_181 : tensor<1x128x28x28xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%84 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%86 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%85 : tensor<1x128x28x28xf32>) outs(%64 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%87 = linalg.fill ins(%cst_263 : f32) outs(%69 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
%88 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%86, %cst_178 : tensor<1x128x28x28xf32>, tensor<512x128x1x1xf32>) outs(%87 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%89 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%88, %cst_175, %cst_174, %cst_177, %cst_176 : tensor<1x512x28x28xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%88 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%90 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%89, %77 : tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) outs(%69 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%91 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%90 : tensor<1x512x28x28xf32>) outs(%69 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%92 = linalg.fill ins(%cst_263 : f32) outs(%64 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
%93 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%91, %cst_173 : tensor<1x512x28x28xf32>, tensor<128x512x1x1xf32>) outs(%92 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%94 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%93, %cst_170, %cst_169, %cst_172, %cst_171 : tensor<1x128x28x28xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%93 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%95 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%94 : tensor<1x128x28x28xf32>) outs(%64 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%96 = tensor.pad %95 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x128x28x28xf32> to tensor<1x128x30x30xf32> | |
%97 = linalg.fill ins(%cst_263 : f32) outs(%64 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
%98 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%96, %cst_168 : tensor<1x128x30x30xf32>, tensor<128x128x3x3xf32>) outs(%97 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%99 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%98, %cst_165, %cst_164, %cst_167, %cst_166 : tensor<1x128x28x28xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%98 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%100 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%99 : tensor<1x128x28x28xf32>) outs(%64 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%101 = linalg.fill ins(%cst_263 : f32) outs(%69 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
%102 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%100, %cst_163 : tensor<1x128x28x28xf32>, tensor<512x128x1x1xf32>) outs(%101 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%103 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%102, %cst_160, %cst_159, %cst_162, %cst_161 : tensor<1x512x28x28xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%102 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%104 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%103, %91 : tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) outs(%69 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%105 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%104 : tensor<1x512x28x28xf32>) outs(%69 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%106 = linalg.fill ins(%cst_263 : f32) outs(%64 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
%107 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%105, %cst_158 : tensor<1x512x28x28xf32>, tensor<128x512x1x1xf32>) outs(%106 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%108 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%107, %cst_155, %cst_154, %cst_157, %cst_156 : tensor<1x128x28x28xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%107 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%109 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%108 : tensor<1x128x28x28xf32>) outs(%64 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%110 = tensor.pad %109 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x128x28x28xf32> to tensor<1x128x30x30xf32> | |
%111 = linalg.fill ins(%cst_263 : f32) outs(%64 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
%112 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%110, %cst_153 : tensor<1x128x30x30xf32>, tensor<128x128x3x3xf32>) outs(%111 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%113 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%112, %cst_150, %cst_149, %cst_152, %cst_151 : tensor<1x128x28x28xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%112 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%114 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%113 : tensor<1x128x28x28xf32>) outs(%64 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%115 = linalg.fill ins(%cst_263 : f32) outs(%69 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
%116 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%114, %cst_148 : tensor<1x128x28x28xf32>, tensor<512x128x1x1xf32>) outs(%115 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%117 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%116, %cst_145, %cst_144, %cst_147, %cst_146 : tensor<1x512x28x28xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%116 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%118 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%117, %105 : tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) outs(%69 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%119 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%118 : tensor<1x512x28x28xf32>) outs(%69 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%120 = linalg.init_tensor [1, 256, 28, 28] : tensor<1x256x28x28xf32> | |
%121 = linalg.fill ins(%cst_263 : f32) outs(%120 : tensor<1x256x28x28xf32>) -> tensor<1x256x28x28xf32> | |
%122 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%119, %cst_143 : tensor<1x512x28x28xf32>, tensor<256x512x1x1xf32>) outs(%121 : tensor<1x256x28x28xf32>) -> tensor<1x256x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%123 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%122, %cst_140, %cst_139, %cst_142, %cst_141 : tensor<1x256x28x28xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%122 : tensor<1x256x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x28x28xf32> | |
%124 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%123 : tensor<1x256x28x28xf32>) outs(%120 : tensor<1x256x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x28x28xf32> | |
%125 = tensor.pad %124 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x256x28x28xf32> to tensor<1x256x30x30xf32> | |
%126 = linalg.init_tensor [1, 256, 14, 14] : tensor<1x256x14x14xf32> | |
%127 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
%128 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%125, %cst_138 : tensor<1x256x30x30xf32>, tensor<256x256x3x3xf32>) outs(%127 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%129 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%128, %cst_135, %cst_134, %cst_137, %cst_136 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%128 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%130 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%129 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%131 = linalg.init_tensor [1, 1024, 14, 14] : tensor<1x1024x14x14xf32> | |
%132 = linalg.fill ins(%cst_263 : f32) outs(%131 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
%133 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%130, %cst_133 : tensor<1x256x14x14xf32>, tensor<1024x256x1x1xf32>) outs(%132 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%134 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%133, %cst_130, %cst_129, %cst_132, %cst_131 : tensor<1x1024x14x14xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) outs(%133 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%135 = linalg.fill ins(%cst_263 : f32) outs(%131 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
%136 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%119, %cst_128 : tensor<1x512x28x28xf32>, tensor<1024x512x1x1xf32>) outs(%135 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%137 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%136, %cst_125, %cst_129, %cst_127, %cst_126 : tensor<1x1024x14x14xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) outs(%136 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%138 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%134, %137 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%139 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%138 : tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%140 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
%141 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%139, %cst_124 : tensor<1x1024x14x14xf32>, tensor<256x1024x1x1xf32>) outs(%140 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%142 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%141, %cst_121, %cst_120, %cst_123, %cst_122 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%141 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%143 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%142 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%144 = tensor.pad %143 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x256x14x14xf32> to tensor<1x256x16x16xf32> | |
%145 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
%146 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%144, %cst_119 : tensor<1x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%145 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%147 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%146, %cst_116, %cst_115, %cst_118, %cst_117 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%146 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%148 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%147 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%149 = linalg.fill ins(%cst_263 : f32) outs(%131 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
%150 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%148, %cst_114 : tensor<1x256x14x14xf32>, tensor<1024x256x1x1xf32>) outs(%149 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%151 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%150, %cst_111, %cst_110, %cst_113, %cst_112 : tensor<1x1024x14x14xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) outs(%150 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%152 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%151, %139 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%153 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%152 : tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%154 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
%155 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%153, %cst_109 : tensor<1x1024x14x14xf32>, tensor<256x1024x1x1xf32>) outs(%154 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%156 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%155, %cst_106, %cst_105, %cst_108, %cst_107 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%155 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%157 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%156 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%158 = tensor.pad %157 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x256x14x14xf32> to tensor<1x256x16x16xf32> | |
%159 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
%160 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%158, %cst_104 : tensor<1x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%159 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%161 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%160, %cst_101, %cst_100, %cst_103, %cst_102 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%160 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%162 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%161 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%163 = linalg.fill ins(%cst_263 : f32) outs(%131 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
%164 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%162, %cst_99 : tensor<1x256x14x14xf32>, tensor<1024x256x1x1xf32>) outs(%163 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%165 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%164, %cst_96, %cst_95, %cst_98, %cst_97 : tensor<1x1024x14x14xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) outs(%164 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%166 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%165, %153 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%167 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%166 : tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%168 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
%169 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%167, %cst_94 : tensor<1x1024x14x14xf32>, tensor<256x1024x1x1xf32>) outs(%168 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%170 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%169, %cst_91, %cst_90, %cst_93, %cst_92 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%169 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%171 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%170 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%172 = tensor.pad %171 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x256x14x14xf32> to tensor<1x256x16x16xf32> | |
%173 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
%174 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%172, %cst_89 : tensor<1x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%173 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%175 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%174, %cst_86, %cst_85, %cst_88, %cst_87 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%174 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%176 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%175 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%177 = linalg.fill ins(%cst_263 : f32) outs(%131 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
%178 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%176, %cst_84 : tensor<1x256x14x14xf32>, tensor<1024x256x1x1xf32>) outs(%177 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%179 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%178, %cst_81, %cst_80, %cst_83, %cst_82 : tensor<1x1024x14x14xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) outs(%178 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%180 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%179, %167 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%181 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%180 : tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%182 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
%183 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%181, %cst_79 : tensor<1x1024x14x14xf32>, tensor<256x1024x1x1xf32>) outs(%182 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%184 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%183, %cst_76, %cst_75, %cst_78, %cst_77 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%183 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%185 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%184 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%186 = tensor.pad %185 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x256x14x14xf32> to tensor<1x256x16x16xf32> | |
%187 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
%188 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%186, %cst_74 : tensor<1x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%187 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%189 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%188, %cst_71, %cst_70, %cst_73, %cst_72 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%188 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%190 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%189 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%191 = linalg.fill ins(%cst_263 : f32) outs(%131 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
%192 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%190, %cst_69 : tensor<1x256x14x14xf32>, tensor<1024x256x1x1xf32>) outs(%191 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%193 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%192, %cst_66, %cst_65, %cst_68, %cst_67 : tensor<1x1024x14x14xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) outs(%192 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%194 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%193, %181 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%195 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%194 : tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%196 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
%197 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%195, %cst_64 : tensor<1x1024x14x14xf32>, tensor<256x1024x1x1xf32>) outs(%196 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%198 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%197, %cst_61, %cst_60, %cst_63, %cst_62 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%197 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%199 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%198 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%200 = tensor.pad %199 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x256x14x14xf32> to tensor<1x256x16x16xf32> | |
%201 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
%202 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%200, %cst_59 : tensor<1x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%201 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%203 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%202, %cst_56, %cst_55, %cst_58, %cst_57 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%202 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%204 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%203 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%205 = linalg.fill ins(%cst_263 : f32) outs(%131 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
%206 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%204, %cst_54 : tensor<1x256x14x14xf32>, tensor<1024x256x1x1xf32>) outs(%205 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%207 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%206, %cst_51, %cst_50, %cst_53, %cst_52 : tensor<1x1024x14x14xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) outs(%206 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%208 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%207, %195 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%209 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%208 : tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%210 = linalg.init_tensor [1, 512, 14, 14] : tensor<1x512x14x14xf32> | |
%211 = linalg.fill ins(%cst_263 : f32) outs(%210 : tensor<1x512x14x14xf32>) -> tensor<1x512x14x14xf32> | |
%212 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%209, %cst_49 : tensor<1x1024x14x14xf32>, tensor<512x1024x1x1xf32>) outs(%211 : tensor<1x512x14x14xf32>) -> tensor<1x512x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%213 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%212, %cst_46, %cst_45, %cst_48, %cst_47 : tensor<1x512x14x14xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%212 : tensor<1x512x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x512x14x14xf32> | |
%214 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%213 : tensor<1x512x14x14xf32>) outs(%210 : tensor<1x512x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x512x14x14xf32> | |
%215 = tensor.pad %214 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x512x14x14xf32> to tensor<1x512x16x16xf32> | |
%216 = linalg.init_tensor [1, 512, 7, 7] : tensor<1x512x7x7xf32> | |
%217 = linalg.fill ins(%cst_263 : f32) outs(%216 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
%218 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%215, %cst_44 : tensor<1x512x16x16xf32>, tensor<512x512x3x3xf32>) outs(%217 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
cf.assert %4, "training is not supported for now" | |
%219 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%218, %cst_41, %cst_40, %cst_43, %cst_42 : tensor<1x512x7x7xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%218 : tensor<1x512x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x512x7x7xf32> | |
%220 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%219 : tensor<1x512x7x7xf32>) outs(%216 : tensor<1x512x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x512x7x7xf32> | |
%221 = linalg.init_tensor [1, 2048, 7, 7] : tensor<1x2048x7x7xf32> | |
%222 = linalg.fill ins(%cst_263 : f32) outs(%221 : tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> | |
%223 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%220, %cst_39 : tensor<1x512x7x7xf32>, tensor<2048x512x1x1xf32>) outs(%222 : tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> | |
cf.assert %4, "training is not supported for now" | |
%224 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%223, %cst_36, %cst_35, %cst_38, %cst_37 : tensor<1x2048x7x7xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>) outs(%223 : tensor<1x2048x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x2048x7x7xf32> | |
%225 = linalg.fill ins(%cst_263 : f32) outs(%221 : tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> | |
%226 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%209, %cst_34 : tensor<1x1024x14x14xf32>, tensor<2048x1024x1x1xf32>) outs(%225 : tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> | |
cf.assert %4, "training is not supported for now" | |
%227 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%226, %cst_31, %cst_35, %cst_33, %cst_32 : tensor<1x2048x7x7xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>) outs(%226 : tensor<1x2048x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x2048x7x7xf32> | |
%228 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%224, %227 : tensor<1x2048x7x7xf32>, tensor<1x2048x7x7xf32>) outs(%221 : tensor<1x2048x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x2048x7x7xf32> | |
%229 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%228 : tensor<1x2048x7x7xf32>) outs(%221 : tensor<1x2048x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x2048x7x7xf32> | |
%230 = linalg.fill ins(%cst_263 : f32) outs(%216 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
%231 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%229, %cst_30 : tensor<1x2048x7x7xf32>, tensor<512x2048x1x1xf32>) outs(%230 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
cf.assert %4, "training is not supported for now" | |
%232 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%231, %cst_27, %cst_26, %cst_29, %cst_28 : tensor<1x512x7x7xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%231 : tensor<1x512x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x512x7x7xf32> | |
%233 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%232 : tensor<1x512x7x7xf32>) outs(%216 : tensor<1x512x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x512x7x7xf32> | |
%234 = tensor.pad %233 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x512x7x7xf32> to tensor<1x512x9x9xf32> | |
%235 = linalg.fill ins(%cst_263 : f32) outs(%216 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
%236 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%234, %cst_25 : tensor<1x512x9x9xf32>, tensor<512x512x3x3xf32>) outs(%235 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
cf.assert %4, "training is not supported for now" | |
%237 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%236, %cst_22, %cst_21, %cst_24, %cst_23 : tensor<1x512x7x7xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%236 : tensor<1x512x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x512x7x7xf32> | |
%238 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237 : tensor<1x512x7x7xf32>) outs(%216 : tensor<1x512x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x512x7x7xf32> | |
%239 = linalg.fill ins(%cst_263 : f32) outs(%221 : tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> | |
%240 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%238, %cst_20 : tensor<1x512x7x7xf32>, tensor<2048x512x1x1xf32>) outs(%239 : tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> | |
cf.assert %4, "training is not supported for now" | |
%241 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%240, %cst_17, %cst_16, %cst_19, %cst_18 : tensor<1x2048x7x7xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>) outs(%240 : tensor<1x2048x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x2048x7x7xf32> | |
%242 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%241, %229 : tensor<1x2048x7x7xf32>, tensor<1x2048x7x7xf32>) outs(%221 : tensor<1x2048x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x2048x7x7xf32> | |
%243 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%242 : tensor<1x2048x7x7xf32>) outs(%221 : tensor<1x2048x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x2048x7x7xf32> | |
%244 = linalg.fill ins(%cst_263 : f32) outs(%216 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
%245 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%243, %cst_15 : tensor<1x2048x7x7xf32>, tensor<512x2048x1x1xf32>) outs(%244 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
cf.assert %4, "training is not supported for now" | |
%246 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%245, %cst_12, %cst_11, %cst_14, %cst_13 : tensor<1x512x7x7xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%245 : tensor<1x512x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x512x7x7xf32> | |
%247 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%246 : tensor<1x512x7x7xf32>) outs(%216 : tensor<1x512x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x512x7x7xf32> | |
%248 = tensor.pad %247 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x512x7x7xf32> to tensor<1x512x9x9xf32> | |
%249 = linalg.fill ins(%cst_263 : f32) outs(%216 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
%250 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%248, %cst_10 : tensor<1x512x9x9xf32>, tensor<512x512x3x3xf32>) outs(%249 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
cf.assert %4, "training is not supported for now" | |
%251 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%250, %cst_7, %cst_6, %cst_9, %cst_8 : tensor<1x512x7x7xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%250 : tensor<1x512x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x512x7x7xf32> | |
%252 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%251 : tensor<1x512x7x7xf32>) outs(%216 : tensor<1x512x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x512x7x7xf32> | |
%253 = linalg.fill ins(%cst_263 : f32) outs(%221 : tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> | |
%254 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%252, %cst_5 : tensor<1x512x7x7xf32>, tensor<2048x512x1x1xf32>) outs(%253 : tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> | |
cf.assert %4, "training is not supported for now" | |
%255 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%254, %cst_2, %cst_1, %cst_4, %cst_3 : tensor<1x2048x7x7xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>) outs(%254 : tensor<1x2048x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x2048x7x7xf32> | |
%256 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%255, %243 : tensor<1x2048x7x7xf32>, tensor<1x2048x7x7xf32>) outs(%221 : tensor<1x2048x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x2048x7x7xf32> | |
%257 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%256 : tensor<1x2048x7x7xf32>) outs(%221 : tensor<1x2048x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x2048x7x7xf32> | |
%258 = linalg.init_tensor [1, 2048, 1, 1] : tensor<1x2048x1x1xf32> | |
%259 = linalg.fill ins(%cst_263 : f32) outs(%258 : tensor<1x2048x1x1xf32>) -> tensor<1x2048x1x1xf32> | |
%260 = linalg.init_tensor [7, 7] : tensor<7x7xf32> | |
%261 = linalg.pooling_nchw_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%257, %260 : tensor<1x2048x7x7xf32>, tensor<7x7xf32>) outs(%259 : tensor<1x2048x1x1xf32>) -> tensor<1x2048x1x1xf32> | |
%262 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%261 : tensor<1x2048x1x1xf32>) outs(%258 : tensor<1x2048x1x1xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.divf %arg1, %cst_265 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x2048x1x1xf32> | |
%263 = tensor.collapse_shape %262 [[0], [1, 2, 3]] : tensor<1x2048x1x1xf32> into tensor<1x2048xf32> | |
%264 = linalg.init_tensor [1, 1000] : tensor<1x1000xf32> | |
%265 = linalg.init_tensor [2048, 1000] : tensor<2048x1000xf32> | |
%266 = linalg.generic {indexing_maps = [#map3, #map4], iterator_types = ["parallel", "parallel"]} ins(%cst : tensor<1000xf32>) outs(%264 : tensor<1x1000xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
linalg.yield %arg1 : f32 | |
} -> tensor<1x1000xf32> | |
%267 = linalg.generic {indexing_maps = [#map5, #map4], iterator_types = ["parallel", "parallel"]} ins(%cst_0 : tensor<1000x2048xf32>) outs(%265 : tensor<2048x1000xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
linalg.yield %arg1 : f32 | |
} -> tensor<2048x1000xf32> | |
%268 = linalg.matmul ins(%263, %267 : tensor<1x2048xf32>, tensor<2048x1000xf32>) outs(%266 : tensor<1x1000xf32>) -> tensor<1x1000xf32> | |
return %268 : tensor<1x1000xf32> | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment