Created
August 11, 2022 05:24
-
-
Save pashu123/274e2d6cf3b01040faff2814333a93b0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Affine-map aliases referenced by the `indexing_maps` of the linalg.generic ops in @forward.
//
// #map0: identity over a 4-D iteration space; used for the 4-D operands, including
//        every output of the generics visible in this part of the file.
| #map0 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> | |
// #map1: keeps only d1; used to index the 1-D operands (e.g. tensor<64xf32>) by the
//        second dimension of the 4-D tensor they are combined with.
| #map1 = affine_map<(d0, d1, d2, d3) -> (d1)> | |
// #map2: like #map0 but with the first result pinned to the constant 0; used for the
//        inputs of the element-wise generics, whose leading dimension has size 1.
| #map2 = affine_map<(d0, d1, d2, d3) -> (0, d1, d2, d3)> | |
// #map3..#map5: 2-D maps (keep d1 only, identity, and swapped (d1, d0)). They are not
//        referenced in the visible portion of @forward; presumably used further down
//        in the function -- TODO confirm.
| #map3 = affine_map<(d0, d1) -> (d1)> | |
| #map4 = affine_map<(d0, d1) -> (d0, d1)> | |
| #map5 = affine_map<(d0, d1) -> (d1, d0)> | |
| module attributes {torch.debug_module_name = "VisionModule"} { | |
| func.func @forward(%arg0: tensor<1x3x224x224xf32>) -> tensor<1x1000xf32> { | |
| %false = arith.constant false | |
| %cst = arith.constant dense_resource<__elided__> : tensor<1000xf32> | |
| %cst_0 = arith.constant dense_resource<__elided__> : tensor<1000x2048xf32> | |
| %cst_1 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
| %cst_2 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
| %cst_3 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
| %cst_4 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
| %cst_5 = arith.constant dense_resource<__elided__> : tensor<2048x512x1x1xf32> | |
| %cst_6 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_7 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_8 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_9 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_10 = arith.constant dense_resource<__elided__> : tensor<512x512x3x3xf32> | |
| %cst_11 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_12 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_13 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_14 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_15 = arith.constant dense_resource<__elided__> : tensor<512x2048x1x1xf32> | |
| %cst_16 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
| %cst_17 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
| %cst_18 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
| %cst_19 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
| %cst_20 = arith.constant dense_resource<__elided__> : tensor<2048x512x1x1xf32> | |
| %cst_21 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_22 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_23 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_24 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_25 = arith.constant dense_resource<__elided__> : tensor<512x512x3x3xf32> | |
| %cst_26 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_27 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_28 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_29 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_30 = arith.constant dense_resource<__elided__> : tensor<512x2048x1x1xf32> | |
| %cst_31 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
| %cst_32 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
| %cst_33 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
| %cst_34 = arith.constant dense_resource<__elided__> : tensor<2048x1024x1x1xf32> | |
| %cst_35 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
| %cst_36 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
| %cst_37 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
| %cst_38 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
| %cst_39 = arith.constant dense_resource<__elided__> : tensor<2048x512x1x1xf32> | |
| %cst_40 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_41 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_42 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_43 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_44 = arith.constant dense_resource<__elided__> : tensor<512x512x3x3xf32> | |
| %cst_45 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_46 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_47 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_48 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_49 = arith.constant dense_resource<__elided__> : tensor<512x1024x1x1xf32> | |
| %cst_50 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_51 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_52 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_53 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_54 = arith.constant dense_resource<__elided__> : tensor<1024x256x1x1xf32> | |
| %cst_55 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_56 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_57 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_58 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_59 = arith.constant dense_resource<__elided__> : tensor<256x256x3x3xf32> | |
| %cst_60 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_61 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_62 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_63 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_64 = arith.constant dense_resource<__elided__> : tensor<256x1024x1x1xf32> | |
| %cst_65 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_66 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_67 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_68 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_69 = arith.constant dense_resource<__elided__> : tensor<1024x256x1x1xf32> | |
| %cst_70 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_71 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_72 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_73 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_74 = arith.constant dense_resource<__elided__> : tensor<256x256x3x3xf32> | |
| %cst_75 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_76 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_77 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_78 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_79 = arith.constant dense_resource<__elided__> : tensor<256x1024x1x1xf32> | |
| %cst_80 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_81 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_82 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_83 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_84 = arith.constant dense_resource<__elided__> : tensor<1024x256x1x1xf32> | |
| %cst_85 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_86 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_87 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_88 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_89 = arith.constant dense_resource<__elided__> : tensor<256x256x3x3xf32> | |
| %cst_90 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_91 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_92 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_93 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_94 = arith.constant dense_resource<__elided__> : tensor<256x1024x1x1xf32> | |
| %cst_95 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_96 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_97 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_98 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_99 = arith.constant dense_resource<__elided__> : tensor<1024x256x1x1xf32> | |
| %cst_100 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_101 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_102 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_103 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_104 = arith.constant dense_resource<__elided__> : tensor<256x256x3x3xf32> | |
| %cst_105 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_106 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_107 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_108 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_109 = arith.constant dense_resource<__elided__> : tensor<256x1024x1x1xf32> | |
| %cst_110 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_111 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_112 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_113 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_114 = arith.constant dense_resource<__elided__> : tensor<1024x256x1x1xf32> | |
| %cst_115 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_116 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_117 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_118 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_119 = arith.constant dense_resource<__elided__> : tensor<256x256x3x3xf32> | |
| %cst_120 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_121 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_122 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_123 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_124 = arith.constant dense_resource<__elided__> : tensor<256x1024x1x1xf32> | |
| %cst_125 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_126 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_127 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_128 = arith.constant dense_resource<__elided__> : tensor<1024x512x1x1xf32> | |
| %cst_129 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_130 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_131 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_132 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
| %cst_133 = arith.constant dense_resource<__elided__> : tensor<1024x256x1x1xf32> | |
| %cst_134 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_135 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_136 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_137 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_138 = arith.constant dense_resource<__elided__> : tensor<256x256x3x3xf32> | |
| %cst_139 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_140 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_141 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_142 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_143 = arith.constant dense_resource<__elided__> : tensor<256x512x1x1xf32> | |
| %cst_144 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_145 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_146 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_147 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_148 = arith.constant dense_resource<__elided__> : tensor<512x128x1x1xf32> | |
| %cst_149 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_150 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_151 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_152 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_153 = arith.constant dense_resource<__elided__> : tensor<128x128x3x3xf32> | |
| %cst_154 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_155 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_156 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_157 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_158 = arith.constant dense_resource<__elided__> : tensor<128x512x1x1xf32> | |
| %cst_159 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_160 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_161 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_162 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_163 = arith.constant dense_resource<__elided__> : tensor<512x128x1x1xf32> | |
| %cst_164 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_165 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_166 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_167 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_168 = arith.constant dense_resource<__elided__> : tensor<128x128x3x3xf32> | |
| %cst_169 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_170 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_171 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_172 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_173 = arith.constant dense_resource<__elided__> : tensor<128x512x1x1xf32> | |
| %cst_174 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_175 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_176 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_177 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_178 = arith.constant dense_resource<__elided__> : tensor<512x128x1x1xf32> | |
| %cst_179 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_180 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_181 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_182 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_183 = arith.constant dense_resource<__elided__> : tensor<128x128x3x3xf32> | |
| %cst_184 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_185 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_186 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_187 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_188 = arith.constant dense_resource<__elided__> : tensor<128x512x1x1xf32> | |
| %cst_189 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_190 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_191 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_192 = arith.constant dense_resource<__elided__> : tensor<512x256x1x1xf32> | |
| %cst_193 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_194 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_195 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_196 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
| %cst_197 = arith.constant dense_resource<__elided__> : tensor<512x128x1x1xf32> | |
| %cst_198 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_199 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_200 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_201 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_202 = arith.constant dense_resource<__elided__> : tensor<128x128x3x3xf32> | |
| %cst_203 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_204 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_205 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_206 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
| %cst_207 = arith.constant dense_resource<__elided__> : tensor<128x256x1x1xf32> | |
| %cst_208 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_209 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_210 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_211 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_212 = arith.constant dense_resource<__elided__> : tensor<256x64x1x1xf32> | |
| %cst_213 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_214 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_215 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_216 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_217 = arith.constant dense_resource<__elided__> : tensor<64x64x3x3xf32> | |
| %cst_218 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_219 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_220 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_221 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_222 = arith.constant dense_resource<__elided__> : tensor<64x256x1x1xf32> | |
| %cst_223 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_224 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_225 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_226 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_227 = arith.constant dense_resource<__elided__> : tensor<256x64x1x1xf32> | |
| %cst_228 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_229 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_230 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_231 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_232 = arith.constant dense_resource<__elided__> : tensor<64x64x3x3xf32> | |
| %cst_233 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_234 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_235 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_236 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_237 = arith.constant dense_resource<__elided__> : tensor<64x256x1x1xf32> | |
| %cst_238 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_239 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_240 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_241 = arith.constant dense_resource<__elided__> : tensor<256x64x1x1xf32> | |
| %cst_242 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_243 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_244 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_245 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
| %cst_246 = arith.constant dense_resource<__elided__> : tensor<256x64x1x1xf32> | |
| %cst_247 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_248 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_249 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_250 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_251 = arith.constant dense_resource<__elided__> : tensor<64x64x3x3xf32> | |
| %cst_252 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_253 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_254 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_255 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_256 = arith.constant dense_resource<__elided__> : tensor<64x64x1x1xf32> | |
| %cst_257 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_258 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_259 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_260 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
| %cst_261 = arith.constant dense_resource<__elided__> : tensor<64x3x7x7xf32> | |
| %cst_262 = arith.constant 1.000000e-05 : f64 | |
| %cst_263 = arith.constant 0.000000e+00 : f32 | |
| %cst_264 = arith.constant -3.40282347E+38 : f32 | |
| %cst_265 = arith.constant 4.900000e+01 : f32 | |
| %0 = tensor.pad %arg0 low[0, 0, 3, 3] high[0, 0, 3, 3] { | |
| ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| tensor.yield %cst_263 : f32 | |
| } : tensor<1x3x224x224xf32> to tensor<1x3x230x230xf32> | |
| %1 = linalg.init_tensor [1, 64, 112, 112] : tensor<1x64x112x112xf32> | |
| %2 = linalg.fill ins(%cst_263 : f32) outs(%1 : tensor<1x64x112x112xf32>) -> tensor<1x64x112x112xf32> | |
| %3 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%0, %cst_261 : tensor<1x3x230x230xf32>, tensor<64x3x7x7xf32>) outs(%2 : tensor<1x64x112x112xf32>) -> tensor<1x64x112x112xf32> | |
| %4 = arith.cmpi eq, %false, %false : i1 | |
| cf.assert %4, "training is not supported for now" | |
| %5 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%3, %cst_258, %cst_257, %cst_260, %cst_259 : tensor<1x64x112x112xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) outs(%3 : tensor<1x64x112x112xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x64x112x112xf32> | |
| %6 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%5 : tensor<1x64x112x112xf32>) outs(%1 : tensor<1x64x112x112xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x64x112x112xf32> | |
| %7 = tensor.pad %6 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
| ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| tensor.yield %cst_264 : f32 | |
| } : tensor<1x64x112x112xf32> to tensor<1x64x114x114xf32> | |
| %8 = linalg.init_tensor [1, 64, 56, 56] : tensor<1x64x56x56xf32> | |
| %9 = linalg.fill ins(%cst_264 : f32) outs(%8 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
| %10 = linalg.init_tensor [3, 3] : tensor<3x3xf32> | |
| %11 = linalg.pooling_nchw_max {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%7, %10 : tensor<1x64x114x114xf32>, tensor<3x3xf32>) outs(%9 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
| %12 = linalg.fill ins(%cst_263 : f32) outs(%8 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
| %13 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%11, %cst_256 : tensor<1x64x56x56xf32>, tensor<64x64x1x1xf32>) outs(%12 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %14 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%13, %cst_253, %cst_252, %cst_255, %cst_254 : tensor<1x64x56x56xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) outs(%13 : tensor<1x64x56x56xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x64x56x56xf32> | |
| %15 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%14 : tensor<1x64x56x56xf32>) outs(%8 : tensor<1x64x56x56xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x64x56x56xf32> | |
| %16 = tensor.pad %15 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
| ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| tensor.yield %cst_263 : f32 | |
| } : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32> | |
| %17 = linalg.fill ins(%cst_263 : f32) outs(%8 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
| %18 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%16, %cst_251 : tensor<1x64x58x58xf32>, tensor<64x64x3x3xf32>) outs(%17 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %19 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%18, %cst_248, %cst_247, %cst_250, %cst_249 : tensor<1x64x56x56xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) outs(%18 : tensor<1x64x56x56xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x64x56x56xf32> | |
| %20 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%19 : tensor<1x64x56x56xf32>) outs(%8 : tensor<1x64x56x56xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x64x56x56xf32> | |
| %21 = linalg.init_tensor [1, 256, 56, 56] : tensor<1x256x56x56xf32> | |
| %22 = linalg.fill ins(%cst_263 : f32) outs(%21 : tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> | |
| %23 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%20, %cst_246 : tensor<1x64x56x56xf32>, tensor<256x64x1x1xf32>) outs(%22 : tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %24 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%23, %cst_243, %cst_242, %cst_245, %cst_244 : tensor<1x256x56x56xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%23 : tensor<1x256x56x56xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x256x56x56xf32> | |
| %25 = linalg.fill ins(%cst_263 : f32) outs(%21 : tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> | |
| %26 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%11, %cst_241 : tensor<1x64x56x56xf32>, tensor<256x64x1x1xf32>) outs(%25 : tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %27 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%26, %cst_238, %cst_242, %cst_240, %cst_239 : tensor<1x256x56x56xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%26 : tensor<1x256x56x56xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x256x56x56xf32> | |
| %28 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%24, %27 : tensor<1x256x56x56xf32>, tensor<1x256x56x56xf32>) outs(%21 : tensor<1x256x56x56xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
| %269 = arith.addf %arg1, %arg2 : f32 | |
| linalg.yield %269 : f32 | |
| } -> tensor<1x256x56x56xf32> | |
| %29 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%28 : tensor<1x256x56x56xf32>) outs(%21 : tensor<1x256x56x56xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x256x56x56xf32> | |
| %30 = linalg.fill ins(%cst_263 : f32) outs(%8 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
| %31 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%29, %cst_237 : tensor<1x256x56x56xf32>, tensor<64x256x1x1xf32>) outs(%30 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %32 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%31, %cst_234, %cst_233, %cst_236, %cst_235 : tensor<1x64x56x56xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) outs(%31 : tensor<1x64x56x56xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x64x56x56xf32> | |
| %33 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%32 : tensor<1x64x56x56xf32>) outs(%8 : tensor<1x64x56x56xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x64x56x56xf32> | |
| %34 = tensor.pad %33 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
| ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| tensor.yield %cst_263 : f32 | |
| } : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32> | |
| %35 = linalg.fill ins(%cst_263 : f32) outs(%8 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
| %36 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%34, %cst_232 : tensor<1x64x58x58xf32>, tensor<64x64x3x3xf32>) outs(%35 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %37 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%36, %cst_229, %cst_228, %cst_231, %cst_230 : tensor<1x64x56x56xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) outs(%36 : tensor<1x64x56x56xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x64x56x56xf32> | |
| %38 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%37 : tensor<1x64x56x56xf32>) outs(%8 : tensor<1x64x56x56xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x64x56x56xf32> | |
| %39 = linalg.fill ins(%cst_263 : f32) outs(%21 : tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> | |
| %40 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%38, %cst_227 : tensor<1x64x56x56xf32>, tensor<256x64x1x1xf32>) outs(%39 : tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %41 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%40, %cst_224, %cst_223, %cst_226, %cst_225 : tensor<1x256x56x56xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%40 : tensor<1x256x56x56xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x256x56x56xf32> | |
| %42 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%41, %29 : tensor<1x256x56x56xf32>, tensor<1x256x56x56xf32>) outs(%21 : tensor<1x256x56x56xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
| %269 = arith.addf %arg1, %arg2 : f32 | |
| linalg.yield %269 : f32 | |
| } -> tensor<1x256x56x56xf32> | |
| %43 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%42 : tensor<1x256x56x56xf32>) outs(%21 : tensor<1x256x56x56xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x256x56x56xf32> | |
| %44 = linalg.fill ins(%cst_263 : f32) outs(%8 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
| %45 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%43, %cst_222 : tensor<1x256x56x56xf32>, tensor<64x256x1x1xf32>) outs(%44 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %46 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%45, %cst_219, %cst_218, %cst_221, %cst_220 : tensor<1x64x56x56xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) outs(%45 : tensor<1x64x56x56xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x64x56x56xf32> | |
| %47 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%46 : tensor<1x64x56x56xf32>) outs(%8 : tensor<1x64x56x56xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x64x56x56xf32> | |
| %48 = tensor.pad %47 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
| ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| tensor.yield %cst_263 : f32 | |
| } : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32> | |
| %49 = linalg.fill ins(%cst_263 : f32) outs(%8 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
| %50 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%48, %cst_217 : tensor<1x64x58x58xf32>, tensor<64x64x3x3xf32>) outs(%49 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %51 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%50, %cst_214, %cst_213, %cst_216, %cst_215 : tensor<1x64x56x56xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) outs(%50 : tensor<1x64x56x56xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x64x56x56xf32> | |
| %52 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%51 : tensor<1x64x56x56xf32>) outs(%8 : tensor<1x64x56x56xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x64x56x56xf32> | |
| %53 = linalg.fill ins(%cst_263 : f32) outs(%21 : tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> | |
| %54 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%52, %cst_212 : tensor<1x64x56x56xf32>, tensor<256x64x1x1xf32>) outs(%53 : tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %55 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%54, %cst_209, %cst_208, %cst_211, %cst_210 : tensor<1x256x56x56xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%54 : tensor<1x256x56x56xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x256x56x56xf32> | |
| %56 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%55, %43 : tensor<1x256x56x56xf32>, tensor<1x256x56x56xf32>) outs(%21 : tensor<1x256x56x56xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
| %269 = arith.addf %arg1, %arg2 : f32 | |
| linalg.yield %269 : f32 | |
| } -> tensor<1x256x56x56xf32> | |
| %57 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%56 : tensor<1x256x56x56xf32>) outs(%21 : tensor<1x256x56x56xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x256x56x56xf32> | |
| %58 = linalg.init_tensor [1, 128, 56, 56] : tensor<1x128x56x56xf32> | |
| %59 = linalg.fill ins(%cst_263 : f32) outs(%58 : tensor<1x128x56x56xf32>) -> tensor<1x128x56x56xf32> | |
| %60 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%57, %cst_207 : tensor<1x256x56x56xf32>, tensor<128x256x1x1xf32>) outs(%59 : tensor<1x128x56x56xf32>) -> tensor<1x128x56x56xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %61 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%60, %cst_204, %cst_203, %cst_206, %cst_205 : tensor<1x128x56x56xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%60 : tensor<1x128x56x56xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x128x56x56xf32> | |
| %62 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%61 : tensor<1x128x56x56xf32>) outs(%58 : tensor<1x128x56x56xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x128x56x56xf32> | |
| %63 = tensor.pad %62 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
| ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| tensor.yield %cst_263 : f32 | |
| } : tensor<1x128x56x56xf32> to tensor<1x128x58x58xf32> | |
| %64 = linalg.init_tensor [1, 128, 28, 28] : tensor<1x128x28x28xf32> | |
| %65 = linalg.fill ins(%cst_263 : f32) outs(%64 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
| %66 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%63, %cst_202 : tensor<1x128x58x58xf32>, tensor<128x128x3x3xf32>) outs(%65 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %67 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%66, %cst_199, %cst_198, %cst_201, %cst_200 : tensor<1x128x28x28xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%66 : tensor<1x128x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x128x28x28xf32> | |
| %68 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%67 : tensor<1x128x28x28xf32>) outs(%64 : tensor<1x128x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x128x28x28xf32> | |
| %69 = linalg.init_tensor [1, 512, 28, 28] : tensor<1x512x28x28xf32> | |
| %70 = linalg.fill ins(%cst_263 : f32) outs(%69 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
| %71 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%68, %cst_197 : tensor<1x128x28x28xf32>, tensor<512x128x1x1xf32>) outs(%70 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %72 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%71, %cst_194, %cst_193, %cst_196, %cst_195 : tensor<1x512x28x28xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%71 : tensor<1x512x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x512x28x28xf32> | |
| %73 = linalg.fill ins(%cst_263 : f32) outs(%69 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
| %74 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%57, %cst_192 : tensor<1x256x56x56xf32>, tensor<512x256x1x1xf32>) outs(%73 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %75 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%74, %cst_189, %cst_193, %cst_191, %cst_190 : tensor<1x512x28x28xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%74 : tensor<1x512x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x512x28x28xf32> | |
| %76 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%72, %75 : tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) outs(%69 : tensor<1x512x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
| %269 = arith.addf %arg1, %arg2 : f32 | |
| linalg.yield %269 : f32 | |
| } -> tensor<1x512x28x28xf32> | |
| %77 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%76 : tensor<1x512x28x28xf32>) outs(%69 : tensor<1x512x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x512x28x28xf32> | |
| %78 = linalg.fill ins(%cst_263 : f32) outs(%64 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
| %79 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%77, %cst_188 : tensor<1x512x28x28xf32>, tensor<128x512x1x1xf32>) outs(%78 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %80 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%79, %cst_185, %cst_184, %cst_187, %cst_186 : tensor<1x128x28x28xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%79 : tensor<1x128x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x128x28x28xf32> | |
| %81 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%80 : tensor<1x128x28x28xf32>) outs(%64 : tensor<1x128x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x128x28x28xf32> | |
| %82 = tensor.pad %81 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
| ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| tensor.yield %cst_263 : f32 | |
| } : tensor<1x128x28x28xf32> to tensor<1x128x30x30xf32> | |
| %83 = linalg.fill ins(%cst_263 : f32) outs(%64 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
| %84 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%82, %cst_183 : tensor<1x128x30x30xf32>, tensor<128x128x3x3xf32>) outs(%83 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %85 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%84, %cst_180, %cst_179, %cst_182, %cst_181 : tensor<1x128x28x28xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%84 : tensor<1x128x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x128x28x28xf32> | |
| %86 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%85 : tensor<1x128x28x28xf32>) outs(%64 : tensor<1x128x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x128x28x28xf32> | |
| %87 = linalg.fill ins(%cst_263 : f32) outs(%69 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
| %88 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%86, %cst_178 : tensor<1x128x28x28xf32>, tensor<512x128x1x1xf32>) outs(%87 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %89 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%88, %cst_175, %cst_174, %cst_177, %cst_176 : tensor<1x512x28x28xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%88 : tensor<1x512x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x512x28x28xf32> | |
| %90 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%89, %77 : tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) outs(%69 : tensor<1x512x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
| %269 = arith.addf %arg1, %arg2 : f32 | |
| linalg.yield %269 : f32 | |
| } -> tensor<1x512x28x28xf32> | |
| %91 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%90 : tensor<1x512x28x28xf32>) outs(%69 : tensor<1x512x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x512x28x28xf32> | |
| %92 = linalg.fill ins(%cst_263 : f32) outs(%64 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
| %93 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%91, %cst_173 : tensor<1x512x28x28xf32>, tensor<128x512x1x1xf32>) outs(%92 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %94 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%93, %cst_170, %cst_169, %cst_172, %cst_171 : tensor<1x128x28x28xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%93 : tensor<1x128x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x128x28x28xf32> | |
| %95 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%94 : tensor<1x128x28x28xf32>) outs(%64 : tensor<1x128x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x128x28x28xf32> | |
| %96 = tensor.pad %95 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
| ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| tensor.yield %cst_263 : f32 | |
| } : tensor<1x128x28x28xf32> to tensor<1x128x30x30xf32> | |
| %97 = linalg.fill ins(%cst_263 : f32) outs(%64 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
| %98 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%96, %cst_168 : tensor<1x128x30x30xf32>, tensor<128x128x3x3xf32>) outs(%97 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %99 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%98, %cst_165, %cst_164, %cst_167, %cst_166 : tensor<1x128x28x28xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%98 : tensor<1x128x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x128x28x28xf32> | |
| %100 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%99 : tensor<1x128x28x28xf32>) outs(%64 : tensor<1x128x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x128x28x28xf32> | |
| %101 = linalg.fill ins(%cst_263 : f32) outs(%69 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
| %102 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%100, %cst_163 : tensor<1x128x28x28xf32>, tensor<512x128x1x1xf32>) outs(%101 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %103 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%102, %cst_160, %cst_159, %cst_162, %cst_161 : tensor<1x512x28x28xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%102 : tensor<1x512x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x512x28x28xf32> | |
| %104 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%103, %91 : tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) outs(%69 : tensor<1x512x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
| %269 = arith.addf %arg1, %arg2 : f32 | |
| linalg.yield %269 : f32 | |
| } -> tensor<1x512x28x28xf32> | |
| %105 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%104 : tensor<1x512x28x28xf32>) outs(%69 : tensor<1x512x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x512x28x28xf32> | |
| %106 = linalg.fill ins(%cst_263 : f32) outs(%64 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
| %107 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%105, %cst_158 : tensor<1x512x28x28xf32>, tensor<128x512x1x1xf32>) outs(%106 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %108 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%107, %cst_155, %cst_154, %cst_157, %cst_156 : tensor<1x128x28x28xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%107 : tensor<1x128x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x128x28x28xf32> | |
| %109 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%108 : tensor<1x128x28x28xf32>) outs(%64 : tensor<1x128x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x128x28x28xf32> | |
| %110 = tensor.pad %109 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
| ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| tensor.yield %cst_263 : f32 | |
| } : tensor<1x128x28x28xf32> to tensor<1x128x30x30xf32> | |
| %111 = linalg.fill ins(%cst_263 : f32) outs(%64 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
| %112 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%110, %cst_153 : tensor<1x128x30x30xf32>, tensor<128x128x3x3xf32>) outs(%111 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %113 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%112, %cst_150, %cst_149, %cst_152, %cst_151 : tensor<1x128x28x28xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%112 : tensor<1x128x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x128x28x28xf32> | |
| %114 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%113 : tensor<1x128x28x28xf32>) outs(%64 : tensor<1x128x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x128x28x28xf32> | |
| %115 = linalg.fill ins(%cst_263 : f32) outs(%69 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
| %116 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%114, %cst_148 : tensor<1x128x28x28xf32>, tensor<512x128x1x1xf32>) outs(%115 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %117 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%116, %cst_145, %cst_144, %cst_147, %cst_146 : tensor<1x512x28x28xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%116 : tensor<1x512x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x512x28x28xf32> | |
| %118 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%117, %105 : tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) outs(%69 : tensor<1x512x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
| %269 = arith.addf %arg1, %arg2 : f32 | |
| linalg.yield %269 : f32 | |
| } -> tensor<1x512x28x28xf32> | |
| %119 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%118 : tensor<1x512x28x28xf32>) outs(%69 : tensor<1x512x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x512x28x28xf32> | |
| %120 = linalg.init_tensor [1, 256, 28, 28] : tensor<1x256x28x28xf32> | |
| %121 = linalg.fill ins(%cst_263 : f32) outs(%120 : tensor<1x256x28x28xf32>) -> tensor<1x256x28x28xf32> | |
| %122 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%119, %cst_143 : tensor<1x512x28x28xf32>, tensor<256x512x1x1xf32>) outs(%121 : tensor<1x256x28x28xf32>) -> tensor<1x256x28x28xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %123 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%122, %cst_140, %cst_139, %cst_142, %cst_141 : tensor<1x256x28x28xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%122 : tensor<1x256x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x256x28x28xf32> | |
| %124 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%123 : tensor<1x256x28x28xf32>) outs(%120 : tensor<1x256x28x28xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x256x28x28xf32> | |
| %125 = tensor.pad %124 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
| ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| tensor.yield %cst_263 : f32 | |
| } : tensor<1x256x28x28xf32> to tensor<1x256x30x30xf32> | |
| %126 = linalg.init_tensor [1, 256, 14, 14] : tensor<1x256x14x14xf32> | |
| %127 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
| %128 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%125, %cst_138 : tensor<1x256x30x30xf32>, tensor<256x256x3x3xf32>) outs(%127 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %129 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%128, %cst_135, %cst_134, %cst_137, %cst_136 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%128 : tensor<1x256x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x256x14x14xf32> | |
| %130 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%129 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x256x14x14xf32> | |
| %131 = linalg.init_tensor [1, 1024, 14, 14] : tensor<1x1024x14x14xf32> | |
| %132 = linalg.fill ins(%cst_263 : f32) outs(%131 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
| %133 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%130, %cst_133 : tensor<1x256x14x14xf32>, tensor<1024x256x1x1xf32>) outs(%132 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %134 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%133, %cst_130, %cst_129, %cst_132, %cst_131 : tensor<1x1024x14x14xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) outs(%133 : tensor<1x1024x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x1024x14x14xf32> | |
| %135 = linalg.fill ins(%cst_263 : f32) outs(%131 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
| %136 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%119, %cst_128 : tensor<1x512x28x28xf32>, tensor<1024x512x1x1xf32>) outs(%135 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %137 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%136, %cst_125, %cst_129, %cst_127, %cst_126 : tensor<1x1024x14x14xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) outs(%136 : tensor<1x1024x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x1024x14x14xf32> | |
| %138 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%134, %137 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
| %269 = arith.addf %arg1, %arg2 : f32 | |
| linalg.yield %269 : f32 | |
| } -> tensor<1x1024x14x14xf32> | |
| %139 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%138 : tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x1024x14x14xf32> | |
| %140 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
| %141 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%139, %cst_124 : tensor<1x1024x14x14xf32>, tensor<256x1024x1x1xf32>) outs(%140 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %142 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%141, %cst_121, %cst_120, %cst_123, %cst_122 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%141 : tensor<1x256x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x256x14x14xf32> | |
| %143 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%142 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x256x14x14xf32> | |
| %144 = tensor.pad %143 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
| ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| tensor.yield %cst_263 : f32 | |
| } : tensor<1x256x14x14xf32> to tensor<1x256x16x16xf32> | |
| %145 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
| %146 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%144, %cst_119 : tensor<1x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%145 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %147 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%146, %cst_116, %cst_115, %cst_118, %cst_117 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%146 : tensor<1x256x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x256x14x14xf32> | |
| %148 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%147 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x256x14x14xf32> | |
| %149 = linalg.fill ins(%cst_263 : f32) outs(%131 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
| %150 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%148, %cst_114 : tensor<1x256x14x14xf32>, tensor<1024x256x1x1xf32>) outs(%149 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %151 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%150, %cst_111, %cst_110, %cst_113, %cst_112 : tensor<1x1024x14x14xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) outs(%150 : tensor<1x1024x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x1024x14x14xf32> | |
| %152 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%151, %139 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
| %269 = arith.addf %arg1, %arg2 : f32 | |
| linalg.yield %269 : f32 | |
| } -> tensor<1x1024x14x14xf32> | |
| %153 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%152 : tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x1024x14x14xf32> | |
| %154 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
| %155 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%153, %cst_109 : tensor<1x1024x14x14xf32>, tensor<256x1024x1x1xf32>) outs(%154 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %156 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%155, %cst_106, %cst_105, %cst_108, %cst_107 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%155 : tensor<1x256x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x256x14x14xf32> | |
| %157 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%156 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x256x14x14xf32> | |
| %158 = tensor.pad %157 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
| ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| tensor.yield %cst_263 : f32 | |
| } : tensor<1x256x14x14xf32> to tensor<1x256x16x16xf32> | |
| %159 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
| %160 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%158, %cst_104 : tensor<1x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%159 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %161 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%160, %cst_101, %cst_100, %cst_103, %cst_102 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%160 : tensor<1x256x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x256x14x14xf32> | |
| %162 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%161 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x256x14x14xf32> | |
| %163 = linalg.fill ins(%cst_263 : f32) outs(%131 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
| %164 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%162, %cst_99 : tensor<1x256x14x14xf32>, tensor<1024x256x1x1xf32>) outs(%163 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %165 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%164, %cst_96, %cst_95, %cst_98, %cst_97 : tensor<1x1024x14x14xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) outs(%164 : tensor<1x1024x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x1024x14x14xf32> | |
| %166 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%165, %153 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
| %269 = arith.addf %arg1, %arg2 : f32 | |
| linalg.yield %269 : f32 | |
| } -> tensor<1x1024x14x14xf32> | |
| %167 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%166 : tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x1024x14x14xf32> | |
| %168 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
| %169 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%167, %cst_94 : tensor<1x1024x14x14xf32>, tensor<256x1024x1x1xf32>) outs(%168 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %170 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%169, %cst_91, %cst_90, %cst_93, %cst_92 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%169 : tensor<1x256x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x256x14x14xf32> | |
| %171 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%170 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x256x14x14xf32> | |
| %172 = tensor.pad %171 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
| ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| tensor.yield %cst_263 : f32 | |
| } : tensor<1x256x14x14xf32> to tensor<1x256x16x16xf32> | |
| %173 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
| %174 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%172, %cst_89 : tensor<1x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%173 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %175 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%174, %cst_86, %cst_85, %cst_88, %cst_87 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%174 : tensor<1x256x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x256x14x14xf32> | |
| %176 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%175 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x256x14x14xf32> | |
| %177 = linalg.fill ins(%cst_263 : f32) outs(%131 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
| %178 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%176, %cst_84 : tensor<1x256x14x14xf32>, tensor<1024x256x1x1xf32>) outs(%177 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %179 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%178, %cst_81, %cst_80, %cst_83, %cst_82 : tensor<1x1024x14x14xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) outs(%178 : tensor<1x1024x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x1024x14x14xf32> | |
| %180 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%179, %167 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
| %269 = arith.addf %arg1, %arg2 : f32 | |
| linalg.yield %269 : f32 | |
| } -> tensor<1x1024x14x14xf32> | |
| %181 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%180 : tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x1024x14x14xf32> | |
| %182 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
| %183 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%181, %cst_79 : tensor<1x1024x14x14xf32>, tensor<256x1024x1x1xf32>) outs(%182 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %184 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%183, %cst_76, %cst_75, %cst_78, %cst_77 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%183 : tensor<1x256x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x256x14x14xf32> | |
| %185 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%184 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x256x14x14xf32> | |
| %186 = tensor.pad %185 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
| ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| tensor.yield %cst_263 : f32 | |
| } : tensor<1x256x14x14xf32> to tensor<1x256x16x16xf32> | |
| %187 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
| %188 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%186, %cst_74 : tensor<1x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%187 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %189 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%188, %cst_71, %cst_70, %cst_73, %cst_72 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%188 : tensor<1x256x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x256x14x14xf32> | |
| %190 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%189 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x256x14x14xf32> | |
| %191 = linalg.fill ins(%cst_263 : f32) outs(%131 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
| %192 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%190, %cst_69 : tensor<1x256x14x14xf32>, tensor<1024x256x1x1xf32>) outs(%191 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %193 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%192, %cst_66, %cst_65, %cst_68, %cst_67 : tensor<1x1024x14x14xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) outs(%192 : tensor<1x1024x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x1024x14x14xf32> | |
| %194 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%193, %181 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
| %269 = arith.addf %arg1, %arg2 : f32 | |
| linalg.yield %269 : f32 | |
| } -> tensor<1x1024x14x14xf32> | |
| %195 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%194 : tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x1024x14x14xf32> | |
| %196 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
| %197 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%195, %cst_64 : tensor<1x1024x14x14xf32>, tensor<256x1024x1x1xf32>) outs(%196 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %198 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%197, %cst_61, %cst_60, %cst_63, %cst_62 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%197 : tensor<1x256x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x256x14x14xf32> | |
| %199 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%198 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x256x14x14xf32> | |
| %200 = tensor.pad %199 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
| ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| tensor.yield %cst_263 : f32 | |
| } : tensor<1x256x14x14xf32> to tensor<1x256x16x16xf32> | |
| %201 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
| %202 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%200, %cst_59 : tensor<1x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%201 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %203 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%202, %cst_56, %cst_55, %cst_58, %cst_57 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%202 : tensor<1x256x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x256x14x14xf32> | |
| %204 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%203 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x256x14x14xf32> | |
| %205 = linalg.fill ins(%cst_263 : f32) outs(%131 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
| %206 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%204, %cst_54 : tensor<1x256x14x14xf32>, tensor<1024x256x1x1xf32>) outs(%205 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %207 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%206, %cst_51, %cst_50, %cst_53, %cst_52 : tensor<1x1024x14x14xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) outs(%206 : tensor<1x1024x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x1024x14x14xf32> | |
| %208 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%207, %195 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
| %269 = arith.addf %arg1, %arg2 : f32 | |
| linalg.yield %269 : f32 | |
| } -> tensor<1x1024x14x14xf32> | |
| %209 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%208 : tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x1024x14x14xf32> | |
| %210 = linalg.init_tensor [1, 512, 14, 14] : tensor<1x512x14x14xf32> | |
| %211 = linalg.fill ins(%cst_263 : f32) outs(%210 : tensor<1x512x14x14xf32>) -> tensor<1x512x14x14xf32> | |
| %212 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%209, %cst_49 : tensor<1x1024x14x14xf32>, tensor<512x1024x1x1xf32>) outs(%211 : tensor<1x512x14x14xf32>) -> tensor<1x512x14x14xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %213 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%212, %cst_46, %cst_45, %cst_48, %cst_47 : tensor<1x512x14x14xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%212 : tensor<1x512x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x512x14x14xf32> | |
| %214 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%213 : tensor<1x512x14x14xf32>) outs(%210 : tensor<1x512x14x14xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x512x14x14xf32> | |
| %215 = tensor.pad %214 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
| ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| tensor.yield %cst_263 : f32 | |
| } : tensor<1x512x14x14xf32> to tensor<1x512x16x16xf32> | |
| %216 = linalg.init_tensor [1, 512, 7, 7] : tensor<1x512x7x7xf32> | |
| %217 = linalg.fill ins(%cst_263 : f32) outs(%216 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
| %218 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%215, %cst_44 : tensor<1x512x16x16xf32>, tensor<512x512x3x3xf32>) outs(%217 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %219 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%218, %cst_41, %cst_40, %cst_43, %cst_42 : tensor<1x512x7x7xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%218 : tensor<1x512x7x7xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x512x7x7xf32> | |
| %220 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%219 : tensor<1x512x7x7xf32>) outs(%216 : tensor<1x512x7x7xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x512x7x7xf32> | |
| %221 = linalg.init_tensor [1, 2048, 7, 7] : tensor<1x2048x7x7xf32> | |
| %222 = linalg.fill ins(%cst_263 : f32) outs(%221 : tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> | |
| %223 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%220, %cst_39 : tensor<1x512x7x7xf32>, tensor<2048x512x1x1xf32>) outs(%222 : tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %224 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%223, %cst_36, %cst_35, %cst_38, %cst_37 : tensor<1x2048x7x7xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>) outs(%223 : tensor<1x2048x7x7xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x2048x7x7xf32> | |
| %225 = linalg.fill ins(%cst_263 : f32) outs(%221 : tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> | |
| %226 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%209, %cst_34 : tensor<1x1024x14x14xf32>, tensor<2048x1024x1x1xf32>) outs(%225 : tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %227 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%226, %cst_31, %cst_35, %cst_33, %cst_32 : tensor<1x2048x7x7xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>) outs(%226 : tensor<1x2048x7x7xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x2048x7x7xf32> | |
| %228 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%224, %227 : tensor<1x2048x7x7xf32>, tensor<1x2048x7x7xf32>) outs(%221 : tensor<1x2048x7x7xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
| %269 = arith.addf %arg1, %arg2 : f32 | |
| linalg.yield %269 : f32 | |
| } -> tensor<1x2048x7x7xf32> | |
| %229 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%228 : tensor<1x2048x7x7xf32>) outs(%221 : tensor<1x2048x7x7xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x2048x7x7xf32> | |
| %230 = linalg.fill ins(%cst_263 : f32) outs(%216 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
| %231 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%229, %cst_30 : tensor<1x2048x7x7xf32>, tensor<512x2048x1x1xf32>) outs(%230 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %232 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%231, %cst_27, %cst_26, %cst_29, %cst_28 : tensor<1x512x7x7xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%231 : tensor<1x512x7x7xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x512x7x7xf32> | |
| %233 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%232 : tensor<1x512x7x7xf32>) outs(%216 : tensor<1x512x7x7xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x512x7x7xf32> | |
| %234 = tensor.pad %233 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
| ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| tensor.yield %cst_263 : f32 | |
| } : tensor<1x512x7x7xf32> to tensor<1x512x9x9xf32> | |
| %235 = linalg.fill ins(%cst_263 : f32) outs(%216 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
| %236 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%234, %cst_25 : tensor<1x512x9x9xf32>, tensor<512x512x3x3xf32>) outs(%235 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %237 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%236, %cst_22, %cst_21, %cst_24, %cst_23 : tensor<1x512x7x7xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%236 : tensor<1x512x7x7xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x512x7x7xf32> | |
| %238 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237 : tensor<1x512x7x7xf32>) outs(%216 : tensor<1x512x7x7xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x512x7x7xf32> | |
| %239 = linalg.fill ins(%cst_263 : f32) outs(%221 : tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> | |
| %240 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%238, %cst_20 : tensor<1x512x7x7xf32>, tensor<2048x512x1x1xf32>) outs(%239 : tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %241 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%240, %cst_17, %cst_16, %cst_19, %cst_18 : tensor<1x2048x7x7xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>) outs(%240 : tensor<1x2048x7x7xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x2048x7x7xf32> | |
| %242 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%241, %229 : tensor<1x2048x7x7xf32>, tensor<1x2048x7x7xf32>) outs(%221 : tensor<1x2048x7x7xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
| %269 = arith.addf %arg1, %arg2 : f32 | |
| linalg.yield %269 : f32 | |
| } -> tensor<1x2048x7x7xf32> | |
| %243 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%242 : tensor<1x2048x7x7xf32>) outs(%221 : tensor<1x2048x7x7xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x2048x7x7xf32> | |
| %244 = linalg.fill ins(%cst_263 : f32) outs(%216 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
| %245 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%243, %cst_15 : tensor<1x2048x7x7xf32>, tensor<512x2048x1x1xf32>) outs(%244 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %246 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%245, %cst_12, %cst_11, %cst_14, %cst_13 : tensor<1x512x7x7xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%245 : tensor<1x512x7x7xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x512x7x7xf32> | |
| %247 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%246 : tensor<1x512x7x7xf32>) outs(%216 : tensor<1x512x7x7xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x512x7x7xf32> | |
| %248 = tensor.pad %247 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
| ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
| tensor.yield %cst_263 : f32 | |
| } : tensor<1x512x7x7xf32> to tensor<1x512x9x9xf32> | |
| %249 = linalg.fill ins(%cst_263 : f32) outs(%216 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
| %250 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%248, %cst_10 : tensor<1x512x9x9xf32>, tensor<512x512x3x3xf32>) outs(%249 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %251 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%250, %cst_7, %cst_6, %cst_9, %cst_8 : tensor<1x512x7x7xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%250 : tensor<1x512x7x7xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x512x7x7xf32> | |
| %252 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%251 : tensor<1x512x7x7xf32>) outs(%216 : tensor<1x512x7x7xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x512x7x7xf32> | |
| %253 = linalg.fill ins(%cst_263 : f32) outs(%221 : tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> | |
| %254 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%252, %cst_5 : tensor<1x512x7x7xf32>, tensor<2048x512x1x1xf32>) outs(%253 : tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> | |
| cf.assert %4, "training is not supported for now" | |
| %255 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%254, %cst_2, %cst_1, %cst_4, %cst_3 : tensor<1x2048x7x7xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>) outs(%254 : tensor<1x2048x7x7xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
| %269 = arith.truncf %cst_262 : f64 to f32 | |
| %270 = arith.addf %arg5, %269 : f32 | |
| %271 = math.rsqrt %270 : f32 | |
| %272 = arith.subf %arg1, %arg4 : f32 | |
| %273 = arith.mulf %272, %271 : f32 | |
| %274 = arith.mulf %273, %arg2 : f32 | |
| %275 = arith.addf %274, %arg3 : f32 | |
| linalg.yield %275 : f32 | |
| } -> tensor<1x2048x7x7xf32> | |
| %256 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%255, %243 : tensor<1x2048x7x7xf32>, tensor<1x2048x7x7xf32>) outs(%221 : tensor<1x2048x7x7xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
| %269 = arith.addf %arg1, %arg2 : f32 | |
| linalg.yield %269 : f32 | |
| } -> tensor<1x2048x7x7xf32> | |
| %257 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%256 : tensor<1x2048x7x7xf32>) outs(%221 : tensor<1x2048x7x7xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
| %270 = arith.select %269, %arg1, %cst_263 : f32 | |
| linalg.yield %270 : f32 | |
| } -> tensor<1x2048x7x7xf32> | |
| %258 = linalg.init_tensor [1, 2048, 1, 1] : tensor<1x2048x1x1xf32> | |
| %259 = linalg.fill ins(%cst_263 : f32) outs(%258 : tensor<1x2048x1x1xf32>) -> tensor<1x2048x1x1xf32> | |
| %260 = linalg.init_tensor [7, 7] : tensor<7x7xf32> | |
| %261 = linalg.pooling_nchw_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%257, %260 : tensor<1x2048x7x7xf32>, tensor<7x7xf32>) outs(%259 : tensor<1x2048x1x1xf32>) -> tensor<1x2048x1x1xf32> | |
| %262 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%261 : tensor<1x2048x1x1xf32>) outs(%258 : tensor<1x2048x1x1xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| %269 = arith.divf %arg1, %cst_265 : f32 | |
| linalg.yield %269 : f32 | |
| } -> tensor<1x2048x1x1xf32> | |
| %263 = tensor.collapse_shape %262 [[0], [1, 2, 3]] : tensor<1x2048x1x1xf32> into tensor<1x2048xf32> | |
| %264 = linalg.init_tensor [1, 1000] : tensor<1x1000xf32> | |
| %265 = linalg.init_tensor [2048, 1000] : tensor<2048x1000xf32> | |
| %266 = linalg.generic {indexing_maps = [#map3, #map4], iterator_types = ["parallel", "parallel"]} ins(%cst : tensor<1000xf32>) outs(%264 : tensor<1x1000xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| linalg.yield %arg1 : f32 | |
| } -> tensor<1x1000xf32> | |
| %267 = linalg.generic {indexing_maps = [#map5, #map4], iterator_types = ["parallel", "parallel"]} ins(%cst_0 : tensor<1000x2048xf32>) outs(%265 : tensor<2048x1000xf32>) { | |
| ^bb0(%arg1: f32, %arg2: f32): | |
| linalg.yield %arg1 : f32 | |
| } -> tensor<2048x1000xf32> | |
| %268 = linalg.matmul ins(%263, %267 : tensor<1x2048xf32>, tensor<2048x1000xf32>) outs(%266 : tensor<1x1000xf32>) -> tensor<1x1000xf32> | |
| return %268 : tensor<1x1000xf32> | |
| } | |
| } | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment