Created
August 11, 2022 05:24
-
-
Save pashu123/274e2d6cf3b01040faff2814333a93b0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Affine-map aliases used by the linalg.generic ops below.
// (Fixed: removed trailing " | |" table-scrape artifacts that made these
// lines invalid MLIR.)
// #map0: 4-D identity — used as the elementwise input/output map in the
//        batch-norm-style generics (e.g. on tensor<1x64x112x112xf32>).
// #map1: broadcast a 1-D per-channel tensor (scale/shift/mean/var, e.g.
//        tensor<64xf32>) across dim d1 of a 4-D iteration space.
// #map2: 4-D map with the leading (batch) dim pinned to 0 — used as the
//        input map of the ReLU and residual-add generics, where batch is 1.
// #map3: broadcast a 1-D tensor across dim d1 of a 2-D iteration space
//        (not referenced in the visible portion; presumably the final
//        fully-connected bias add — confirm against the rest of the file).
// #map4: 2-D identity (not referenced in the visible portion).
// #map5: 2-D transpose (d0, d1) -> (d1, d0) (not referenced in the visible
//        portion; presumably the FC weight transpose — confirm).
#map0 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d1)>
#map2 = affine_map<(d0, d1, d2, d3) -> (0, d1, d2, d3)>
#map3 = affine_map<(d0, d1) -> (d1)>
#map4 = affine_map<(d0, d1) -> (d0, d1)>
#map5 = affine_map<(d0, d1) -> (d1, d0)>
module attributes {torch.debug_module_name = "VisionModule"} { | |
func.func @forward(%arg0: tensor<1x3x224x224xf32>) -> tensor<1x1000xf32> { | |
%false = arith.constant false | |
%cst = arith.constant dense_resource<__elided__> : tensor<1000xf32> | |
%cst_0 = arith.constant dense_resource<__elided__> : tensor<1000x2048xf32> | |
%cst_1 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_2 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_3 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_4 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_5 = arith.constant dense_resource<__elided__> : tensor<2048x512x1x1xf32> | |
%cst_6 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_7 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_8 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_9 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_10 = arith.constant dense_resource<__elided__> : tensor<512x512x3x3xf32> | |
%cst_11 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_12 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_13 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_14 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_15 = arith.constant dense_resource<__elided__> : tensor<512x2048x1x1xf32> | |
%cst_16 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_17 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_18 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_19 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_20 = arith.constant dense_resource<__elided__> : tensor<2048x512x1x1xf32> | |
%cst_21 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_22 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_23 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_24 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_25 = arith.constant dense_resource<__elided__> : tensor<512x512x3x3xf32> | |
%cst_26 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_27 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_28 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_29 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_30 = arith.constant dense_resource<__elided__> : tensor<512x2048x1x1xf32> | |
%cst_31 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_32 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_33 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_34 = arith.constant dense_resource<__elided__> : tensor<2048x1024x1x1xf32> | |
%cst_35 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_36 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_37 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_38 = arith.constant dense_resource<__elided__> : tensor<2048xf32> | |
%cst_39 = arith.constant dense_resource<__elided__> : tensor<2048x512x1x1xf32> | |
%cst_40 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_41 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_42 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_43 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_44 = arith.constant dense_resource<__elided__> : tensor<512x512x3x3xf32> | |
%cst_45 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_46 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_47 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_48 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_49 = arith.constant dense_resource<__elided__> : tensor<512x1024x1x1xf32> | |
%cst_50 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_51 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_52 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_53 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_54 = arith.constant dense_resource<__elided__> : tensor<1024x256x1x1xf32> | |
%cst_55 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_56 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_57 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_58 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_59 = arith.constant dense_resource<__elided__> : tensor<256x256x3x3xf32> | |
%cst_60 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_61 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_62 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_63 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_64 = arith.constant dense_resource<__elided__> : tensor<256x1024x1x1xf32> | |
%cst_65 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_66 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_67 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_68 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_69 = arith.constant dense_resource<__elided__> : tensor<1024x256x1x1xf32> | |
%cst_70 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_71 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_72 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_73 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_74 = arith.constant dense_resource<__elided__> : tensor<256x256x3x3xf32> | |
%cst_75 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_76 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_77 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_78 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_79 = arith.constant dense_resource<__elided__> : tensor<256x1024x1x1xf32> | |
%cst_80 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_81 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_82 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_83 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_84 = arith.constant dense_resource<__elided__> : tensor<1024x256x1x1xf32> | |
%cst_85 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_86 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_87 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_88 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_89 = arith.constant dense_resource<__elided__> : tensor<256x256x3x3xf32> | |
%cst_90 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_91 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_92 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_93 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_94 = arith.constant dense_resource<__elided__> : tensor<256x1024x1x1xf32> | |
%cst_95 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_96 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_97 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_98 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_99 = arith.constant dense_resource<__elided__> : tensor<1024x256x1x1xf32> | |
%cst_100 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_101 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_102 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_103 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_104 = arith.constant dense_resource<__elided__> : tensor<256x256x3x3xf32> | |
%cst_105 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_106 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_107 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_108 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_109 = arith.constant dense_resource<__elided__> : tensor<256x1024x1x1xf32> | |
%cst_110 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_111 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_112 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_113 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_114 = arith.constant dense_resource<__elided__> : tensor<1024x256x1x1xf32> | |
%cst_115 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_116 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_117 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_118 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_119 = arith.constant dense_resource<__elided__> : tensor<256x256x3x3xf32> | |
%cst_120 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_121 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_122 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_123 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_124 = arith.constant dense_resource<__elided__> : tensor<256x1024x1x1xf32> | |
%cst_125 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_126 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_127 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_128 = arith.constant dense_resource<__elided__> : tensor<1024x512x1x1xf32> | |
%cst_129 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_130 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_131 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_132 = arith.constant dense_resource<__elided__> : tensor<1024xf32> | |
%cst_133 = arith.constant dense_resource<__elided__> : tensor<1024x256x1x1xf32> | |
%cst_134 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_135 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_136 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_137 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_138 = arith.constant dense_resource<__elided__> : tensor<256x256x3x3xf32> | |
%cst_139 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_140 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_141 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_142 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_143 = arith.constant dense_resource<__elided__> : tensor<256x512x1x1xf32> | |
%cst_144 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_145 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_146 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_147 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_148 = arith.constant dense_resource<__elided__> : tensor<512x128x1x1xf32> | |
%cst_149 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_150 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_151 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_152 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_153 = arith.constant dense_resource<__elided__> : tensor<128x128x3x3xf32> | |
%cst_154 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_155 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_156 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_157 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_158 = arith.constant dense_resource<__elided__> : tensor<128x512x1x1xf32> | |
%cst_159 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_160 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_161 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_162 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_163 = arith.constant dense_resource<__elided__> : tensor<512x128x1x1xf32> | |
%cst_164 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_165 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_166 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_167 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_168 = arith.constant dense_resource<__elided__> : tensor<128x128x3x3xf32> | |
%cst_169 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_170 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_171 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_172 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_173 = arith.constant dense_resource<__elided__> : tensor<128x512x1x1xf32> | |
%cst_174 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_175 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_176 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_177 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_178 = arith.constant dense_resource<__elided__> : tensor<512x128x1x1xf32> | |
%cst_179 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_180 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_181 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_182 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_183 = arith.constant dense_resource<__elided__> : tensor<128x128x3x3xf32> | |
%cst_184 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_185 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_186 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_187 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_188 = arith.constant dense_resource<__elided__> : tensor<128x512x1x1xf32> | |
%cst_189 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_190 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_191 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_192 = arith.constant dense_resource<__elided__> : tensor<512x256x1x1xf32> | |
%cst_193 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_194 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_195 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_196 = arith.constant dense_resource<__elided__> : tensor<512xf32> | |
%cst_197 = arith.constant dense_resource<__elided__> : tensor<512x128x1x1xf32> | |
%cst_198 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_199 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_200 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_201 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_202 = arith.constant dense_resource<__elided__> : tensor<128x128x3x3xf32> | |
%cst_203 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_204 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_205 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_206 = arith.constant dense_resource<__elided__> : tensor<128xf32> | |
%cst_207 = arith.constant dense_resource<__elided__> : tensor<128x256x1x1xf32> | |
%cst_208 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_209 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_210 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_211 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_212 = arith.constant dense_resource<__elided__> : tensor<256x64x1x1xf32> | |
%cst_213 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_214 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_215 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_216 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_217 = arith.constant dense_resource<__elided__> : tensor<64x64x3x3xf32> | |
%cst_218 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_219 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_220 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_221 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_222 = arith.constant dense_resource<__elided__> : tensor<64x256x1x1xf32> | |
%cst_223 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_224 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_225 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_226 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_227 = arith.constant dense_resource<__elided__> : tensor<256x64x1x1xf32> | |
%cst_228 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_229 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_230 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_231 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_232 = arith.constant dense_resource<__elided__> : tensor<64x64x3x3xf32> | |
%cst_233 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_234 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_235 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_236 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_237 = arith.constant dense_resource<__elided__> : tensor<64x256x1x1xf32> | |
%cst_238 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_239 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_240 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_241 = arith.constant dense_resource<__elided__> : tensor<256x64x1x1xf32> | |
%cst_242 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_243 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_244 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_245 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_246 = arith.constant dense_resource<__elided__> : tensor<256x64x1x1xf32> | |
%cst_247 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_248 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_249 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_250 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_251 = arith.constant dense_resource<__elided__> : tensor<64x64x3x3xf32> | |
%cst_252 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_253 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_254 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_255 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_256 = arith.constant dense_resource<__elided__> : tensor<64x64x1x1xf32> | |
%cst_257 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_258 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_259 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_260 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_261 = arith.constant dense_resource<__elided__> : tensor<64x3x7x7xf32> | |
%cst_262 = arith.constant 1.000000e-05 : f64 | |
%cst_263 = arith.constant 0.000000e+00 : f32 | |
%cst_264 = arith.constant -3.40282347E+38 : f32 | |
%cst_265 = arith.constant 4.900000e+01 : f32 | |
%0 = tensor.pad %arg0 low[0, 0, 3, 3] high[0, 0, 3, 3] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x3x224x224xf32> to tensor<1x3x230x230xf32> | |
%1 = linalg.init_tensor [1, 64, 112, 112] : tensor<1x64x112x112xf32> | |
%2 = linalg.fill ins(%cst_263 : f32) outs(%1 : tensor<1x64x112x112xf32>) -> tensor<1x64x112x112xf32> | |
%3 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%0, %cst_261 : tensor<1x3x230x230xf32>, tensor<64x3x7x7xf32>) outs(%2 : tensor<1x64x112x112xf32>) -> tensor<1x64x112x112xf32> | |
%4 = arith.cmpi eq, %false, %false : i1 | |
cf.assert %4, "training is not supported for now" | |
%5 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%3, %cst_258, %cst_257, %cst_260, %cst_259 : tensor<1x64x112x112xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) outs(%3 : tensor<1x64x112x112xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x64x112x112xf32> | |
%6 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%5 : tensor<1x64x112x112xf32>) outs(%1 : tensor<1x64x112x112xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x64x112x112xf32> | |
%7 = tensor.pad %6 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_264 : f32 | |
} : tensor<1x64x112x112xf32> to tensor<1x64x114x114xf32> | |
%8 = linalg.init_tensor [1, 64, 56, 56] : tensor<1x64x56x56xf32> | |
%9 = linalg.fill ins(%cst_264 : f32) outs(%8 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
%10 = linalg.init_tensor [3, 3] : tensor<3x3xf32> | |
%11 = linalg.pooling_nchw_max {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%7, %10 : tensor<1x64x114x114xf32>, tensor<3x3xf32>) outs(%9 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
%12 = linalg.fill ins(%cst_263 : f32) outs(%8 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
%13 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%11, %cst_256 : tensor<1x64x56x56xf32>, tensor<64x64x1x1xf32>) outs(%12 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
cf.assert %4, "training is not supported for now" | |
%14 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%13, %cst_253, %cst_252, %cst_255, %cst_254 : tensor<1x64x56x56xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) outs(%13 : tensor<1x64x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x64x56x56xf32> | |
%15 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%14 : tensor<1x64x56x56xf32>) outs(%8 : tensor<1x64x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x64x56x56xf32> | |
%16 = tensor.pad %15 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32> | |
%17 = linalg.fill ins(%cst_263 : f32) outs(%8 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
%18 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%16, %cst_251 : tensor<1x64x58x58xf32>, tensor<64x64x3x3xf32>) outs(%17 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
cf.assert %4, "training is not supported for now" | |
%19 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%18, %cst_248, %cst_247, %cst_250, %cst_249 : tensor<1x64x56x56xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) outs(%18 : tensor<1x64x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x64x56x56xf32> | |
%20 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%19 : tensor<1x64x56x56xf32>) outs(%8 : tensor<1x64x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x64x56x56xf32> | |
%21 = linalg.init_tensor [1, 256, 56, 56] : tensor<1x256x56x56xf32> | |
%22 = linalg.fill ins(%cst_263 : f32) outs(%21 : tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> | |
%23 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%20, %cst_246 : tensor<1x64x56x56xf32>, tensor<256x64x1x1xf32>) outs(%22 : tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> | |
cf.assert %4, "training is not supported for now" | |
%24 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%23, %cst_243, %cst_242, %cst_245, %cst_244 : tensor<1x256x56x56xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%23 : tensor<1x256x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x56x56xf32> | |
%25 = linalg.fill ins(%cst_263 : f32) outs(%21 : tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> | |
%26 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%11, %cst_241 : tensor<1x64x56x56xf32>, tensor<256x64x1x1xf32>) outs(%25 : tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> | |
cf.assert %4, "training is not supported for now" | |
%27 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%26, %cst_238, %cst_242, %cst_240, %cst_239 : tensor<1x256x56x56xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%26 : tensor<1x256x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x56x56xf32> | |
%28 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%24, %27 : tensor<1x256x56x56xf32>, tensor<1x256x56x56xf32>) outs(%21 : tensor<1x256x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x256x56x56xf32> | |
%29 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%28 : tensor<1x256x56x56xf32>) outs(%21 : tensor<1x256x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x56x56xf32> | |
%30 = linalg.fill ins(%cst_263 : f32) outs(%8 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
%31 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%29, %cst_237 : tensor<1x256x56x56xf32>, tensor<64x256x1x1xf32>) outs(%30 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
cf.assert %4, "training is not supported for now" | |
%32 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%31, %cst_234, %cst_233, %cst_236, %cst_235 : tensor<1x64x56x56xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) outs(%31 : tensor<1x64x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x64x56x56xf32> | |
%33 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%32 : tensor<1x64x56x56xf32>) outs(%8 : tensor<1x64x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x64x56x56xf32> | |
%34 = tensor.pad %33 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32> | |
%35 = linalg.fill ins(%cst_263 : f32) outs(%8 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
%36 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%34, %cst_232 : tensor<1x64x58x58xf32>, tensor<64x64x3x3xf32>) outs(%35 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
cf.assert %4, "training is not supported for now" | |
%37 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%36, %cst_229, %cst_228, %cst_231, %cst_230 : tensor<1x64x56x56xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) outs(%36 : tensor<1x64x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x64x56x56xf32> | |
%38 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%37 : tensor<1x64x56x56xf32>) outs(%8 : tensor<1x64x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x64x56x56xf32> | |
%39 = linalg.fill ins(%cst_263 : f32) outs(%21 : tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> | |
%40 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%38, %cst_227 : tensor<1x64x56x56xf32>, tensor<256x64x1x1xf32>) outs(%39 : tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> | |
cf.assert %4, "training is not supported for now" | |
%41 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%40, %cst_224, %cst_223, %cst_226, %cst_225 : tensor<1x256x56x56xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%40 : tensor<1x256x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x56x56xf32> | |
%42 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%41, %29 : tensor<1x256x56x56xf32>, tensor<1x256x56x56xf32>) outs(%21 : tensor<1x256x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x256x56x56xf32> | |
%43 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%42 : tensor<1x256x56x56xf32>) outs(%21 : tensor<1x256x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x56x56xf32> | |
%44 = linalg.fill ins(%cst_263 : f32) outs(%8 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
%45 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%43, %cst_222 : tensor<1x256x56x56xf32>, tensor<64x256x1x1xf32>) outs(%44 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
cf.assert %4, "training is not supported for now" | |
%46 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%45, %cst_219, %cst_218, %cst_221, %cst_220 : tensor<1x64x56x56xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) outs(%45 : tensor<1x64x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x64x56x56xf32> | |
%47 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%46 : tensor<1x64x56x56xf32>) outs(%8 : tensor<1x64x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x64x56x56xf32> | |
%48 = tensor.pad %47 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32> | |
%49 = linalg.fill ins(%cst_263 : f32) outs(%8 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
%50 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%48, %cst_217 : tensor<1x64x58x58xf32>, tensor<64x64x3x3xf32>) outs(%49 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> | |
cf.assert %4, "training is not supported for now" | |
%51 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%50, %cst_214, %cst_213, %cst_216, %cst_215 : tensor<1x64x56x56xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) outs(%50 : tensor<1x64x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x64x56x56xf32> | |
%52 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%51 : tensor<1x64x56x56xf32>) outs(%8 : tensor<1x64x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x64x56x56xf32> | |
%53 = linalg.fill ins(%cst_263 : f32) outs(%21 : tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> | |
%54 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%52, %cst_212 : tensor<1x64x56x56xf32>, tensor<256x64x1x1xf32>) outs(%53 : tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> | |
cf.assert %4, "training is not supported for now" | |
%55 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%54, %cst_209, %cst_208, %cst_211, %cst_210 : tensor<1x256x56x56xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%54 : tensor<1x256x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x56x56xf32> | |
%56 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%55, %43 : tensor<1x256x56x56xf32>, tensor<1x256x56x56xf32>) outs(%21 : tensor<1x256x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x256x56x56xf32> | |
%57 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%56 : tensor<1x256x56x56xf32>) outs(%21 : tensor<1x256x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x56x56xf32> | |
%58 = linalg.init_tensor [1, 128, 56, 56] : tensor<1x128x56x56xf32> | |
%59 = linalg.fill ins(%cst_263 : f32) outs(%58 : tensor<1x128x56x56xf32>) -> tensor<1x128x56x56xf32> | |
%60 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%57, %cst_207 : tensor<1x256x56x56xf32>, tensor<128x256x1x1xf32>) outs(%59 : tensor<1x128x56x56xf32>) -> tensor<1x128x56x56xf32> | |
cf.assert %4, "training is not supported for now" | |
%61 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%60, %cst_204, %cst_203, %cst_206, %cst_205 : tensor<1x128x56x56xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%60 : tensor<1x128x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x128x56x56xf32> | |
%62 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%61 : tensor<1x128x56x56xf32>) outs(%58 : tensor<1x128x56x56xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x128x56x56xf32> | |
%63 = tensor.pad %62 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x128x56x56xf32> to tensor<1x128x58x58xf32> | |
%64 = linalg.init_tensor [1, 128, 28, 28] : tensor<1x128x28x28xf32> | |
%65 = linalg.fill ins(%cst_263 : f32) outs(%64 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
%66 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%63, %cst_202 : tensor<1x128x58x58xf32>, tensor<128x128x3x3xf32>) outs(%65 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%67 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%66, %cst_199, %cst_198, %cst_201, %cst_200 : tensor<1x128x28x28xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%66 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%68 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%67 : tensor<1x128x28x28xf32>) outs(%64 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%69 = linalg.init_tensor [1, 512, 28, 28] : tensor<1x512x28x28xf32> | |
%70 = linalg.fill ins(%cst_263 : f32) outs(%69 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
%71 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%68, %cst_197 : tensor<1x128x28x28xf32>, tensor<512x128x1x1xf32>) outs(%70 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%72 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%71, %cst_194, %cst_193, %cst_196, %cst_195 : tensor<1x512x28x28xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%71 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%73 = linalg.fill ins(%cst_263 : f32) outs(%69 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
%74 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%57, %cst_192 : tensor<1x256x56x56xf32>, tensor<512x256x1x1xf32>) outs(%73 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%75 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%74, %cst_189, %cst_193, %cst_191, %cst_190 : tensor<1x512x28x28xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%74 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%76 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%72, %75 : tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) outs(%69 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%77 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%76 : tensor<1x512x28x28xf32>) outs(%69 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%78 = linalg.fill ins(%cst_263 : f32) outs(%64 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
%79 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%77, %cst_188 : tensor<1x512x28x28xf32>, tensor<128x512x1x1xf32>) outs(%78 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%80 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%79, %cst_185, %cst_184, %cst_187, %cst_186 : tensor<1x128x28x28xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%79 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%81 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%80 : tensor<1x128x28x28xf32>) outs(%64 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%82 = tensor.pad %81 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x128x28x28xf32> to tensor<1x128x30x30xf32> | |
%83 = linalg.fill ins(%cst_263 : f32) outs(%64 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
%84 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%82, %cst_183 : tensor<1x128x30x30xf32>, tensor<128x128x3x3xf32>) outs(%83 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%85 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%84, %cst_180, %cst_179, %cst_182, %cst_181 : tensor<1x128x28x28xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%84 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%86 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%85 : tensor<1x128x28x28xf32>) outs(%64 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%87 = linalg.fill ins(%cst_263 : f32) outs(%69 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
%88 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%86, %cst_178 : tensor<1x128x28x28xf32>, tensor<512x128x1x1xf32>) outs(%87 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%89 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%88, %cst_175, %cst_174, %cst_177, %cst_176 : tensor<1x512x28x28xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%88 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%90 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%89, %77 : tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) outs(%69 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%91 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%90 : tensor<1x512x28x28xf32>) outs(%69 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%92 = linalg.fill ins(%cst_263 : f32) outs(%64 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
%93 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%91, %cst_173 : tensor<1x512x28x28xf32>, tensor<128x512x1x1xf32>) outs(%92 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%94 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%93, %cst_170, %cst_169, %cst_172, %cst_171 : tensor<1x128x28x28xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%93 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%95 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%94 : tensor<1x128x28x28xf32>) outs(%64 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%96 = tensor.pad %95 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x128x28x28xf32> to tensor<1x128x30x30xf32> | |
%97 = linalg.fill ins(%cst_263 : f32) outs(%64 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
%98 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%96, %cst_168 : tensor<1x128x30x30xf32>, tensor<128x128x3x3xf32>) outs(%97 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%99 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%98, %cst_165, %cst_164, %cst_167, %cst_166 : tensor<1x128x28x28xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%98 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%100 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%99 : tensor<1x128x28x28xf32>) outs(%64 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%101 = linalg.fill ins(%cst_263 : f32) outs(%69 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
%102 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%100, %cst_163 : tensor<1x128x28x28xf32>, tensor<512x128x1x1xf32>) outs(%101 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%103 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%102, %cst_160, %cst_159, %cst_162, %cst_161 : tensor<1x512x28x28xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%102 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%104 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%103, %91 : tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) outs(%69 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%105 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%104 : tensor<1x512x28x28xf32>) outs(%69 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%106 = linalg.fill ins(%cst_263 : f32) outs(%64 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
%107 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%105, %cst_158 : tensor<1x512x28x28xf32>, tensor<128x512x1x1xf32>) outs(%106 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%108 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%107, %cst_155, %cst_154, %cst_157, %cst_156 : tensor<1x128x28x28xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%107 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%109 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%108 : tensor<1x128x28x28xf32>) outs(%64 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%110 = tensor.pad %109 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x128x28x28xf32> to tensor<1x128x30x30xf32> | |
%111 = linalg.fill ins(%cst_263 : f32) outs(%64 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
%112 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%110, %cst_153 : tensor<1x128x30x30xf32>, tensor<128x128x3x3xf32>) outs(%111 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%113 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%112, %cst_150, %cst_149, %cst_152, %cst_151 : tensor<1x128x28x28xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%112 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%114 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%113 : tensor<1x128x28x28xf32>) outs(%64 : tensor<1x128x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x128x28x28xf32> | |
%115 = linalg.fill ins(%cst_263 : f32) outs(%69 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
%116 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%114, %cst_148 : tensor<1x128x28x28xf32>, tensor<512x128x1x1xf32>) outs(%115 : tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%117 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%116, %cst_145, %cst_144, %cst_147, %cst_146 : tensor<1x512x28x28xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%116 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%118 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%117, %105 : tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) outs(%69 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%119 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%118 : tensor<1x512x28x28xf32>) outs(%69 : tensor<1x512x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x512x28x28xf32> | |
%120 = linalg.init_tensor [1, 256, 28, 28] : tensor<1x256x28x28xf32> | |
%121 = linalg.fill ins(%cst_263 : f32) outs(%120 : tensor<1x256x28x28xf32>) -> tensor<1x256x28x28xf32> | |
%122 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%119, %cst_143 : tensor<1x512x28x28xf32>, tensor<256x512x1x1xf32>) outs(%121 : tensor<1x256x28x28xf32>) -> tensor<1x256x28x28xf32> | |
cf.assert %4, "training is not supported for now" | |
%123 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%122, %cst_140, %cst_139, %cst_142, %cst_141 : tensor<1x256x28x28xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%122 : tensor<1x256x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x28x28xf32> | |
%124 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%123 : tensor<1x256x28x28xf32>) outs(%120 : tensor<1x256x28x28xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x28x28xf32> | |
%125 = tensor.pad %124 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x256x28x28xf32> to tensor<1x256x30x30xf32> | |
%126 = linalg.init_tensor [1, 256, 14, 14] : tensor<1x256x14x14xf32> | |
%127 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
%128 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%125, %cst_138 : tensor<1x256x30x30xf32>, tensor<256x256x3x3xf32>) outs(%127 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%129 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%128, %cst_135, %cst_134, %cst_137, %cst_136 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%128 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%130 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%129 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%131 = linalg.init_tensor [1, 1024, 14, 14] : tensor<1x1024x14x14xf32> | |
%132 = linalg.fill ins(%cst_263 : f32) outs(%131 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
%133 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%130, %cst_133 : tensor<1x256x14x14xf32>, tensor<1024x256x1x1xf32>) outs(%132 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%134 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%133, %cst_130, %cst_129, %cst_132, %cst_131 : tensor<1x1024x14x14xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) outs(%133 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%135 = linalg.fill ins(%cst_263 : f32) outs(%131 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
%136 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%119, %cst_128 : tensor<1x512x28x28xf32>, tensor<1024x512x1x1xf32>) outs(%135 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%137 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%136, %cst_125, %cst_129, %cst_127, %cst_126 : tensor<1x1024x14x14xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) outs(%136 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%138 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%134, %137 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%139 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%138 : tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%140 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
%141 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%139, %cst_124 : tensor<1x1024x14x14xf32>, tensor<256x1024x1x1xf32>) outs(%140 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%142 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%141, %cst_121, %cst_120, %cst_123, %cst_122 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%141 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%143 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%142 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%144 = tensor.pad %143 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x256x14x14xf32> to tensor<1x256x16x16xf32> | |
%145 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
%146 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%144, %cst_119 : tensor<1x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%145 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%147 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%146, %cst_116, %cst_115, %cst_118, %cst_117 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%146 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%148 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%147 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%149 = linalg.fill ins(%cst_263 : f32) outs(%131 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
%150 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%148, %cst_114 : tensor<1x256x14x14xf32>, tensor<1024x256x1x1xf32>) outs(%149 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%151 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%150, %cst_111, %cst_110, %cst_113, %cst_112 : tensor<1x1024x14x14xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) outs(%150 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%152 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%151, %139 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%153 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%152 : tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%154 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
%155 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%153, %cst_109 : tensor<1x1024x14x14xf32>, tensor<256x1024x1x1xf32>) outs(%154 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%156 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%155, %cst_106, %cst_105, %cst_108, %cst_107 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%155 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%157 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%156 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%158 = tensor.pad %157 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x256x14x14xf32> to tensor<1x256x16x16xf32> | |
%159 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
%160 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%158, %cst_104 : tensor<1x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%159 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%161 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%160, %cst_101, %cst_100, %cst_103, %cst_102 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%160 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%162 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%161 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%163 = linalg.fill ins(%cst_263 : f32) outs(%131 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
%164 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%162, %cst_99 : tensor<1x256x14x14xf32>, tensor<1024x256x1x1xf32>) outs(%163 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%165 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%164, %cst_96, %cst_95, %cst_98, %cst_97 : tensor<1x1024x14x14xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) outs(%164 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%166 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%165, %153 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%167 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%166 : tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%168 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
%169 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%167, %cst_94 : tensor<1x1024x14x14xf32>, tensor<256x1024x1x1xf32>) outs(%168 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%170 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%169, %cst_91, %cst_90, %cst_93, %cst_92 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%169 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%171 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%170 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%172 = tensor.pad %171 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x256x14x14xf32> to tensor<1x256x16x16xf32> | |
%173 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
%174 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%172, %cst_89 : tensor<1x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%173 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%175 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%174, %cst_86, %cst_85, %cst_88, %cst_87 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%174 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%176 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%175 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%177 = linalg.fill ins(%cst_263 : f32) outs(%131 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
%178 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%176, %cst_84 : tensor<1x256x14x14xf32>, tensor<1024x256x1x1xf32>) outs(%177 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%179 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%178, %cst_81, %cst_80, %cst_83, %cst_82 : tensor<1x1024x14x14xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) outs(%178 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%180 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%179, %167 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%181 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%180 : tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%182 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
%183 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%181, %cst_79 : tensor<1x1024x14x14xf32>, tensor<256x1024x1x1xf32>) outs(%182 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%184 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%183, %cst_76, %cst_75, %cst_78, %cst_77 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%183 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%185 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%184 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%186 = tensor.pad %185 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x256x14x14xf32> to tensor<1x256x16x16xf32> | |
%187 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
%188 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%186, %cst_74 : tensor<1x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%187 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%189 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%188, %cst_71, %cst_70, %cst_73, %cst_72 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%188 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%190 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%189 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%191 = linalg.fill ins(%cst_263 : f32) outs(%131 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
%192 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%190, %cst_69 : tensor<1x256x14x14xf32>, tensor<1024x256x1x1xf32>) outs(%191 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%193 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%192, %cst_66, %cst_65, %cst_68, %cst_67 : tensor<1x1024x14x14xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) outs(%192 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%194 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%193, %181 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%195 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%194 : tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%196 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
%197 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%195, %cst_64 : tensor<1x1024x14x14xf32>, tensor<256x1024x1x1xf32>) outs(%196 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%198 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%197, %cst_61, %cst_60, %cst_63, %cst_62 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%197 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%199 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%198 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%200 = tensor.pad %199 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x256x14x14xf32> to tensor<1x256x16x16xf32> | |
%201 = linalg.fill ins(%cst_263 : f32) outs(%126 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
%202 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%200, %cst_59 : tensor<1x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%201 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%203 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%202, %cst_56, %cst_55, %cst_58, %cst_57 : tensor<1x256x14x14xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) outs(%202 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%204 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%203 : tensor<1x256x14x14xf32>) outs(%126 : tensor<1x256x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x256x14x14xf32> | |
%205 = linalg.fill ins(%cst_263 : f32) outs(%131 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
%206 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%204, %cst_54 : tensor<1x256x14x14xf32>, tensor<1024x256x1x1xf32>) outs(%205 : tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%207 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%206, %cst_51, %cst_50, %cst_53, %cst_52 : tensor<1x1024x14x14xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) outs(%206 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%208 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%207, %195 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%209 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%208 : tensor<1x1024x14x14xf32>) outs(%131 : tensor<1x1024x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x1024x14x14xf32> | |
%210 = linalg.init_tensor [1, 512, 14, 14] : tensor<1x512x14x14xf32> | |
%211 = linalg.fill ins(%cst_263 : f32) outs(%210 : tensor<1x512x14x14xf32>) -> tensor<1x512x14x14xf32> | |
%212 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%209, %cst_49 : tensor<1x1024x14x14xf32>, tensor<512x1024x1x1xf32>) outs(%211 : tensor<1x512x14x14xf32>) -> tensor<1x512x14x14xf32> | |
cf.assert %4, "training is not supported for now" | |
%213 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%212, %cst_46, %cst_45, %cst_48, %cst_47 : tensor<1x512x14x14xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%212 : tensor<1x512x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x512x14x14xf32> | |
%214 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%213 : tensor<1x512x14x14xf32>) outs(%210 : tensor<1x512x14x14xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x512x14x14xf32> | |
%215 = tensor.pad %214 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x512x14x14xf32> to tensor<1x512x16x16xf32> | |
%216 = linalg.init_tensor [1, 512, 7, 7] : tensor<1x512x7x7xf32> | |
%217 = linalg.fill ins(%cst_263 : f32) outs(%216 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
%218 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%215, %cst_44 : tensor<1x512x16x16xf32>, tensor<512x512x3x3xf32>) outs(%217 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
cf.assert %4, "training is not supported for now" | |
%219 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%218, %cst_41, %cst_40, %cst_43, %cst_42 : tensor<1x512x7x7xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%218 : tensor<1x512x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x512x7x7xf32> | |
%220 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%219 : tensor<1x512x7x7xf32>) outs(%216 : tensor<1x512x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x512x7x7xf32> | |
%221 = linalg.init_tensor [1, 2048, 7, 7] : tensor<1x2048x7x7xf32> | |
%222 = linalg.fill ins(%cst_263 : f32) outs(%221 : tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> | |
%223 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%220, %cst_39 : tensor<1x512x7x7xf32>, tensor<2048x512x1x1xf32>) outs(%222 : tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> | |
cf.assert %4, "training is not supported for now" | |
%224 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%223, %cst_36, %cst_35, %cst_38, %cst_37 : tensor<1x2048x7x7xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>) outs(%223 : tensor<1x2048x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x2048x7x7xf32> | |
%225 = linalg.fill ins(%cst_263 : f32) outs(%221 : tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> | |
%226 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%209, %cst_34 : tensor<1x1024x14x14xf32>, tensor<2048x1024x1x1xf32>) outs(%225 : tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> | |
cf.assert %4, "training is not supported for now" | |
%227 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%226, %cst_31, %cst_35, %cst_33, %cst_32 : tensor<1x2048x7x7xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>) outs(%226 : tensor<1x2048x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x2048x7x7xf32> | |
%228 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%224, %227 : tensor<1x2048x7x7xf32>, tensor<1x2048x7x7xf32>) outs(%221 : tensor<1x2048x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x2048x7x7xf32> | |
%229 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%228 : tensor<1x2048x7x7xf32>) outs(%221 : tensor<1x2048x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x2048x7x7xf32> | |
%230 = linalg.fill ins(%cst_263 : f32) outs(%216 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
%231 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%229, %cst_30 : tensor<1x2048x7x7xf32>, tensor<512x2048x1x1xf32>) outs(%230 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
cf.assert %4, "training is not supported for now" | |
%232 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%231, %cst_27, %cst_26, %cst_29, %cst_28 : tensor<1x512x7x7xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%231 : tensor<1x512x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x512x7x7xf32> | |
%233 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%232 : tensor<1x512x7x7xf32>) outs(%216 : tensor<1x512x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x512x7x7xf32> | |
%234 = tensor.pad %233 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x512x7x7xf32> to tensor<1x512x9x9xf32> | |
%235 = linalg.fill ins(%cst_263 : f32) outs(%216 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
%236 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%234, %cst_25 : tensor<1x512x9x9xf32>, tensor<512x512x3x3xf32>) outs(%235 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
cf.assert %4, "training is not supported for now" | |
%237 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%236, %cst_22, %cst_21, %cst_24, %cst_23 : tensor<1x512x7x7xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%236 : tensor<1x512x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x512x7x7xf32> | |
%238 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237 : tensor<1x512x7x7xf32>) outs(%216 : tensor<1x512x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x512x7x7xf32> | |
%239 = linalg.fill ins(%cst_263 : f32) outs(%221 : tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> | |
%240 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%238, %cst_20 : tensor<1x512x7x7xf32>, tensor<2048x512x1x1xf32>) outs(%239 : tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> | |
cf.assert %4, "training is not supported for now" | |
%241 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%240, %cst_17, %cst_16, %cst_19, %cst_18 : tensor<1x2048x7x7xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>) outs(%240 : tensor<1x2048x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x2048x7x7xf32> | |
%242 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%241, %229 : tensor<1x2048x7x7xf32>, tensor<1x2048x7x7xf32>) outs(%221 : tensor<1x2048x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x2048x7x7xf32> | |
%243 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%242 : tensor<1x2048x7x7xf32>) outs(%221 : tensor<1x2048x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x2048x7x7xf32> | |
%244 = linalg.fill ins(%cst_263 : f32) outs(%216 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
%245 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%243, %cst_15 : tensor<1x2048x7x7xf32>, tensor<512x2048x1x1xf32>) outs(%244 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
cf.assert %4, "training is not supported for now" | |
%246 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%245, %cst_12, %cst_11, %cst_14, %cst_13 : tensor<1x512x7x7xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%245 : tensor<1x512x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x512x7x7xf32> | |
%247 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%246 : tensor<1x512x7x7xf32>) outs(%216 : tensor<1x512x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x512x7x7xf32> | |
%248 = tensor.pad %247 low[0, 0, 1, 1] high[0, 0, 1, 1] { | |
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): | |
tensor.yield %cst_263 : f32 | |
} : tensor<1x512x7x7xf32> to tensor<1x512x9x9xf32> | |
%249 = linalg.fill ins(%cst_263 : f32) outs(%216 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
%250 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%248, %cst_10 : tensor<1x512x9x9xf32>, tensor<512x512x3x3xf32>) outs(%249 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> | |
cf.assert %4, "training is not supported for now" | |
%251 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%250, %cst_7, %cst_6, %cst_9, %cst_8 : tensor<1x512x7x7xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) outs(%250 : tensor<1x512x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x512x7x7xf32> | |
%252 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%251 : tensor<1x512x7x7xf32>) outs(%216 : tensor<1x512x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x512x7x7xf32> | |
%253 = linalg.fill ins(%cst_263 : f32) outs(%221 : tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> | |
%254 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%252, %cst_5 : tensor<1x512x7x7xf32>, tensor<2048x512x1x1xf32>) outs(%253 : tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> | |
cf.assert %4, "training is not supported for now" | |
%255 = linalg.generic {indexing_maps = [#map0, #map1, #map1, #map1, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%254, %cst_2, %cst_1, %cst_4, %cst_3 : tensor<1x2048x7x7xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>) outs(%254 : tensor<1x2048x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): | |
%269 = arith.truncf %cst_262 : f64 to f32 | |
%270 = arith.addf %arg5, %269 : f32 | |
%271 = math.rsqrt %270 : f32 | |
%272 = arith.subf %arg1, %arg4 : f32 | |
%273 = arith.mulf %272, %271 : f32 | |
%274 = arith.mulf %273, %arg2 : f32 | |
%275 = arith.addf %274, %arg3 : f32 | |
linalg.yield %275 : f32 | |
} -> tensor<1x2048x7x7xf32> | |
%256 = linalg.generic {indexing_maps = [#map2, #map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%255, %243 : tensor<1x2048x7x7xf32>, tensor<1x2048x7x7xf32>) outs(%221 : tensor<1x2048x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%269 = arith.addf %arg1, %arg2 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x2048x7x7xf32> | |
%257 = linalg.generic {indexing_maps = [#map2, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%256 : tensor<1x2048x7x7xf32>) outs(%221 : tensor<1x2048x7x7xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.cmpf ugt, %arg1, %cst_263 : f32 | |
%270 = arith.select %269, %arg1, %cst_263 : f32 | |
linalg.yield %270 : f32 | |
} -> tensor<1x2048x7x7xf32> | |
%258 = linalg.init_tensor [1, 2048, 1, 1] : tensor<1x2048x1x1xf32> | |
%259 = linalg.fill ins(%cst_263 : f32) outs(%258 : tensor<1x2048x1x1xf32>) -> tensor<1x2048x1x1xf32> | |
%260 = linalg.init_tensor [7, 7] : tensor<7x7xf32> | |
%261 = linalg.pooling_nchw_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%257, %260 : tensor<1x2048x7x7xf32>, tensor<7x7xf32>) outs(%259 : tensor<1x2048x1x1xf32>) -> tensor<1x2048x1x1xf32> | |
%262 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%261 : tensor<1x2048x1x1xf32>) outs(%258 : tensor<1x2048x1x1xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
%269 = arith.divf %arg1, %cst_265 : f32 | |
linalg.yield %269 : f32 | |
} -> tensor<1x2048x1x1xf32> | |
%263 = tensor.collapse_shape %262 [[0], [1, 2, 3]] : tensor<1x2048x1x1xf32> into tensor<1x2048xf32> | |
%264 = linalg.init_tensor [1, 1000] : tensor<1x1000xf32> | |
%265 = linalg.init_tensor [2048, 1000] : tensor<2048x1000xf32> | |
%266 = linalg.generic {indexing_maps = [#map3, #map4], iterator_types = ["parallel", "parallel"]} ins(%cst : tensor<1000xf32>) outs(%264 : tensor<1x1000xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
linalg.yield %arg1 : f32 | |
} -> tensor<1x1000xf32> | |
%267 = linalg.generic {indexing_maps = [#map5, #map4], iterator_types = ["parallel", "parallel"]} ins(%cst_0 : tensor<1000x2048xf32>) outs(%265 : tensor<2048x1000xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
linalg.yield %arg1 : f32 | |
} -> tensor<2048x1000xf32> | |
%268 = linalg.matmul ins(%263, %267 : tensor<1x2048xf32>, tensor<2048x1000xf32>) outs(%266 : tensor<1x1000xf32>) -> tensor<1x1000xf32> | |
return %268 : tensor<1x1000xf32> | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment