Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save AmosLewis/9f82db0cfc6b3397681bcf336d09e835 to your computer and use it in GitHub Desktop.

Select an option

Save AmosLewis/9f82db0cfc6b3397681bcf336d09e835 to your computer and use it in GitHub Desktop.
This file has been truncated, but you can view the full file.
// Affine-map attribute aliases. In the visible part of this file they are
// referenced by name from the `indexing_maps` lists of linalg.generic ops.
// Only #map, #map1 and #map2 are used in the visible excerpt; uses of the
// remaining aliases are presumably further down in the truncated function.
//
// 4-D map whose first result is the constant 0 and whose remaining results are
// d1, d2, d3. Used below as the map for the 1x3x224x224 `ins` operands.
#map = affine_map<(d0, d1, d2, d3) -> (0, d1, d2, d3)>
// 4-D identity map: each result is the matching iteration dimension.
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
// 1-D identity map. Used below for the tensor<64x...> vector operands.
#map2 = affine_map<(d0) -> (d0)>
// 4-D iteration space mapped to a single result, d1.
#map3 = affine_map<(d0, d1, d2, d3) -> (d1)>
// Keeps d0 and d1; the last two results are the constant 0.
#map4 = affine_map<(d0, d1, d2, d3) -> (d0, d1, 0, 0)>
// Keeps only d1 (second result); the other three results are the constant 0.
#map5 = affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>
// 4-D iteration space mapped to zero results (a rank-0 access).
#map6 = affine_map<(d0, d1, d2, d3) -> ()>
// 2-D identity map.
#map7 = affine_map<(d0, d1) -> (d0, d1)>
// 2-D map with the two dimensions swapped.
#map8 = affine_map<(d0, d1) -> (d1, d0)>
// 2-D map whose first result is the constant 0 and whose second result is d1.
#map9 = affine_map<(d0, d1) -> (0, d1)>
// 2-D iteration space mapped to a single result, d1.
#map10 = affine_map<(d0, d1) -> (d1)>
module {
func.func @torch_jit(%arg0: !torch.vtensor<[1,3,224,224],f32>) -> !torch.vtensor<[1,1000],f32> attributes {torch.onnx_meta.ir_version = 8 : si64, torch.onnx_meta.opset_version = 17 : si64, torch.onnx_meta.producer_name = "pytorch", torch.onnx_meta.producer_version = "1.13.1"} {
%0 = torch_c.to_builtin_tensor %arg0 : !torch.vtensor<[1,3,224,224],f32> -> tensor<1x3x224x224xf32>
%1 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64x3x7x7xf32>) : !torch.vtensor<[64,3,7,7],f32>
%2 = torch_c.to_builtin_tensor %1 : !torch.vtensor<[64,3,7,7],f32> -> tensor<64x3x7x7xf32>
%3 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64xf32>) : !torch.vtensor<[64],f32>
%4 = torch_c.to_builtin_tensor %3 : !torch.vtensor<[64],f32> -> tensor<64xf32>
%5 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64x64x1x1xf32>) : !torch.vtensor<[64,64,1,1],f32>
%6 = torch_c.to_builtin_tensor %5 : !torch.vtensor<[64,64,1,1],f32> -> tensor<64x64x1x1xf32>
%7 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64xf32>) : !torch.vtensor<[64],f32>
%8 = torch_c.to_builtin_tensor %7 : !torch.vtensor<[64],f32> -> tensor<64xf32>
%9 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64x64x3x3xf32>) : !torch.vtensor<[64,64,3,3],f32>
%10 = torch_c.to_builtin_tensor %9 : !torch.vtensor<[64,64,3,3],f32> -> tensor<64x64x3x3xf32>
%11 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64xf32>) : !torch.vtensor<[64],f32>
%12 = torch_c.to_builtin_tensor %11 : !torch.vtensor<[64],f32> -> tensor<64xf32>
%13 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x64x1x1xf32>) : !torch.vtensor<[256,64,1,1],f32>
%14 = torch_c.to_builtin_tensor %13 : !torch.vtensor<[256,64,1,1],f32> -> tensor<256x64x1x1xf32>
%15 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
%16 = torch_c.to_builtin_tensor %15 : !torch.vtensor<[256],f32> -> tensor<256xf32>
%17 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x64x1x1xf32>) : !torch.vtensor<[256,64,1,1],f32>
%18 = torch_c.to_builtin_tensor %17 : !torch.vtensor<[256,64,1,1],f32> -> tensor<256x64x1x1xf32>
%19 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
%20 = torch_c.to_builtin_tensor %19 : !torch.vtensor<[256],f32> -> tensor<256xf32>
%21 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64x256x1x1xf32>) : !torch.vtensor<[64,256,1,1],f32>
%22 = torch_c.to_builtin_tensor %21 : !torch.vtensor<[64,256,1,1],f32> -> tensor<64x256x1x1xf32>
%23 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64xf32>) : !torch.vtensor<[64],f32>
%24 = torch_c.to_builtin_tensor %23 : !torch.vtensor<[64],f32> -> tensor<64xf32>
%25 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64x64x3x3xf32>) : !torch.vtensor<[64,64,3,3],f32>
%26 = torch_c.to_builtin_tensor %25 : !torch.vtensor<[64,64,3,3],f32> -> tensor<64x64x3x3xf32>
%27 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64xf32>) : !torch.vtensor<[64],f32>
%28 = torch_c.to_builtin_tensor %27 : !torch.vtensor<[64],f32> -> tensor<64xf32>
%29 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x64x1x1xf32>) : !torch.vtensor<[256,64,1,1],f32>
%30 = torch_c.to_builtin_tensor %29 : !torch.vtensor<[256,64,1,1],f32> -> tensor<256x64x1x1xf32>
%31 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
%32 = torch_c.to_builtin_tensor %31 : !torch.vtensor<[256],f32> -> tensor<256xf32>
%33 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64x256x1x1xf32>) : !torch.vtensor<[64,256,1,1],f32>
%34 = torch_c.to_builtin_tensor %33 : !torch.vtensor<[64,256,1,1],f32> -> tensor<64x256x1x1xf32>
%35 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64xf32>) : !torch.vtensor<[64],f32>
%36 = torch_c.to_builtin_tensor %35 : !torch.vtensor<[64],f32> -> tensor<64xf32>
%37 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64x64x3x3xf32>) : !torch.vtensor<[64,64,3,3],f32>
%38 = torch_c.to_builtin_tensor %37 : !torch.vtensor<[64,64,3,3],f32> -> tensor<64x64x3x3xf32>
%39 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64xf32>) : !torch.vtensor<[64],f32>
%40 = torch_c.to_builtin_tensor %39 : !torch.vtensor<[64],f32> -> tensor<64xf32>
%41 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x64x1x1xf32>) : !torch.vtensor<[256,64,1,1],f32>
%42 = torch_c.to_builtin_tensor %41 : !torch.vtensor<[256,64,1,1],f32> -> tensor<256x64x1x1xf32>
%43 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
%44 = torch_c.to_builtin_tensor %43 : !torch.vtensor<[256],f32> -> tensor<256xf32>
%45 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128x256x1x1xf32>) : !torch.vtensor<[128,256,1,1],f32>
%46 = torch_c.to_builtin_tensor %45 : !torch.vtensor<[128,256,1,1],f32> -> tensor<128x256x1x1xf32>
%47 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128xf32>) : !torch.vtensor<[128],f32>
%48 = torch_c.to_builtin_tensor %47 : !torch.vtensor<[128],f32> -> tensor<128xf32>
%49 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128x128x3x3xf32>) : !torch.vtensor<[128,128,3,3],f32>
%50 = torch_c.to_builtin_tensor %49 : !torch.vtensor<[128,128,3,3],f32> -> tensor<128x128x3x3xf32>
%51 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128xf32>) : !torch.vtensor<[128],f32>
%52 = torch_c.to_builtin_tensor %51 : !torch.vtensor<[128],f32> -> tensor<128xf32>
%53 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x128x1x1xf32>) : !torch.vtensor<[512,128,1,1],f32>
%54 = torch_c.to_builtin_tensor %53 : !torch.vtensor<[512,128,1,1],f32> -> tensor<512x128x1x1xf32>
%55 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.vtensor<[512],f32>
%56 = torch_c.to_builtin_tensor %55 : !torch.vtensor<[512],f32> -> tensor<512xf32>
%57 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x256x1x1xf32>) : !torch.vtensor<[512,256,1,1],f32>
%58 = torch_c.to_builtin_tensor %57 : !torch.vtensor<[512,256,1,1],f32> -> tensor<512x256x1x1xf32>
%59 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.vtensor<[512],f32>
%60 = torch_c.to_builtin_tensor %59 : !torch.vtensor<[512],f32> -> tensor<512xf32>
%61 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128x512x1x1xf32>) : !torch.vtensor<[128,512,1,1],f32>
%62 = torch_c.to_builtin_tensor %61 : !torch.vtensor<[128,512,1,1],f32> -> tensor<128x512x1x1xf32>
%63 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128xf32>) : !torch.vtensor<[128],f32>
%64 = torch_c.to_builtin_tensor %63 : !torch.vtensor<[128],f32> -> tensor<128xf32>
%65 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128x128x3x3xf32>) : !torch.vtensor<[128,128,3,3],f32>
%66 = torch_c.to_builtin_tensor %65 : !torch.vtensor<[128,128,3,3],f32> -> tensor<128x128x3x3xf32>
%67 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128xf32>) : !torch.vtensor<[128],f32>
%68 = torch_c.to_builtin_tensor %67 : !torch.vtensor<[128],f32> -> tensor<128xf32>
%69 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x128x1x1xf32>) : !torch.vtensor<[512,128,1,1],f32>
%70 = torch_c.to_builtin_tensor %69 : !torch.vtensor<[512,128,1,1],f32> -> tensor<512x128x1x1xf32>
%71 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.vtensor<[512],f32>
%72 = torch_c.to_builtin_tensor %71 : !torch.vtensor<[512],f32> -> tensor<512xf32>
%73 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128x512x1x1xf32>) : !torch.vtensor<[128,512,1,1],f32>
%74 = torch_c.to_builtin_tensor %73 : !torch.vtensor<[128,512,1,1],f32> -> tensor<128x512x1x1xf32>
%75 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128xf32>) : !torch.vtensor<[128],f32>
%76 = torch_c.to_builtin_tensor %75 : !torch.vtensor<[128],f32> -> tensor<128xf32>
%77 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128x128x3x3xf32>) : !torch.vtensor<[128,128,3,3],f32>
%78 = torch_c.to_builtin_tensor %77 : !torch.vtensor<[128,128,3,3],f32> -> tensor<128x128x3x3xf32>
%79 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128xf32>) : !torch.vtensor<[128],f32>
%80 = torch_c.to_builtin_tensor %79 : !torch.vtensor<[128],f32> -> tensor<128xf32>
%81 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x128x1x1xf32>) : !torch.vtensor<[512,128,1,1],f32>
%82 = torch_c.to_builtin_tensor %81 : !torch.vtensor<[512,128,1,1],f32> -> tensor<512x128x1x1xf32>
%83 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.vtensor<[512],f32>
%84 = torch_c.to_builtin_tensor %83 : !torch.vtensor<[512],f32> -> tensor<512xf32>
%85 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128x512x1x1xf32>) : !torch.vtensor<[128,512,1,1],f32>
%86 = torch_c.to_builtin_tensor %85 : !torch.vtensor<[128,512,1,1],f32> -> tensor<128x512x1x1xf32>
%87 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128xf32>) : !torch.vtensor<[128],f32>
%88 = torch_c.to_builtin_tensor %87 : !torch.vtensor<[128],f32> -> tensor<128xf32>
%89 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128x128x3x3xf32>) : !torch.vtensor<[128,128,3,3],f32>
%90 = torch_c.to_builtin_tensor %89 : !torch.vtensor<[128,128,3,3],f32> -> tensor<128x128x3x3xf32>
%91 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128xf32>) : !torch.vtensor<[128],f32>
%92 = torch_c.to_builtin_tensor %91 : !torch.vtensor<[128],f32> -> tensor<128xf32>
%93 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x128x1x1xf32>) : !torch.vtensor<[512,128,1,1],f32>
%94 = torch_c.to_builtin_tensor %93 : !torch.vtensor<[512,128,1,1],f32> -> tensor<512x128x1x1xf32>
%95 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.vtensor<[512],f32>
%96 = torch_c.to_builtin_tensor %95 : !torch.vtensor<[512],f32> -> tensor<512xf32>
%97 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x512x1x1xf32>) : !torch.vtensor<[256,512,1,1],f32>
%98 = torch_c.to_builtin_tensor %97 : !torch.vtensor<[256,512,1,1],f32> -> tensor<256x512x1x1xf32>
%99 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
%100 = torch_c.to_builtin_tensor %99 : !torch.vtensor<[256],f32> -> tensor<256xf32>
%101 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x256x3x3xf32>) : !torch.vtensor<[256,256,3,3],f32>
%102 = torch_c.to_builtin_tensor %101 : !torch.vtensor<[256,256,3,3],f32> -> tensor<256x256x3x3xf32>
%103 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
%104 = torch_c.to_builtin_tensor %103 : !torch.vtensor<[256],f32> -> tensor<256xf32>
%105 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024x256x1x1xf32>) : !torch.vtensor<[1024,256,1,1],f32>
%106 = torch_c.to_builtin_tensor %105 : !torch.vtensor<[1024,256,1,1],f32> -> tensor<1024x256x1x1xf32>
%107 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024xf32>) : !torch.vtensor<[1024],f32>
%108 = torch_c.to_builtin_tensor %107 : !torch.vtensor<[1024],f32> -> tensor<1024xf32>
%109 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024x512x1x1xf32>) : !torch.vtensor<[1024,512,1,1],f32>
%110 = torch_c.to_builtin_tensor %109 : !torch.vtensor<[1024,512,1,1],f32> -> tensor<1024x512x1x1xf32>
%111 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024xf32>) : !torch.vtensor<[1024],f32>
%112 = torch_c.to_builtin_tensor %111 : !torch.vtensor<[1024],f32> -> tensor<1024xf32>
%113 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x1024x1x1xf32>) : !torch.vtensor<[256,1024,1,1],f32>
%114 = torch_c.to_builtin_tensor %113 : !torch.vtensor<[256,1024,1,1],f32> -> tensor<256x1024x1x1xf32>
%115 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
%116 = torch_c.to_builtin_tensor %115 : !torch.vtensor<[256],f32> -> tensor<256xf32>
%117 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x256x3x3xf32>) : !torch.vtensor<[256,256,3,3],f32>
%118 = torch_c.to_builtin_tensor %117 : !torch.vtensor<[256,256,3,3],f32> -> tensor<256x256x3x3xf32>
%119 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
%120 = torch_c.to_builtin_tensor %119 : !torch.vtensor<[256],f32> -> tensor<256xf32>
%121 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024x256x1x1xf32>) : !torch.vtensor<[1024,256,1,1],f32>
%122 = torch_c.to_builtin_tensor %121 : !torch.vtensor<[1024,256,1,1],f32> -> tensor<1024x256x1x1xf32>
%123 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024xf32>) : !torch.vtensor<[1024],f32>
%124 = torch_c.to_builtin_tensor %123 : !torch.vtensor<[1024],f32> -> tensor<1024xf32>
%125 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x1024x1x1xf32>) : !torch.vtensor<[256,1024,1,1],f32>
%126 = torch_c.to_builtin_tensor %125 : !torch.vtensor<[256,1024,1,1],f32> -> tensor<256x1024x1x1xf32>
%127 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
%128 = torch_c.to_builtin_tensor %127 : !torch.vtensor<[256],f32> -> tensor<256xf32>
%129 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x256x3x3xf32>) : !torch.vtensor<[256,256,3,3],f32>
%130 = torch_c.to_builtin_tensor %129 : !torch.vtensor<[256,256,3,3],f32> -> tensor<256x256x3x3xf32>
%131 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
%132 = torch_c.to_builtin_tensor %131 : !torch.vtensor<[256],f32> -> tensor<256xf32>
%133 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024x256x1x1xf32>) : !torch.vtensor<[1024,256,1,1],f32>
%134 = torch_c.to_builtin_tensor %133 : !torch.vtensor<[1024,256,1,1],f32> -> tensor<1024x256x1x1xf32>
%135 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024xf32>) : !torch.vtensor<[1024],f32>
%136 = torch_c.to_builtin_tensor %135 : !torch.vtensor<[1024],f32> -> tensor<1024xf32>
%137 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x1024x1x1xf32>) : !torch.vtensor<[256,1024,1,1],f32>
%138 = torch_c.to_builtin_tensor %137 : !torch.vtensor<[256,1024,1,1],f32> -> tensor<256x1024x1x1xf32>
%139 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
%140 = torch_c.to_builtin_tensor %139 : !torch.vtensor<[256],f32> -> tensor<256xf32>
%141 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x256x3x3xf32>) : !torch.vtensor<[256,256,3,3],f32>
%142 = torch_c.to_builtin_tensor %141 : !torch.vtensor<[256,256,3,3],f32> -> tensor<256x256x3x3xf32>
%143 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
%144 = torch_c.to_builtin_tensor %143 : !torch.vtensor<[256],f32> -> tensor<256xf32>
%145 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024x256x1x1xf32>) : !torch.vtensor<[1024,256,1,1],f32>
%146 = torch_c.to_builtin_tensor %145 : !torch.vtensor<[1024,256,1,1],f32> -> tensor<1024x256x1x1xf32>
%147 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024xf32>) : !torch.vtensor<[1024],f32>
%148 = torch_c.to_builtin_tensor %147 : !torch.vtensor<[1024],f32> -> tensor<1024xf32>
%149 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x1024x1x1xf32>) : !torch.vtensor<[256,1024,1,1],f32>
%150 = torch_c.to_builtin_tensor %149 : !torch.vtensor<[256,1024,1,1],f32> -> tensor<256x1024x1x1xf32>
%151 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
%152 = torch_c.to_builtin_tensor %151 : !torch.vtensor<[256],f32> -> tensor<256xf32>
%153 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x256x3x3xf32>) : !torch.vtensor<[256,256,3,3],f32>
%154 = torch_c.to_builtin_tensor %153 : !torch.vtensor<[256,256,3,3],f32> -> tensor<256x256x3x3xf32>
%155 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
%156 = torch_c.to_builtin_tensor %155 : !torch.vtensor<[256],f32> -> tensor<256xf32>
%157 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024x256x1x1xf32>) : !torch.vtensor<[1024,256,1,1],f32>
%158 = torch_c.to_builtin_tensor %157 : !torch.vtensor<[1024,256,1,1],f32> -> tensor<1024x256x1x1xf32>
%159 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024xf32>) : !torch.vtensor<[1024],f32>
%160 = torch_c.to_builtin_tensor %159 : !torch.vtensor<[1024],f32> -> tensor<1024xf32>
%161 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x1024x1x1xf32>) : !torch.vtensor<[256,1024,1,1],f32>
%162 = torch_c.to_builtin_tensor %161 : !torch.vtensor<[256,1024,1,1],f32> -> tensor<256x1024x1x1xf32>
%163 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
%164 = torch_c.to_builtin_tensor %163 : !torch.vtensor<[256],f32> -> tensor<256xf32>
%165 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x256x3x3xf32>) : !torch.vtensor<[256,256,3,3],f32>
%166 = torch_c.to_builtin_tensor %165 : !torch.vtensor<[256,256,3,3],f32> -> tensor<256x256x3x3xf32>
%167 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
%168 = torch_c.to_builtin_tensor %167 : !torch.vtensor<[256],f32> -> tensor<256xf32>
%169 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024x256x1x1xf32>) : !torch.vtensor<[1024,256,1,1],f32>
%170 = torch_c.to_builtin_tensor %169 : !torch.vtensor<[1024,256,1,1],f32> -> tensor<1024x256x1x1xf32>
%171 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024xf32>) : !torch.vtensor<[1024],f32>
%172 = torch_c.to_builtin_tensor %171 : !torch.vtensor<[1024],f32> -> tensor<1024xf32>
%173 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x1024x1x1xf32>) : !torch.vtensor<[512,1024,1,1],f32>
%174 = torch_c.to_builtin_tensor %173 : !torch.vtensor<[512,1024,1,1],f32> -> tensor<512x1024x1x1xf32>
%175 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.vtensor<[512],f32>
%176 = torch_c.to_builtin_tensor %175 : !torch.vtensor<[512],f32> -> tensor<512xf32>
%177 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x512x3x3xf32>) : !torch.vtensor<[512,512,3,3],f32>
%178 = torch_c.to_builtin_tensor %177 : !torch.vtensor<[512,512,3,3],f32> -> tensor<512x512x3x3xf32>
%179 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.vtensor<[512],f32>
%180 = torch_c.to_builtin_tensor %179 : !torch.vtensor<[512],f32> -> tensor<512xf32>
%181 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2048x512x1x1xf32>) : !torch.vtensor<[2048,512,1,1],f32>
%182 = torch_c.to_builtin_tensor %181 : !torch.vtensor<[2048,512,1,1],f32> -> tensor<2048x512x1x1xf32>
%183 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2048xf32>) : !torch.vtensor<[2048],f32>
%184 = torch_c.to_builtin_tensor %183 : !torch.vtensor<[2048],f32> -> tensor<2048xf32>
%185 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2048x1024x1x1xf32>) : !torch.vtensor<[2048,1024,1,1],f32>
%186 = torch_c.to_builtin_tensor %185 : !torch.vtensor<[2048,1024,1,1],f32> -> tensor<2048x1024x1x1xf32>
%187 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2048xf32>) : !torch.vtensor<[2048],f32>
%188 = torch_c.to_builtin_tensor %187 : !torch.vtensor<[2048],f32> -> tensor<2048xf32>
%189 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x2048x1x1xf32>) : !torch.vtensor<[512,2048,1,1],f32>
%190 = torch_c.to_builtin_tensor %189 : !torch.vtensor<[512,2048,1,1],f32> -> tensor<512x2048x1x1xf32>
%191 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.vtensor<[512],f32>
%192 = torch_c.to_builtin_tensor %191 : !torch.vtensor<[512],f32> -> tensor<512xf32>
%193 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x512x3x3xf32>) : !torch.vtensor<[512,512,3,3],f32>
%194 = torch_c.to_builtin_tensor %193 : !torch.vtensor<[512,512,3,3],f32> -> tensor<512x512x3x3xf32>
%195 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.vtensor<[512],f32>
%196 = torch_c.to_builtin_tensor %195 : !torch.vtensor<[512],f32> -> tensor<512xf32>
%197 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2048x512x1x1xf32>) : !torch.vtensor<[2048,512,1,1],f32>
%198 = torch_c.to_builtin_tensor %197 : !torch.vtensor<[2048,512,1,1],f32> -> tensor<2048x512x1x1xf32>
%199 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2048xf32>) : !torch.vtensor<[2048],f32>
%200 = torch_c.to_builtin_tensor %199 : !torch.vtensor<[2048],f32> -> tensor<2048xf32>
%201 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x2048x1x1xf32>) : !torch.vtensor<[512,2048,1,1],f32>
%202 = torch_c.to_builtin_tensor %201 : !torch.vtensor<[512,2048,1,1],f32> -> tensor<512x2048x1x1xf32>
%203 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.vtensor<[512],f32>
%204 = torch_c.to_builtin_tensor %203 : !torch.vtensor<[512],f32> -> tensor<512xf32>
%205 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x512x3x3xf32>) : !torch.vtensor<[512,512,3,3],f32>
%206 = torch_c.to_builtin_tensor %205 : !torch.vtensor<[512,512,3,3],f32> -> tensor<512x512x3x3xf32>
%207 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.vtensor<[512],f32>
%208 = torch_c.to_builtin_tensor %207 : !torch.vtensor<[512],f32> -> tensor<512xf32>
%209 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2048x512x1x1xf32>) : !torch.vtensor<[2048,512,1,1],f32>
%210 = torch_c.to_builtin_tensor %209 : !torch.vtensor<[2048,512,1,1],f32> -> tensor<2048x512x1x1xf32>
%211 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2048xf32>) : !torch.vtensor<[2048],f32>
%212 = torch_c.to_builtin_tensor %211 : !torch.vtensor<[2048],f32> -> tensor<2048xf32>
%213 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1000x2048xf32>) : !torch.vtensor<[1000,2048],f32>
%214 = torch_c.to_builtin_tensor %213 : !torch.vtensor<[1000,2048],f32> -> tensor<1000x2048xf32>
%215 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1000xf32>) : !torch.vtensor<[1000],f32>
%216 = torch_c.to_builtin_tensor %215 : !torch.vtensor<[1000],f32> -> tensor<1000xf32>
%none = torch.constant.none
%217 = torch.vtensor.literal(dense<6.250000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%218 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12 = torch.constant.int 12
%219 = torch.aten.item %217 : !torch.vtensor<[],f32> -> !torch.float
%220 = torch_c.to_f64 %219
%221 = torch.aten.item %218 : !torch.vtensor<[],si8> -> !torch.int
%222 = torch_c.to_i64 %221
%c1 = arith.constant 1 : index
%c1_0 = arith.constant 1 : index
%c3 = arith.constant 3 : index
%c2 = arith.constant 2 : index
%c224 = arith.constant 224 : index
%c3_1 = arith.constant 3 : index
%c224_2 = arith.constant 224 : index
%223 = tensor.empty() : tensor<1x3x224x224xi8>
%224 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<1x3x224x224xf32>) outs(%223 : tensor<1x3x224x224xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %221
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %219
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x3x224x224xi8>
%cast = tensor.cast %224 : tensor<1x3x224x224xi8> to tensor<1x3x224x224xi8>
%cast_3 = tensor.cast %cast : tensor<1x3x224x224xi8> to tensor<1x3x224x224xi8>
%225 = torch.vtensor.literal(dense<6.250000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%226 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%227 = torch.aten.item %225 : !torch.vtensor<[],f32> -> !torch.float
%228 = torch_c.to_f64 %227
%229 = torch.aten.item %226 : !torch.vtensor<[],si8> -> !torch.int
%230 = torch_c.to_i64 %229
%cast_4 = tensor.cast %cast_3 : tensor<1x3x224x224xi8> to tensor<1x3x224x224xi8>
%c1_5 = arith.constant 1 : index
%c1_6 = arith.constant 1 : index
%c3_7 = arith.constant 3 : index
%c2_8 = arith.constant 2 : index
%c224_9 = arith.constant 224 : index
%c3_10 = arith.constant 3 : index
%c224_11 = arith.constant 224 : index
%231 = tensor.empty() : tensor<1x3x224x224xf32>
%232 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4 : tensor<1x3x224x224xi8>) outs(%231 : tensor<1x3x224x224xf32>) {
^bb0(%in: i8, %out: f32):
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %229
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %227
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x3x224x224xf32>
%cast_12 = tensor.cast %232 : tensor<1x3x224x224xf32> to tensor<1x3x224x224xf32>
%233 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%234 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_13 = torch.constant.int 12
%235 = torch.aten.item %233 : !torch.vtensor<[],f32> -> !torch.float
%236 = torch_c.to_f64 %235
%237 = torch.aten.item %234 : !torch.vtensor<[],si8> -> !torch.int
%238 = torch_c.to_i64 %237
%c1_14 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%c64 = arith.constant 64 : index
%c1_15 = arith.constant 1 : index
%c3_16 = arith.constant 3 : index
%c2_17 = arith.constant 2 : index
%c7 = arith.constant 7 : index
%c3_18 = arith.constant 3 : index
%c7_19 = arith.constant 7 : index
%239 = tensor.empty() : tensor<64x3x7x7xi8>
%240 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%2 : tensor<64x3x7x7xf32>) outs(%239 : tensor<64x3x7x7xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %237
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %235
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<64x3x7x7xi8>
%cast_20 = tensor.cast %240 : tensor<64x3x7x7xi8> to tensor<64x3x7x7xi8>
%cast_21 = tensor.cast %cast_20 : tensor<64x3x7x7xi8> to tensor<64x3x7x7xi8>
%241 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%242 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%243 = torch.aten.item %241 : !torch.vtensor<[],f32> -> !torch.float
%244 = torch_c.to_f64 %243
%245 = torch.aten.item %242 : !torch.vtensor<[],si8> -> !torch.int
%246 = torch_c.to_i64 %245
%cast_22 = tensor.cast %cast_21 : tensor<64x3x7x7xi8> to tensor<64x3x7x7xi8>
%c1_23 = arith.constant 1 : index
%c0_24 = arith.constant 0 : index
%c64_25 = arith.constant 64 : index
%c1_26 = arith.constant 1 : index
%c3_27 = arith.constant 3 : index
%c2_28 = arith.constant 2 : index
%c7_29 = arith.constant 7 : index
%c3_30 = arith.constant 3 : index
%c7_31 = arith.constant 7 : index
%247 = tensor.empty() : tensor<64x3x7x7xf32>
%248 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_22 : tensor<64x3x7x7xi8>) outs(%247 : tensor<64x3x7x7xf32>) {
^bb0(%in: i8, %out: f32):
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %245
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %243
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<64x3x7x7xf32>
%cast_32 = tensor.cast %248 : tensor<64x3x7x7xf32> to tensor<64x3x7x7xf32>
%249 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%250 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_33 = torch.constant.int 12
%251 = torch.aten.item %249 : !torch.vtensor<[],f32> -> !torch.float
%252 = torch_c.to_f64 %251
%253 = torch.aten.item %250 : !torch.vtensor<[],si8> -> !torch.int
%254 = torch_c.to_i64 %253
%c1_34 = arith.constant 1 : index
%c0_35 = arith.constant 0 : index
%c64_36 = arith.constant 64 : index
%255 = tensor.empty() : tensor<64xi8>
%256 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%4 : tensor<64xf32>) outs(%255 : tensor<64xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %253
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %251
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<64xi8>
%cast_37 = tensor.cast %256 : tensor<64xi8> to tensor<64xi8>
%cast_38 = tensor.cast %cast_37 : tensor<64xi8> to tensor<64xi8>
%257 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%258 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%259 = torch.aten.item %257 : !torch.vtensor<[],f32> -> !torch.float
%260 = torch_c.to_f64 %259
%261 = torch.aten.item %258 : !torch.vtensor<[],si8> -> !torch.int
%262 = torch_c.to_i64 %261
%cast_39 = tensor.cast %cast_38 : tensor<64xi8> to tensor<64xi8>
%c1_40 = arith.constant 1 : index
%c0_41 = arith.constant 0 : index
%c64_42 = arith.constant 64 : index
%263 = tensor.empty() : tensor<64xf32>
%264 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_39 : tensor<64xi8>) outs(%263 : tensor<64xf32>) {
^bb0(%in: i8, %out: f32):
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %261
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %259
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<64xf32>
%cast_43 = tensor.cast %264 : tensor<64xf32> to tensor<64xf32>
// 2-D convolution with bias: input %cast_12 (1x3x224x224xf32), weight %cast_32
// (64x3x7x7xf32), bias %cast_43 (64xf32); result %cast_82 (1x64x112x112xf32).
// %cast_12 and %cast_32 are defined before this excerpt.
// Spatial padding is 3 (%int3/%int3_44 feed the pad amounts below); stride 2 and
// dilation 1 are given by the attributes on the conv op.
// NOTE(review): the [1,1], [2,2] and [0,0] lists presumably carry dilation, stride and
// output padding for the original torch op; they are not consumed by the ops shown here.
%int3 = torch.constant.int 3
%int3_44 = torch.constant.int 3
%int1 = torch.constant.int 1
%int1_45 = torch.constant.int 1
%int2 = torch.constant.int 2
%int2_46 = torch.constant.int 2
%int0 = torch.constant.int 0
%265 = torch.prim.ListConstruct %int3, %int3_44 : (!torch.int, !torch.int) -> !torch.list<int>
%266 = torch.prim.ListConstruct %int1, %int1_45 : (!torch.int, !torch.int) -> !torch.list<int>
%267 = torch.prim.ListConstruct %int2, %int2_46 : (!torch.int, !torch.int) -> !torch.list<int>
%268 = torch.prim.ListConstruct %int0, %int0 : (!torch.int, !torch.int) -> !torch.list<int>
%false = torch.constant.bool false
%int1_47 = torch.constant.int 1
// %269 is the group count (1); %270/%271 are the per-dimension padding (3).
%269 = torch_c.to_i64 %int1_47
%270 = torch_c.to_i64 %int3
%271 = torch_c.to_i64 %int3_44
%272 = torch_c.to_i64 %int0
%273 = torch_c.to_i64 %int0
%c0_48 = arith.constant 0 : index
%c1_49 = arith.constant 1 : index
%c1_50 = arith.constant 1 : index
%c3_51 = arith.constant 3 : index
%c2_52 = arith.constant 2 : index
%c224_53 = arith.constant 224 : index
%c3_54 = arith.constant 3 : index
%c224_55 = arith.constant 224 : index
%c0_56 = arith.constant 0 : index
%c64_57 = arith.constant 64 : index
%c1_58 = arith.constant 1 : index
%c3_59 = arith.constant 3 : index
%c2_60 = arith.constant 2 : index
%c7_61 = arith.constant 7 : index
%c3_62 = arith.constant 3 : index
%c7_63 = arith.constant 7 : index
// Runtime checks that the group count divides the channel counts 3 and 64.
%274 = arith.index_cast %269 : i64 to index
%c0_64 = arith.constant 0 : index
%275 = arith.remsi %c3_51, %274 : index
%276 = arith.cmpi eq, %c0_64, %275 : index
cf.assert %276, "invalid: groups must divide input channel size evenly."
%c0_65 = arith.constant 0 : index
%277 = arith.remsi %c64_57, %274 : index
%278 = arith.cmpi eq, %c0_65, %277 : index
cf.assert %278, "invalid: groups must divide weight batch size evenly."
%c1_i64 = arith.constant 1 : i64
%c1_i64_66 = arith.constant 1 : i64
%c2_i64 = arith.constant 2 : i64
%c2_i64_67 = arith.constant 2 : i64
%cst = arith.constant 0.000000e+00 : f32
%c0_68 = arith.constant 0 : index
%c1_69 = arith.constant 1 : index
%c1_70 = arith.constant 1 : index
%c3_71 = arith.constant 3 : index
%c2_72 = arith.constant 2 : index
%c224_73 = arith.constant 224 : index
%c3_74 = arith.constant 3 : index
%c224_75 = arith.constant 224 : index
%c0_i64 = arith.constant 0 : i64
// Zero-pad dims 2 and 3 by 3 on each side; dims 0 and 1 get no padding. The pad
// amounts are SSA values, so the result type is fully dynamic.
%279 = arith.index_cast %c0_i64 : i64 to index
%280 = arith.index_cast %c0_i64 : i64 to index
%281 = arith.index_cast %270 : i64 to index
%282 = arith.index_cast %271 : i64 to index
%padded = tensor.pad %cast_12 low[%279, %280, %281, %282] high[%279, %280, %281, %282] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst : f32
} : tensor<1x3x224x224xf32> to tensor<?x?x?x?xf32>
// Output extent of dim 2: floordiv(224 + 2*3 - 1*(7 - 1) - 1, 2) + 1 = 112.
%283 = arith.index_cast %c7_61 : index to i64
%c1_i64_76 = arith.constant 1 : i64
%c2_i64_77 = arith.constant 2 : i64
%284 = arith.muli %270, %c2_i64_77 : i64
%285 = arith.index_cast %c224_53 : index to i64
%286 = arith.addi %285, %284 : i64
%287 = arith.subi %283, %c1_i64_76 : i64
%288 = arith.muli %c1_i64, %287 : i64
%289 = arith.subi %286, %288 : i64
%290 = arith.subi %289, %c1_i64_76 : i64
%291 = arith.floordivsi %290, %c2_i64 : i64
%292 = arith.addi %291, %c1_i64_76 : i64
%293 = arith.index_cast %292 : i64 to index
// Same computation for dim 3.
%294 = arith.index_cast %c7_63 : index to i64
%c1_i64_78 = arith.constant 1 : i64
%c2_i64_79 = arith.constant 2 : i64
%295 = arith.muli %271, %c2_i64_79 : i64
%296 = arith.index_cast %c224_55 : index to i64
%297 = arith.addi %296, %295 : i64
%298 = arith.subi %294, %c1_i64_78 : i64
%299 = arith.muli %c1_i64_66, %298 : i64
%300 = arith.subi %297, %299 : i64
%301 = arith.subi %300, %c1_i64_78 : i64
%302 = arith.floordivsi %301, %c2_i64_67 : i64
%303 = arith.addi %302, %c1_i64_78 : i64
%304 = arith.index_cast %303 : i64 to index
// Initialise the accumulator by broadcasting the bias along dim 1 (#map3 selects d1).
%305 = tensor.empty(%293, %304) : tensor<1x64x?x?xf32>
%306 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_43 : tensor<64xf32>) outs(%305 : tensor<1x64x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x64x?x?xf32>
// Channels per group; not consumed by any op visible in this excerpt.
%307 = arith.floordivsi %c3_51, %274 : index
%308 = arith.floordivsi %c64_57, %274 : index
%c0_80 = arith.constant 0 : index
%c1_81 = arith.constant 1 : index
// The convolution accumulates into the bias-initialised tensor %306.
%309 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%padded, %cast_32 : tensor<?x?x?x?xf32>, tensor<64x3x7x7xf32>) outs(%306 : tensor<1x64x?x?xf32>) -> tensor<1x64x?x?xf32>
// Refine the dynamic result type to the static shape 1x64x112x112.
%cast_82 = tensor.cast %309 : tensor<1x64x?x?xf32> to tensor<1x64x112x112xf32>
// Element-wise ReLU over %cast_82 (1x64x112x112xf32): out = (in > 0) ? in : 0.
// The input indexing map #map pins dim 0 to the constant index 0.
%c1_83 = arith.constant 1 : index
%c1_84 = arith.constant 1 : index
%c64_85 = arith.constant 64 : index
%c2_86 = arith.constant 2 : index
%c112 = arith.constant 112 : index
%c3_87 = arith.constant 3 : index
%c112_88 = arith.constant 112 : index
%310 = tensor.empty() : tensor<1x64x112x112xf32>
%311 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_82 : tensor<1x64x112x112xf32>) outs(%310 : tensor<1x64x112x112xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// `ugt` is an unordered predicate: it is also true for NaN, so a NaN input is passed through.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x64x112x112xf32>
%cast_89 = tensor.cast %311 : tensor<1x64x112x112xf32> to tensor<1x64x112x112xf32>
// Quantize the activation %cast_89 (1x64x112x112xf32) to i8:
//   q = clamp(round(x / scale) + zero_point, -128, 127)
// with scale = 3.125e-02 (%312) and zero_point = 0 (%313).
%312 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%313 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): presumably a torch dtype code for the quantized type; it is not used by any op visible here.
%int12_90 = torch.constant.int 12
%314 = torch.aten.item %312 : !torch.vtensor<[],f32> -> !torch.float
%315 = torch_c.to_f64 %314
%316 = torch.aten.item %313 : !torch.vtensor<[],si8> -> !torch.int
%317 = torch_c.to_i64 %316
%c1_91 = arith.constant 1 : index
%c1_92 = arith.constant 1 : index
%c64_93 = arith.constant 64 : index
%c2_94 = arith.constant 2 : index
%c112_95 = arith.constant 112 : index
%c3_96 = arith.constant 3 : index
%c112_97 = arith.constant 112 : index
%318 = tensor.empty() : tensor<1x64x112x112xi8>
%319 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_89 : tensor<1x64x112x112xf32>) outs(%318 : tensor<1x64x112x112xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32, scale narrowed to f32.
%5774 = torch_c.to_i64 %316
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %314
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// math.round rounds halfway cases away from zero.
// NOTE(review): confirm this matches the rounding mode the quantized model expects.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to [-128, 127]; with the unordered predicates a NaN value becomes 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x64x112x112xi8>
// Both casts leave the type unchanged.
%cast_98 = tensor.cast %319 : tensor<1x64x112x112xi8> to tensor<1x64x112x112xi8>
%cast_99 = tensor.cast %cast_98 : tensor<1x64x112x112xi8> to tensor<1x64x112x112xi8>
// Dequantize %cast_99 (1x64x112x112xi8) to f32: x = (sext(q) - zero_point) * scale,
// with scale = 3.125e-02 (%320) and zero_point = 0 (%321).
%320 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%321 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%322 = torch.aten.item %320 : !torch.vtensor<[],f32> -> !torch.float
%323 = torch_c.to_f64 %322
%324 = torch.aten.item %321 : !torch.vtensor<[],si8> -> !torch.int
%325 = torch_c.to_i64 %324
%cast_100 = tensor.cast %cast_99 : tensor<1x64x112x112xi8> to tensor<1x64x112x112xi8>
%c1_101 = arith.constant 1 : index
%c1_102 = arith.constant 1 : index
%c64_103 = arith.constant 64 : index
%c2_104 = arith.constant 2 : index
%c112_105 = arith.constant 112 : index
%c3_106 = arith.constant 3 : index
%c112_107 = arith.constant 112 : index
%326 = tensor.empty() : tensor<1x64x112x112xf32>
%327 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_100 : tensor<1x64x112x112xi8>) outs(%326 : tensor<1x64x112x112xf32>) {
^bb0(%in: i8, %out: f32):
// Subtract the zero point in i32 to avoid i8 overflow.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %324
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale in f32.
%5779 = torch_c.to_f64 %322
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x64x112x112xf32>
%cast_108 = tensor.cast %327 : tensor<1x64x112x112xf32> to tensor<1x64x112x112xf32>
// 2-D max pooling over %cast_108 (1x64x112x112xf32) with a 3x3 window
// (%int3_109/%int3_110), stride 2 and dilation 1 (attributes on the pooling op),
// and 1 element of padding on each side of dims 2 and 3. Result: %cast_138 (1x64x56x56xf32).
// NOTE(review): the [1,1], [2,2] and second [1,1] lists presumably hold padding, stride
// and dilation of the original torch op; they are not consumed by the ops shown here.
%int3_109 = torch.constant.int 3
%int3_110 = torch.constant.int 3
%328 = torch.prim.ListConstruct %int3_109, %int3_110 : (!torch.int, !torch.int) -> !torch.list<int>
%int1_111 = torch.constant.int 1
%int1_112 = torch.constant.int 1
%329 = torch.prim.ListConstruct %int1_111, %int1_112 : (!torch.int, !torch.int) -> !torch.list<int>
%int2_113 = torch.constant.int 2
%int2_114 = torch.constant.int 2
%330 = torch.prim.ListConstruct %int2_113, %int2_114 : (!torch.int, !torch.int) -> !torch.list<int>
%int1_115 = torch.constant.int 1
%int1_116 = torch.constant.int 1
%331 = torch.prim.ListConstruct %int1_115, %int1_116 : (!torch.int, !torch.int) -> !torch.list<int>
%false_117 = torch.constant.bool false
%332 = torch_c.to_i64 %int3_109
%333 = torch_c.to_i64 %int3_110
// 0xFF800000 is the f32 bit pattern of -infinity: padding never wins the max, and it
// is also the initial value of the accumulator below.
%cst_118 = arith.constant 0xFF800000 : f32
%padded_119 = tensor.pad %cast_108 low[0, 0, 1, 1] high[0, 0, 1, 1] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_118 : f32
} : tensor<1x64x112x112xf32> to tensor<1x64x114x114xf32>
%c0_120 = arith.constant 0 : index
%c1_121 = arith.constant 1 : index
%c1_122 = arith.constant 1 : index
%c64_123 = arith.constant 64 : index
%c1_i64_124 = arith.constant 1 : i64
%c1_i64_125 = arith.constant 1 : i64
%c1_i64_126 = arith.constant 1 : i64
%c1_i64_127 = arith.constant 1 : i64
%c2_i64_128 = arith.constant 2 : i64
%c2_i64_129 = arith.constant 2 : i64
%c3_130 = arith.constant 3 : index
%c112_131 = arith.constant 112 : index
%c1_i64_132 = arith.constant 1 : i64
%c2_i64_133 = arith.constant 2 : i64
// Output extent of dim 3: floordiv(112 + 1*2 - 1*(3 - 1) - 1, 2) + 1 = 56.
%334 = arith.muli %c1_i64_125, %c2_i64_133 : i64
%335 = arith.index_cast %c112_131 : index to i64
%336 = arith.addi %335, %334 : i64
%337 = arith.subi %333, %c1_i64_132 : i64
%338 = arith.muli %c1_i64_127, %337 : i64
%339 = arith.subi %336, %338 : i64
%340 = arith.subi %339, %c1_i64_132 : i64
%341 = arith.floordivsi %340, %c2_i64_129 : i64
%342 = arith.addi %341, %c1_i64_132 : i64
%343 = arith.index_cast %342 : i64 to index
%c2_134 = arith.constant 2 : index
%c112_135 = arith.constant 112 : index
%c1_i64_136 = arith.constant 1 : i64
%c2_i64_137 = arith.constant 2 : i64
// Same computation for dim 2.
%344 = arith.muli %c1_i64_124, %c2_i64_137 : i64
%345 = arith.index_cast %c112_135 : index to i64
%346 = arith.addi %345, %344 : i64
%347 = arith.subi %332, %c1_i64_136 : i64
%348 = arith.muli %c1_i64_126, %347 : i64
%349 = arith.subi %346, %348 : i64
%350 = arith.subi %349, %c1_i64_136 : i64
%351 = arith.floordivsi %350, %c2_i64_128 : i64
%352 = arith.addi %351, %c1_i64_136 : i64
%353 = arith.index_cast %352 : i64 to index
// Accumulator filled with -infinity.
%354 = tensor.empty(%353, %343) : tensor<1x64x?x?xf32>
%355 = linalg.fill ins(%cst_118 : f32) outs(%354 : tensor<1x64x?x?xf32>) -> tensor<1x64x?x?xf32>
// %358 is an uninitialised 3x3 tensor passed as the pooling window operand.
%356 = arith.index_cast %332 : i64 to index
%357 = arith.index_cast %333 : i64 to index
%358 = tensor.empty(%356, %357) : tensor<?x?xf32>
%359 = linalg.pooling_nchw_max {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%padded_119, %358 : tensor<1x64x114x114xf32>, tensor<?x?xf32>) outs(%355 : tensor<1x64x?x?xf32>) -> tensor<1x64x?x?xf32>
// Refine the dynamic result type to the static shape 1x64x56x56.
%cast_138 = tensor.cast %359 : tensor<1x64x?x?xf32> to tensor<1x64x56x56xf32>
// Quantize the pooled activation %cast_138 (1x64x56x56xf32) to i8:
//   q = clamp(round(x / scale) + zero_point, -128, 127)
// with scale = 3.125e-02 (%360) and zero_point = 0 (%361).
%360 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%361 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): presumably a torch dtype code for the quantized type; it is not used by any op visible here.
%int12_139 = torch.constant.int 12
%362 = torch.aten.item %360 : !torch.vtensor<[],f32> -> !torch.float
%363 = torch_c.to_f64 %362
%364 = torch.aten.item %361 : !torch.vtensor<[],si8> -> !torch.int
%365 = torch_c.to_i64 %364
%c1_140 = arith.constant 1 : index
%c1_141 = arith.constant 1 : index
%c64_142 = arith.constant 64 : index
%c2_143 = arith.constant 2 : index
%c56 = arith.constant 56 : index
%c3_144 = arith.constant 3 : index
%c56_145 = arith.constant 56 : index
%366 = tensor.empty() : tensor<1x64x56x56xi8>
%367 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_138 : tensor<1x64x56x56xf32>) outs(%366 : tensor<1x64x56x56xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32, scale narrowed to f32.
%5774 = torch_c.to_i64 %364
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %362
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// math.round rounds halfway cases away from zero.
// NOTE(review): confirm this matches the rounding mode the quantized model expects.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to [-128, 127]; with the unordered predicates a NaN value becomes 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x64x56x56xi8>
// Both casts leave the type unchanged.
%cast_146 = tensor.cast %367 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
%cast_147 = tensor.cast %cast_146 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
// Dequantize %cast_147 (1x64x56x56xi8) to f32: x = (sext(q) - zero_point) * scale,
// with scale = 3.125e-02 (%368) and zero_point = 0 (%369).
%368 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%369 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%370 = torch.aten.item %368 : !torch.vtensor<[],f32> -> !torch.float
%371 = torch_c.to_f64 %370
%372 = torch.aten.item %369 : !torch.vtensor<[],si8> -> !torch.int
%373 = torch_c.to_i64 %372
%cast_148 = tensor.cast %cast_147 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
%c1_149 = arith.constant 1 : index
%c1_150 = arith.constant 1 : index
%c64_151 = arith.constant 64 : index
%c2_152 = arith.constant 2 : index
%c56_153 = arith.constant 56 : index
%c3_154 = arith.constant 3 : index
%c56_155 = arith.constant 56 : index
%374 = tensor.empty() : tensor<1x64x56x56xf32>
%375 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_148 : tensor<1x64x56x56xi8>) outs(%374 : tensor<1x64x56x56xf32>) {
^bb0(%in: i8, %out: f32):
// Subtract the zero point in i32 to avoid i8 overflow.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %372
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale in f32.
%5779 = torch_c.to_f64 %370
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x64x56x56xf32>
%cast_156 = tensor.cast %375 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
// Quantize the 64x64x1x1 f32 weight tensor %6 to i8:
//   q = clamp(round(x / scale) + zero_point, -128, 127)
// with scale = 3.90625e-03 (%376) and zero_point = 0 (%377).
%376 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%377 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): presumably a torch dtype code for the quantized type; it is not used by any op visible here.
%int12_157 = torch.constant.int 12
%378 = torch.aten.item %376 : !torch.vtensor<[],f32> -> !torch.float
%379 = torch_c.to_f64 %378
%380 = torch.aten.item %377 : !torch.vtensor<[],si8> -> !torch.int
%381 = torch_c.to_i64 %380
%c1_158 = arith.constant 1 : index
%c0_159 = arith.constant 0 : index
%c64_160 = arith.constant 64 : index
%c1_161 = arith.constant 1 : index
%c64_162 = arith.constant 64 : index
%382 = tensor.empty() : tensor<64x64x1x1xi8>
// The input map #map4 pins the two unit trailing dims to index 0.
%383 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%6 : tensor<64x64x1x1xf32>) outs(%382 : tensor<64x64x1x1xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32, scale narrowed to f32.
%5774 = torch_c.to_i64 %380
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %378
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// math.round rounds halfway cases away from zero.
// NOTE(review): confirm this matches the rounding mode the quantized model expects.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to [-128, 127]; with the unordered predicates a NaN value becomes 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<64x64x1x1xi8>
// Both casts leave the type unchanged.
%cast_163 = tensor.cast %383 : tensor<64x64x1x1xi8> to tensor<64x64x1x1xi8>
%cast_164 = tensor.cast %cast_163 : tensor<64x64x1x1xi8> to tensor<64x64x1x1xi8>
// Dequantize the weight %cast_164 (64x64x1x1xi8) to f32: x = (sext(q) - zero_point) * scale,
// with scale = 3.90625e-03 (%384) and zero_point = 0 (%385).
%384 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%385 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%386 = torch.aten.item %384 : !torch.vtensor<[],f32> -> !torch.float
%387 = torch_c.to_f64 %386
%388 = torch.aten.item %385 : !torch.vtensor<[],si8> -> !torch.int
%389 = torch_c.to_i64 %388
%cast_165 = tensor.cast %cast_164 : tensor<64x64x1x1xi8> to tensor<64x64x1x1xi8>
%c1_166 = arith.constant 1 : index
%c0_167 = arith.constant 0 : index
%c64_168 = arith.constant 64 : index
%c1_169 = arith.constant 1 : index
%c64_170 = arith.constant 64 : index
%390 = tensor.empty() : tensor<64x64x1x1xf32>
%391 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_165 : tensor<64x64x1x1xi8>) outs(%390 : tensor<64x64x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// Subtract the zero point in i32 to avoid i8 overflow.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %388
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale in f32.
%5779 = torch_c.to_f64 %386
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<64x64x1x1xf32>
%cast_171 = tensor.cast %391 : tensor<64x64x1x1xf32> to tensor<64x64x1x1xf32>
// Quantize the 64-element f32 tensor %8 to i8:
//   q = clamp(round(x / scale) + zero_point, -128, 127)
// with scale = 7.8125e-03 (%392) and zero_point = 0 (%393).
%392 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%393 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): presumably a torch dtype code for the quantized type; it is not used by any op visible here.
%int12_172 = torch.constant.int 12
%394 = torch.aten.item %392 : !torch.vtensor<[],f32> -> !torch.float
%395 = torch_c.to_f64 %394
%396 = torch.aten.item %393 : !torch.vtensor<[],si8> -> !torch.int
%397 = torch_c.to_i64 %396
%c1_173 = arith.constant 1 : index
%c0_174 = arith.constant 0 : index
%c64_175 = arith.constant 64 : index
%398 = tensor.empty() : tensor<64xi8>
%399 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%8 : tensor<64xf32>) outs(%398 : tensor<64xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32, scale narrowed to f32.
%5774 = torch_c.to_i64 %396
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %394
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// math.round rounds halfway cases away from zero.
// NOTE(review): confirm this matches the rounding mode the quantized model expects.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to [-128, 127]; with the unordered predicates a NaN value becomes 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<64xi8>
// Both casts leave the type unchanged.
%cast_176 = tensor.cast %399 : tensor<64xi8> to tensor<64xi8>
%cast_177 = tensor.cast %cast_176 : tensor<64xi8> to tensor<64xi8>
// Dequantize %cast_177 (64xi8) to f32: x = (sext(q) - zero_point) * scale,
// with scale = 7.8125e-03 (%400) and zero_point = 0 (%401).
%400 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%401 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%402 = torch.aten.item %400 : !torch.vtensor<[],f32> -> !torch.float
%403 = torch_c.to_f64 %402
%404 = torch.aten.item %401 : !torch.vtensor<[],si8> -> !torch.int
%405 = torch_c.to_i64 %404
%cast_178 = tensor.cast %cast_177 : tensor<64xi8> to tensor<64xi8>
%c1_179 = arith.constant 1 : index
%c0_180 = arith.constant 0 : index
%c64_181 = arith.constant 64 : index
%406 = tensor.empty() : tensor<64xf32>
%407 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_178 : tensor<64xi8>) outs(%406 : tensor<64xf32>) {
^bb0(%in: i8, %out: f32):
// Subtract the zero point in i32 to avoid i8 overflow.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %404
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale in f32.
%5779 = torch_c.to_f64 %402
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<64xf32>
%cast_182 = tensor.cast %407 : tensor<64xf32> to tensor<64xf32>
// 2-D convolution with bias: input %cast_156 (1x64x56x56xf32), weight %cast_171
// (64x64x1x1xf32), bias %cast_182 (64xf32); result %cast_231 (1x64x56x56xf32).
// Spatial padding is 0 (%int0_183/%int0_184 feed the pad amounts below); stride 1 and
// dilation 1 are given by the attributes on the conv op.
// NOTE(review): the two [1,1] lists and the second [0,0] list presumably carry dilation,
// stride and output padding for the original torch op; they are not consumed by the ops shown here.
%int0_183 = torch.constant.int 0
%int0_184 = torch.constant.int 0
%int1_185 = torch.constant.int 1
%int1_186 = torch.constant.int 1
%int1_187 = torch.constant.int 1
%int1_188 = torch.constant.int 1
%int0_189 = torch.constant.int 0
%408 = torch.prim.ListConstruct %int0_183, %int0_184 : (!torch.int, !torch.int) -> !torch.list<int>
%409 = torch.prim.ListConstruct %int1_185, %int1_186 : (!torch.int, !torch.int) -> !torch.list<int>
%410 = torch.prim.ListConstruct %int1_187, %int1_188 : (!torch.int, !torch.int) -> !torch.list<int>
%411 = torch.prim.ListConstruct %int0_189, %int0_189 : (!torch.int, !torch.int) -> !torch.list<int>
%false_190 = torch.constant.bool false
%int1_191 = torch.constant.int 1
// %412 is the group count (1); %413/%414 are the per-dimension padding (0).
%412 = torch_c.to_i64 %int1_191
%413 = torch_c.to_i64 %int0_183
%414 = torch_c.to_i64 %int0_184
%415 = torch_c.to_i64 %int0_189
%416 = torch_c.to_i64 %int0_189
%c0_192 = arith.constant 0 : index
%c1_193 = arith.constant 1 : index
%c1_194 = arith.constant 1 : index
%c64_195 = arith.constant 64 : index
%c2_196 = arith.constant 2 : index
%c56_197 = arith.constant 56 : index
%c3_198 = arith.constant 3 : index
%c56_199 = arith.constant 56 : index
%c0_200 = arith.constant 0 : index
%c64_201 = arith.constant 64 : index
%c1_202 = arith.constant 1 : index
%c64_203 = arith.constant 64 : index
%c2_204 = arith.constant 2 : index
%c1_205 = arith.constant 1 : index
%c3_206 = arith.constant 3 : index
%c1_207 = arith.constant 1 : index
// Runtime checks that the group count divides both channel counts (64 and 64).
%417 = arith.index_cast %412 : i64 to index
%c0_208 = arith.constant 0 : index
%418 = arith.remsi %c64_195, %417 : index
%419 = arith.cmpi eq, %c0_208, %418 : index
cf.assert %419, "invalid: groups must divide input channel size evenly."
%c0_209 = arith.constant 0 : index
%420 = arith.remsi %c64_201, %417 : index
%421 = arith.cmpi eq, %c0_209, %420 : index
cf.assert %421, "invalid: groups must divide weight batch size evenly."
%c1_i64_210 = arith.constant 1 : i64
%c1_i64_211 = arith.constant 1 : i64
%c1_i64_212 = arith.constant 1 : i64
%c1_i64_213 = arith.constant 1 : i64
%cst_214 = arith.constant 0.000000e+00 : f32
%c0_215 = arith.constant 0 : index
%c1_216 = arith.constant 1 : index
%c1_217 = arith.constant 1 : index
%c64_218 = arith.constant 64 : index
%c2_219 = arith.constant 2 : index
%c56_220 = arith.constant 56 : index
%c3_221 = arith.constant 3 : index
%c56_222 = arith.constant 56 : index
%c0_i64_223 = arith.constant 0 : i64
// All pad amounts are 0 here, but they are SSA values, so the padded type is still
// fully dynamic.
%422 = arith.index_cast %c0_i64_223 : i64 to index
%423 = arith.index_cast %c0_i64_223 : i64 to index
%424 = arith.index_cast %413 : i64 to index
%425 = arith.index_cast %414 : i64 to index
%padded_224 = tensor.pad %cast_156 low[%422, %423, %424, %425] high[%422, %423, %424, %425] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_214 : f32
} : tensor<1x64x56x56xf32> to tensor<?x?x?x?xf32>
// Output extent of dim 2: floordiv(56 + 2*0 - 1*(1 - 1) - 1, 1) + 1 = 56.
%426 = arith.index_cast %c1_205 : index to i64
%c1_i64_225 = arith.constant 1 : i64
%c2_i64_226 = arith.constant 2 : i64
%427 = arith.muli %413, %c2_i64_226 : i64
%428 = arith.index_cast %c56_197 : index to i64
%429 = arith.addi %428, %427 : i64
%430 = arith.subi %426, %c1_i64_225 : i64
%431 = arith.muli %c1_i64_210, %430 : i64
%432 = arith.subi %429, %431 : i64
%433 = arith.subi %432, %c1_i64_225 : i64
%434 = arith.floordivsi %433, %c1_i64_212 : i64
%435 = arith.addi %434, %c1_i64_225 : i64
%436 = arith.index_cast %435 : i64 to index
// Same computation for dim 3.
%437 = arith.index_cast %c1_207 : index to i64
%c1_i64_227 = arith.constant 1 : i64
%c2_i64_228 = arith.constant 2 : i64
%438 = arith.muli %414, %c2_i64_228 : i64
%439 = arith.index_cast %c56_199 : index to i64
%440 = arith.addi %439, %438 : i64
%441 = arith.subi %437, %c1_i64_227 : i64
%442 = arith.muli %c1_i64_211, %441 : i64
%443 = arith.subi %440, %442 : i64
%444 = arith.subi %443, %c1_i64_227 : i64
%445 = arith.floordivsi %444, %c1_i64_213 : i64
%446 = arith.addi %445, %c1_i64_227 : i64
%447 = arith.index_cast %446 : i64 to index
// Initialise the accumulator by broadcasting the bias along dim 1 (#map3 selects d1).
%448 = tensor.empty(%436, %447) : tensor<1x64x?x?xf32>
%449 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_182 : tensor<64xf32>) outs(%448 : tensor<1x64x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x64x?x?xf32>
// Channels per group; not consumed by any op visible in this excerpt.
%450 = arith.floordivsi %c64_195, %417 : index
%451 = arith.floordivsi %c64_201, %417 : index
%c0_229 = arith.constant 0 : index
%c1_230 = arith.constant 1 : index
// The convolution accumulates into the bias-initialised tensor %449.
%452 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_224, %cast_171 : tensor<?x?x?x?xf32>, tensor<64x64x1x1xf32>) outs(%449 : tensor<1x64x?x?xf32>) -> tensor<1x64x?x?xf32>
// Refine the dynamic result type to the static shape 1x64x56x56.
%cast_231 = tensor.cast %452 : tensor<1x64x?x?xf32> to tensor<1x64x56x56xf32>
// Element-wise ReLU over %cast_231 (1x64x56x56xf32): out = (in > 0) ? in : 0.
// The input indexing map #map pins dim 0 to the constant index 0.
%c1_232 = arith.constant 1 : index
%c1_233 = arith.constant 1 : index
%c64_234 = arith.constant 64 : index
%c2_235 = arith.constant 2 : index
%c56_236 = arith.constant 56 : index
%c3_237 = arith.constant 3 : index
%c56_238 = arith.constant 56 : index
%453 = tensor.empty() : tensor<1x64x56x56xf32>
%454 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_231 : tensor<1x64x56x56xf32>) outs(%453 : tensor<1x64x56x56xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// `ugt` is an unordered predicate: it is also true for NaN, so a NaN input is passed through.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x64x56x56xf32>
%cast_239 = tensor.cast %454 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
// Quantize the activation %cast_239 (1x64x56x56xf32) to i8:
//   q = clamp(round(x / scale) + zero_point, -128, 127)
// with scale = 1.5625e-02 (%455) and zero_point = 0 (%456).
%455 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%456 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): presumably a torch dtype code for the quantized type; it is not used by any op visible here.
%int12_240 = torch.constant.int 12
%457 = torch.aten.item %455 : !torch.vtensor<[],f32> -> !torch.float
%458 = torch_c.to_f64 %457
%459 = torch.aten.item %456 : !torch.vtensor<[],si8> -> !torch.int
%460 = torch_c.to_i64 %459
%c1_241 = arith.constant 1 : index
%c1_242 = arith.constant 1 : index
%c64_243 = arith.constant 64 : index
%c2_244 = arith.constant 2 : index
%c56_245 = arith.constant 56 : index
%c3_246 = arith.constant 3 : index
%c56_247 = arith.constant 56 : index
%461 = tensor.empty() : tensor<1x64x56x56xi8>
%462 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_239 : tensor<1x64x56x56xf32>) outs(%461 : tensor<1x64x56x56xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32, scale narrowed to f32.
%5774 = torch_c.to_i64 %459
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %457
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// math.round rounds halfway cases away from zero.
// NOTE(review): confirm this matches the rounding mode the quantized model expects.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to [-128, 127]; with the unordered predicates a NaN value becomes 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x64x56x56xi8>
// Both casts leave the type unchanged.
%cast_248 = tensor.cast %462 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
%cast_249 = tensor.cast %cast_248 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
// Dequantize %cast_249 (1x64x56x56xi8) to f32: x = (sext(q) - zero_point) * scale,
// with scale = 1.5625e-02 (%463) and zero_point = 0 (%464).
%463 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%464 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%465 = torch.aten.item %463 : !torch.vtensor<[],f32> -> !torch.float
%466 = torch_c.to_f64 %465
%467 = torch.aten.item %464 : !torch.vtensor<[],si8> -> !torch.int
%468 = torch_c.to_i64 %467
%cast_250 = tensor.cast %cast_249 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
%c1_251 = arith.constant 1 : index
%c1_252 = arith.constant 1 : index
%c64_253 = arith.constant 64 : index
%c2_254 = arith.constant 2 : index
%c56_255 = arith.constant 56 : index
%c3_256 = arith.constant 3 : index
%c56_257 = arith.constant 56 : index
%469 = tensor.empty() : tensor<1x64x56x56xf32>
%470 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_250 : tensor<1x64x56x56xi8>) outs(%469 : tensor<1x64x56x56xf32>) {
^bb0(%in: i8, %out: f32):
// Subtract the zero point in i32 to avoid i8 overflow.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %467
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale in f32.
%5779 = torch_c.to_f64 %465
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x64x56x56xf32>
%cast_258 = tensor.cast %470 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
// Quantize the 64x64x3x3 f32 tensor %10 (defined before this excerpt) to i8:
//   q = clamp(round(x / scale) + zero_point, -128, 127)
// with scale = 3.90625e-03 (%471) and zero_point = 0 (%472).
%471 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%472 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): presumably a torch dtype code for the quantized type; it is not used by any op visible here.
%int12_259 = torch.constant.int 12
%473 = torch.aten.item %471 : !torch.vtensor<[],f32> -> !torch.float
%474 = torch_c.to_f64 %473
%475 = torch.aten.item %472 : !torch.vtensor<[],si8> -> !torch.int
%476 = torch_c.to_i64 %475
%c1_260 = arith.constant 1 : index
%c0_261 = arith.constant 0 : index
%c64_262 = arith.constant 64 : index
%c1_263 = arith.constant 1 : index
%c64_264 = arith.constant 64 : index
%c2_265 = arith.constant 2 : index
%c3_266 = arith.constant 3 : index
%c3_267 = arith.constant 3 : index
%c3_268 = arith.constant 3 : index
%477 = tensor.empty() : tensor<64x64x3x3xi8>
%478 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%10 : tensor<64x64x3x3xf32>) outs(%477 : tensor<64x64x3x3xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32, scale narrowed to f32.
%5774 = torch_c.to_i64 %475
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %473
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// math.round rounds halfway cases away from zero.
// NOTE(review): confirm this matches the rounding mode the quantized model expects.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to [-128, 127]; with the unordered predicates a NaN value becomes 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<64x64x3x3xi8>
// Both casts leave the type unchanged.
%cast_269 = tensor.cast %478 : tensor<64x64x3x3xi8> to tensor<64x64x3x3xi8>
%cast_270 = tensor.cast %cast_269 : tensor<64x64x3x3xi8> to tensor<64x64x3x3xi8>
// Dequantize %cast_270 (64x64x3x3xi8) to f32: x = (sext(q) - zero_point) * scale,
// with scale = 3.90625e-03 (%479) and zero_point = 0 (%480).
%479 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%480 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%481 = torch.aten.item %479 : !torch.vtensor<[],f32> -> !torch.float
%482 = torch_c.to_f64 %481
%483 = torch.aten.item %480 : !torch.vtensor<[],si8> -> !torch.int
%484 = torch_c.to_i64 %483
%cast_271 = tensor.cast %cast_270 : tensor<64x64x3x3xi8> to tensor<64x64x3x3xi8>
%c1_272 = arith.constant 1 : index
%c0_273 = arith.constant 0 : index
%c64_274 = arith.constant 64 : index
%c1_275 = arith.constant 1 : index
%c64_276 = arith.constant 64 : index
%c2_277 = arith.constant 2 : index
%c3_278 = arith.constant 3 : index
%c3_279 = arith.constant 3 : index
%c3_280 = arith.constant 3 : index
%485 = tensor.empty() : tensor<64x64x3x3xf32>
%486 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_271 : tensor<64x64x3x3xi8>) outs(%485 : tensor<64x64x3x3xf32>) {
^bb0(%in: i8, %out: f32):
// Subtract the zero point in i32 to avoid i8 overflow.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %483
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale in f32.
%5779 = torch_c.to_f64 %481
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<64x64x3x3xf32>
%cast_281 = tensor.cast %486 : tensor<64x64x3x3xf32> to tensor<64x64x3x3xf32>
// Quantize the 64-element f32 tensor %12 (defined before this excerpt) to i8:
//   q = clamp(round(x / scale) + zero_point, -128, 127)
// with scale = 3.125e-02 (%487) and zero_point = 0 (%488).
%487 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%488 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): presumably a torch dtype code for the quantized type; it is not used by any op visible here.
%int12_282 = torch.constant.int 12
%489 = torch.aten.item %487 : !torch.vtensor<[],f32> -> !torch.float
%490 = torch_c.to_f64 %489
%491 = torch.aten.item %488 : !torch.vtensor<[],si8> -> !torch.int
%492 = torch_c.to_i64 %491
%c1_283 = arith.constant 1 : index
%c0_284 = arith.constant 0 : index
%c64_285 = arith.constant 64 : index
%493 = tensor.empty() : tensor<64xi8>
%494 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%12 : tensor<64xf32>) outs(%493 : tensor<64xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32, scale narrowed to f32.
%5774 = torch_c.to_i64 %491
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %489
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// math.round rounds halfway cases away from zero.
// NOTE(review): confirm this matches the rounding mode the quantized model expects.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to [-128, 127]; with the unordered predicates a NaN value becomes 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<64xi8>
// Both casts leave the type unchanged.
%cast_286 = tensor.cast %494 : tensor<64xi8> to tensor<64xi8>
%cast_287 = tensor.cast %cast_286 : tensor<64xi8> to tensor<64xi8>
// Dequantize stage: converts the 64-element i8 tensor %cast_287 back to f32 as
//   out = (in - zero_point) * scale
// with scale taken from %495 (3.125000e-02 = 1/32) and zero_point from %496 (0).
%495 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%496 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %498, %500 and the index constants are not referenced within this stage.
%497 = torch.aten.item %495 : !torch.vtensor<[],f32> -> !torch.float
%498 = torch_c.to_f64 %497
%499 = torch.aten.item %496 : !torch.vtensor<[],si8> -> !torch.int
%500 = torch_c.to_i64 %499
%cast_288 = tensor.cast %cast_287 : tensor<64xi8> to tensor<64xi8>
%c1_289 = arith.constant 1 : index
%c0_290 = arith.constant 0 : index
%c64_291 = arith.constant 64 : index
%501 = tensor.empty() : tensor<64xf32>
%502 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_288 : tensor<64xi8>) outs(%501 : tensor<64xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend to i32 and subtract the zero point.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %499
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
// Convert to f32 and multiply by the scale.
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %497
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<64xf32>
%cast_292 = tensor.cast %502 : tensor<64xf32> to tensor<64xf32>
// Convolution stage: 2-D NCHW convolution of %cast_258 (1x64x56x56, defined earlier
// in this function) with the 64x64x3x3 filter %cast_281. The input is zero-padded
// by 1 on both sides of the two trailing dimensions; strides and dilations are 1.
// The accumulator is initialised per output channel from %cast_292 (presumably the
// bias). The result is cast to the static shape 1x64x56x56 as %cast_341.
%int1_293 = torch.constant.int 1
%int1_294 = torch.constant.int 1
%int1_295 = torch.constant.int 1
%int1_296 = torch.constant.int 1
%int1_297 = torch.constant.int 1
%int1_298 = torch.constant.int 1
%int0_299 = torch.constant.int 0
// The four lists below are not referenced within this stage; only the underlying
// !torch.int constants are consumed (via torch_c.to_i64).
%503 = torch.prim.ListConstruct %int1_293, %int1_294 : (!torch.int, !torch.int) -> !torch.list<int>
%504 = torch.prim.ListConstruct %int1_295, %int1_296 : (!torch.int, !torch.int) -> !torch.list<int>
%505 = torch.prim.ListConstruct %int1_297, %int1_298 : (!torch.int, !torch.int) -> !torch.list<int>
%506 = torch.prim.ListConstruct %int0_299, %int0_299 : (!torch.int, !torch.int) -> !torch.list<int>
%false_300 = torch.constant.bool false
%int1_301 = torch.constant.int 1
// %507 is the group count (see the assert messages below); %508 / %509 are the
// padding amounts used by tensor.pad and the output-size arithmetic.
%507 = torch_c.to_i64 %int1_301
%508 = torch_c.to_i64 %int1_293
%509 = torch_c.to_i64 %int1_294
%510 = torch_c.to_i64 %int0_299
%511 = torch_c.to_i64 %int0_299
%c0_302 = arith.constant 0 : index
%c1_303 = arith.constant 1 : index
%c1_304 = arith.constant 1 : index
%c64_305 = arith.constant 64 : index
%c2_306 = arith.constant 2 : index
%c56_307 = arith.constant 56 : index
%c3_308 = arith.constant 3 : index
%c56_309 = arith.constant 56 : index
%c0_310 = arith.constant 0 : index
%c64_311 = arith.constant 64 : index
%c1_312 = arith.constant 1 : index
%c64_313 = arith.constant 64 : index
%c2_314 = arith.constant 2 : index
%c3_315 = arith.constant 3 : index
%c3_316 = arith.constant 3 : index
%c3_317 = arith.constant 3 : index
// Runtime checks that the group count divides 64 (input channels) and 64 (weight batch).
%512 = arith.index_cast %507 : i64 to index
%c0_318 = arith.constant 0 : index
%513 = arith.remsi %c64_305, %512 : index
%514 = arith.cmpi eq, %c0_318, %513 : index
cf.assert %514, "invalid: groups must divide input channel size evenly."
%c0_319 = arith.constant 0 : index
%515 = arith.remsi %c64_311, %512 : index
%516 = arith.cmpi eq, %c0_319, %515 : index
cf.assert %516, "invalid: groups must divide weight batch size evenly."
// Constant-1 i64 values: %c1_i64_320 / _321 multiply (kernel - 1) and %c1_i64_322 /
// _323 are the floor-division divisors below (presumably dilation and stride).
%c1_i64_320 = arith.constant 1 : i64
%c1_i64_321 = arith.constant 1 : i64
%c1_i64_322 = arith.constant 1 : i64
%c1_i64_323 = arith.constant 1 : i64
%cst_324 = arith.constant 0.000000e+00 : f32
%c0_325 = arith.constant 0 : index
%c1_326 = arith.constant 1 : index
%c1_327 = arith.constant 1 : index
%c64_328 = arith.constant 64 : index
%c2_329 = arith.constant 2 : index
%c56_330 = arith.constant 56 : index
%c3_331 = arith.constant 3 : index
%c56_332 = arith.constant 56 : index
%c0_i64_333 = arith.constant 0 : i64
// Pad amounts: 0 on the two leading dimensions, %508 / %509 on the two trailing ones.
%517 = arith.index_cast %c0_i64_333 : i64 to index
%518 = arith.index_cast %c0_i64_333 : i64 to index
%519 = arith.index_cast %508 : i64 to index
%520 = arith.index_cast %509 : i64 to index
// Pad with 0.0; the result type is fully dynamic.
%padded_334 = tensor.pad %cast_258 low[%517, %518, %519, %520] high[%517, %518, %519, %520] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_324 : f32
} : tensor<1x64x56x56xf32> to tensor<?x?x?x?xf32>
// First output spatial size:
//   (56 + 2*pad - 1*(3 - 1) - 1) floordiv 1 + 1
%521 = arith.index_cast %c3_315 : index to i64
%c1_i64_335 = arith.constant 1 : i64
%c2_i64_336 = arith.constant 2 : i64
%522 = arith.muli %508, %c2_i64_336 : i64
%523 = arith.index_cast %c56_307 : index to i64
%524 = arith.addi %523, %522 : i64
%525 = arith.subi %521, %c1_i64_335 : i64
%526 = arith.muli %c1_i64_320, %525 : i64
%527 = arith.subi %524, %526 : i64
%528 = arith.subi %527, %c1_i64_335 : i64
%529 = arith.floordivsi %528, %c1_i64_322 : i64
%530 = arith.addi %529, %c1_i64_335 : i64
%531 = arith.index_cast %530 : i64 to index
// Second output spatial size, same formula using %509.
%532 = arith.index_cast %c3_317 : index to i64
%c1_i64_337 = arith.constant 1 : i64
%c2_i64_338 = arith.constant 2 : i64
%533 = arith.muli %509, %c2_i64_338 : i64
%534 = arith.index_cast %c56_309 : index to i64
%535 = arith.addi %534, %533 : i64
%536 = arith.subi %532, %c1_i64_337 : i64
%537 = arith.muli %c1_i64_321, %536 : i64
%538 = arith.subi %535, %537 : i64
%539 = arith.subi %538, %c1_i64_337 : i64
%540 = arith.floordivsi %539, %c1_i64_323 : i64
%541 = arith.addi %540, %c1_i64_337 : i64
%542 = arith.index_cast %541 : i64 to index
// Fill the output buffer by broadcasting %cast_292 along d1 (#map3 selects d1 only);
// this becomes the initial accumulator value of the convolution.
%543 = tensor.empty(%531, %542) : tensor<1x64x?x?xf32>
%544 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_292 : tensor<64xf32>) outs(%543 : tensor<1x64x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x64x?x?xf32>
// %545, %546 and the two index constants after them are not referenced within this stage.
%545 = arith.floordivsi %c64_305, %512 : index
%546 = arith.floordivsi %c64_311, %512 : index
%c0_339 = arith.constant 0 : index
%c1_340 = arith.constant 1 : index
%547 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_334, %cast_281 : tensor<?x?x?x?xf32>, tensor<64x64x3x3xf32>) outs(%544 : tensor<1x64x?x?xf32>) -> tensor<1x64x?x?xf32>
// Recover the static result shape.
%cast_341 = tensor.cast %547 : tensor<1x64x?x?xf32> to tensor<1x64x56x56xf32>
// Elementwise rectifier on %cast_341: out = in if (in > 0.0) else 0.0.
// The index constants below are not referenced within this stage.
%c1_342 = arith.constant 1 : index
%c1_343 = arith.constant 1 : index
%c64_344 = arith.constant 64 : index
%c2_345 = arith.constant 2 : index
%c56_346 = arith.constant 56 : index
%c3_347 = arith.constant 3 : index
%c56_348 = arith.constant 56 : index
%548 = tensor.empty() : tensor<1x64x56x56xf32>
%549 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_341 : tensor<1x64x56x56xf32>) outs(%548 : tensor<1x64x56x56xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// `ugt` is the unordered greater-than predicate: it is also true when an operand
// is NaN, so a NaN input is yielded unchanged rather than replaced by 0.0.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x64x56x56xf32>
%cast_349 = tensor.cast %549 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
// Quantize stage: converts the 1x64x56x56 f32 tensor %cast_349 to i8 elementwise as
//   out = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
// with scale taken from %550 (1.562500e-02 = 1/64) and zero_point from %551 (0).
%550 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%551 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_350, %553, %555 and the index constants are not referenced within this stage.
%int12_350 = torch.constant.int 12
%552 = torch.aten.item %550 : !torch.vtensor<[],f32> -> !torch.float
%553 = torch_c.to_f64 %552
%554 = torch.aten.item %551 : !torch.vtensor<[],si8> -> !torch.int
%555 = torch_c.to_i64 %554
%c1_351 = arith.constant 1 : index
%c1_352 = arith.constant 1 : index
%c64_353 = arith.constant 64 : index
%c2_354 = arith.constant 2 : index
%c56_355 = arith.constant 56 : index
%c3_356 = arith.constant 3 : index
%c56_357 = arith.constant 56 : index
%556 = tensor.empty() : tensor<1x64x56x56xi8>
%557 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_349 : tensor<1x64x56x56xf32>) outs(%556 : tensor<1x64x56x56xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32 and scale as f32.
%5774 = torch_c.to_i64 %554
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %552
%5777 = arith.truncf %5776 : f64 to f32
// Scale, round, and shift by the zero point.
// NOTE(review): math.round rounds ties away from zero; confirm this matches the
// rounding mode of the quantize op this was lowered from.
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to [-128, 127], then convert to i8.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x64x56x56xi8>
// Both casts below have identical source and result types.
%cast_358 = tensor.cast %557 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
%cast_359 = tensor.cast %cast_358 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
// Dequantize stage: converts the 1x64x56x56 i8 tensor %cast_359 back to f32 as
//   out = (in - zero_point) * scale
// with scale taken from %558 (1.562500e-02 = 1/64) and zero_point from %559 (0).
%558 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%559 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %561, %563 and the index constants are not referenced within this stage.
%560 = torch.aten.item %558 : !torch.vtensor<[],f32> -> !torch.float
%561 = torch_c.to_f64 %560
%562 = torch.aten.item %559 : !torch.vtensor<[],si8> -> !torch.int
%563 = torch_c.to_i64 %562
%cast_360 = tensor.cast %cast_359 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
%c1_361 = arith.constant 1 : index
%c1_362 = arith.constant 1 : index
%c64_363 = arith.constant 64 : index
%c2_364 = arith.constant 2 : index
%c56_365 = arith.constant 56 : index
%c3_366 = arith.constant 3 : index
%c56_367 = arith.constant 56 : index
%564 = tensor.empty() : tensor<1x64x56x56xf32>
%565 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_360 : tensor<1x64x56x56xi8>) outs(%564 : tensor<1x64x56x56xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend to i32 and subtract the zero point.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %562
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
// Convert to f32 and multiply by the scale.
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %560
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x64x56x56xf32>
%cast_368 = tensor.cast %565 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
// Quantize stage: converts the 256x64x1x1 f32 tensor %14 (defined earlier in this
// function) to i8 elementwise as
//   out = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
// with scale taken from %566 (3.906250e-03 = 1/256) and zero_point from %567 (0).
%566 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%567 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_369, %569, %571 and the index constants are not referenced within this stage.
%int12_369 = torch.constant.int 12
%568 = torch.aten.item %566 : !torch.vtensor<[],f32> -> !torch.float
%569 = torch_c.to_f64 %568
%570 = torch.aten.item %567 : !torch.vtensor<[],si8> -> !torch.int
%571 = torch_c.to_i64 %570
%c1_370 = arith.constant 1 : index
%c0_371 = arith.constant 0 : index
%c256 = arith.constant 256 : index
%c1_372 = arith.constant 1 : index
%c64_373 = arith.constant 64 : index
%572 = tensor.empty() : tensor<256x64x1x1xi8>
%573 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%14 : tensor<256x64x1x1xf32>) outs(%572 : tensor<256x64x1x1xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32 and scale as f32.
%5774 = torch_c.to_i64 %570
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %568
%5777 = arith.truncf %5776 : f64 to f32
// Scale, round, and shift by the zero point.
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to [-128, 127], then convert to i8.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256x64x1x1xi8>
// Both casts below have identical source and result types.
%cast_374 = tensor.cast %573 : tensor<256x64x1x1xi8> to tensor<256x64x1x1xi8>
%cast_375 = tensor.cast %cast_374 : tensor<256x64x1x1xi8> to tensor<256x64x1x1xi8>
// Dequantize stage: converts the 256x64x1x1 i8 tensor %cast_375 back to f32 as
//   out = (in - zero_point) * scale
// with scale taken from %574 (3.906250e-03 = 1/256) and zero_point from %575 (0).
%574 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%575 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %577, %579 and the index constants are not referenced within this stage.
%576 = torch.aten.item %574 : !torch.vtensor<[],f32> -> !torch.float
%577 = torch_c.to_f64 %576
%578 = torch.aten.item %575 : !torch.vtensor<[],si8> -> !torch.int
%579 = torch_c.to_i64 %578
%cast_376 = tensor.cast %cast_375 : tensor<256x64x1x1xi8> to tensor<256x64x1x1xi8>
%c1_377 = arith.constant 1 : index
%c0_378 = arith.constant 0 : index
%c256_379 = arith.constant 256 : index
%c1_380 = arith.constant 1 : index
%c64_381 = arith.constant 64 : index
%580 = tensor.empty() : tensor<256x64x1x1xf32>
%581 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_376 : tensor<256x64x1x1xi8>) outs(%580 : tensor<256x64x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend to i32 and subtract the zero point.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %578
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
// Convert to f32 and multiply by the scale.
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %576
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<256x64x1x1xf32>
%cast_382 = tensor.cast %581 : tensor<256x64x1x1xf32> to tensor<256x64x1x1xf32>
// Quantize stage: converts the 256-element f32 tensor %16 (defined earlier in this
// function) to i8 elementwise as
//   out = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
// with scale taken from %582 (7.812500e-03 = 1/128) and zero_point from %583 (0).
%582 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%583 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_383, %585, %587 and the index constants are not referenced within this stage.
%int12_383 = torch.constant.int 12
%584 = torch.aten.item %582 : !torch.vtensor<[],f32> -> !torch.float
%585 = torch_c.to_f64 %584
%586 = torch.aten.item %583 : !torch.vtensor<[],si8> -> !torch.int
%587 = torch_c.to_i64 %586
%c1_384 = arith.constant 1 : index
%c0_385 = arith.constant 0 : index
%c256_386 = arith.constant 256 : index
%588 = tensor.empty() : tensor<256xi8>
%589 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%16 : tensor<256xf32>) outs(%588 : tensor<256xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32 and scale as f32.
%5774 = torch_c.to_i64 %586
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %584
%5777 = arith.truncf %5776 : f64 to f32
// Scale, round, and shift by the zero point.
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to [-128, 127], then convert to i8.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256xi8>
// Both casts below have identical source and result types.
%cast_387 = tensor.cast %589 : tensor<256xi8> to tensor<256xi8>
%cast_388 = tensor.cast %cast_387 : tensor<256xi8> to tensor<256xi8>
// Dequantize stage: converts the 256-element i8 tensor %cast_388 back to f32 as
//   out = (in - zero_point) * scale
// with scale taken from %590 (7.812500e-03 = 1/128) and zero_point from %591 (0).
%590 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%591 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %593, %595 and the index constants are not referenced within this stage.
%592 = torch.aten.item %590 : !torch.vtensor<[],f32> -> !torch.float
%593 = torch_c.to_f64 %592
%594 = torch.aten.item %591 : !torch.vtensor<[],si8> -> !torch.int
%595 = torch_c.to_i64 %594
%cast_389 = tensor.cast %cast_388 : tensor<256xi8> to tensor<256xi8>
%c1_390 = arith.constant 1 : index
%c0_391 = arith.constant 0 : index
%c256_392 = arith.constant 256 : index
%596 = tensor.empty() : tensor<256xf32>
%597 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_389 : tensor<256xi8>) outs(%596 : tensor<256xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend to i32 and subtract the zero point.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %594
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
// Convert to f32 and multiply by the scale.
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %592
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<256xf32>
%cast_393 = tensor.cast %597 : tensor<256xf32> to tensor<256xf32>
// Convolution stage: 2-D NCHW convolution of %cast_368 (1x64x56x56) with the
// 256x64x1x1 filter %cast_382. Padding amounts are 0, strides and dilations are 1.
// The accumulator is initialised per output channel from %cast_393 (presumably the
// bias). The result is cast to the static shape 1x256x56x56 as %cast_442.
%int0_394 = torch.constant.int 0
%int0_395 = torch.constant.int 0
%int1_396 = torch.constant.int 1
%int1_397 = torch.constant.int 1
%int1_398 = torch.constant.int 1
%int1_399 = torch.constant.int 1
%int0_400 = torch.constant.int 0
// The four lists below are not referenced within this stage; only the underlying
// !torch.int constants are consumed (via torch_c.to_i64).
%598 = torch.prim.ListConstruct %int0_394, %int0_395 : (!torch.int, !torch.int) -> !torch.list<int>
%599 = torch.prim.ListConstruct %int1_396, %int1_397 : (!torch.int, !torch.int) -> !torch.list<int>
%600 = torch.prim.ListConstruct %int1_398, %int1_399 : (!torch.int, !torch.int) -> !torch.list<int>
%601 = torch.prim.ListConstruct %int0_400, %int0_400 : (!torch.int, !torch.int) -> !torch.list<int>
%false_401 = torch.constant.bool false
%int1_402 = torch.constant.int 1
// %602 is the group count (see the assert messages below); %603 / %604 are the
// padding amounts used by tensor.pad and the output-size arithmetic.
%602 = torch_c.to_i64 %int1_402
%603 = torch_c.to_i64 %int0_394
%604 = torch_c.to_i64 %int0_395
%605 = torch_c.to_i64 %int0_400
%606 = torch_c.to_i64 %int0_400
%c0_403 = arith.constant 0 : index
%c1_404 = arith.constant 1 : index
%c1_405 = arith.constant 1 : index
%c64_406 = arith.constant 64 : index
%c2_407 = arith.constant 2 : index
%c56_408 = arith.constant 56 : index
%c3_409 = arith.constant 3 : index
%c56_410 = arith.constant 56 : index
%c0_411 = arith.constant 0 : index
%c256_412 = arith.constant 256 : index
%c1_413 = arith.constant 1 : index
%c64_414 = arith.constant 64 : index
%c2_415 = arith.constant 2 : index
%c1_416 = arith.constant 1 : index
%c3_417 = arith.constant 3 : index
%c1_418 = arith.constant 1 : index
// Runtime checks that the group count divides 64 (input channels) and 256 (weight batch).
%607 = arith.index_cast %602 : i64 to index
%c0_419 = arith.constant 0 : index
%608 = arith.remsi %c64_406, %607 : index
%609 = arith.cmpi eq, %c0_419, %608 : index
cf.assert %609, "invalid: groups must divide input channel size evenly."
%c0_420 = arith.constant 0 : index
%610 = arith.remsi %c256_412, %607 : index
%611 = arith.cmpi eq, %c0_420, %610 : index
cf.assert %611, "invalid: groups must divide weight batch size evenly."
// Constant-1 i64 values: %c1_i64_421 / _422 multiply (kernel - 1) and %c1_i64_423 /
// _424 are the floor-division divisors below (presumably dilation and stride).
%c1_i64_421 = arith.constant 1 : i64
%c1_i64_422 = arith.constant 1 : i64
%c1_i64_423 = arith.constant 1 : i64
%c1_i64_424 = arith.constant 1 : i64
%cst_425 = arith.constant 0.000000e+00 : f32
%c0_426 = arith.constant 0 : index
%c1_427 = arith.constant 1 : index
%c1_428 = arith.constant 1 : index
%c64_429 = arith.constant 64 : index
%c2_430 = arith.constant 2 : index
%c56_431 = arith.constant 56 : index
%c3_432 = arith.constant 3 : index
%c56_433 = arith.constant 56 : index
%c0_i64_434 = arith.constant 0 : i64
// Pad amounts: 0 on the two leading dimensions, %603 / %604 on the two trailing ones.
%612 = arith.index_cast %c0_i64_434 : i64 to index
%613 = arith.index_cast %c0_i64_434 : i64 to index
%614 = arith.index_cast %603 : i64 to index
%615 = arith.index_cast %604 : i64 to index
// Pad with 0.0; the result type is fully dynamic.
%padded_435 = tensor.pad %cast_368 low[%612, %613, %614, %615] high[%612, %613, %614, %615] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_425 : f32
} : tensor<1x64x56x56xf32> to tensor<?x?x?x?xf32>
// First output spatial size:
//   (56 + 2*pad - 1*(1 - 1) - 1) floordiv 1 + 1
%616 = arith.index_cast %c1_416 : index to i64
%c1_i64_436 = arith.constant 1 : i64
%c2_i64_437 = arith.constant 2 : i64
%617 = arith.muli %603, %c2_i64_437 : i64
%618 = arith.index_cast %c56_408 : index to i64
%619 = arith.addi %618, %617 : i64
%620 = arith.subi %616, %c1_i64_436 : i64
%621 = arith.muli %c1_i64_421, %620 : i64
%622 = arith.subi %619, %621 : i64
%623 = arith.subi %622, %c1_i64_436 : i64
%624 = arith.floordivsi %623, %c1_i64_423 : i64
%625 = arith.addi %624, %c1_i64_436 : i64
%626 = arith.index_cast %625 : i64 to index
// Second output spatial size, same formula using %604.
%627 = arith.index_cast %c1_418 : index to i64
%c1_i64_438 = arith.constant 1 : i64
%c2_i64_439 = arith.constant 2 : i64
%628 = arith.muli %604, %c2_i64_439 : i64
%629 = arith.index_cast %c56_410 : index to i64
%630 = arith.addi %629, %628 : i64
%631 = arith.subi %627, %c1_i64_438 : i64
%632 = arith.muli %c1_i64_422, %631 : i64
%633 = arith.subi %630, %632 : i64
%634 = arith.subi %633, %c1_i64_438 : i64
%635 = arith.floordivsi %634, %c1_i64_424 : i64
%636 = arith.addi %635, %c1_i64_438 : i64
%637 = arith.index_cast %636 : i64 to index
// Fill the output buffer by broadcasting %cast_393 along d1 (#map3 selects d1 only);
// this becomes the initial accumulator value of the convolution.
%638 = tensor.empty(%626, %637) : tensor<1x256x?x?xf32>
%639 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_393 : tensor<256xf32>) outs(%638 : tensor<1x256x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x256x?x?xf32>
// %640, %641 and the two index constants after them are not referenced within this stage.
%640 = arith.floordivsi %c64_406, %607 : index
%641 = arith.floordivsi %c256_412, %607 : index
%c0_440 = arith.constant 0 : index
%c1_441 = arith.constant 1 : index
%642 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_435, %cast_382 : tensor<?x?x?x?xf32>, tensor<256x64x1x1xf32>) outs(%639 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
// Recover the static result shape.
%cast_442 = tensor.cast %642 : tensor<1x256x?x?xf32> to tensor<1x256x56x56xf32>
// Quantize stage: converts the 1x256x56x56 f32 tensor %cast_442 to i8 elementwise as
//   out = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
// with scale taken from %643 (7.812500e-03 = 1/128) and zero_point from %644 (0).
%643 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%644 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_443, %646, %648 and the index constants are not referenced within this stage.
%int12_443 = torch.constant.int 12
%645 = torch.aten.item %643 : !torch.vtensor<[],f32> -> !torch.float
%646 = torch_c.to_f64 %645
%647 = torch.aten.item %644 : !torch.vtensor<[],si8> -> !torch.int
%648 = torch_c.to_i64 %647
%c1_444 = arith.constant 1 : index
%c1_445 = arith.constant 1 : index
%c256_446 = arith.constant 256 : index
%c2_447 = arith.constant 2 : index
%c56_448 = arith.constant 56 : index
%c3_449 = arith.constant 3 : index
%c56_450 = arith.constant 56 : index
%649 = tensor.empty() : tensor<1x256x56x56xi8>
%650 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_442 : tensor<1x256x56x56xf32>) outs(%649 : tensor<1x256x56x56xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32 and scale as f32.
%5774 = torch_c.to_i64 %647
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %645
%5777 = arith.truncf %5776 : f64 to f32
// Scale, round, and shift by the zero point.
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to [-128, 127], then convert to i8.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x256x56x56xi8>
// Both casts below have identical source and result types.
%cast_451 = tensor.cast %650 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
%cast_452 = tensor.cast %cast_451 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
// Dequantize stage: converts the 1x256x56x56 i8 tensor %cast_452 back to f32 as
//   out = (in - zero_point) * scale
// with scale taken from %651 (7.812500e-03 = 1/128) and zero_point from %652 (0).
%651 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%652 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %654, %656 and the index constants are not referenced within this stage.
%653 = torch.aten.item %651 : !torch.vtensor<[],f32> -> !torch.float
%654 = torch_c.to_f64 %653
%655 = torch.aten.item %652 : !torch.vtensor<[],si8> -> !torch.int
%656 = torch_c.to_i64 %655
%cast_453 = tensor.cast %cast_452 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
%c1_454 = arith.constant 1 : index
%c1_455 = arith.constant 1 : index
%c256_456 = arith.constant 256 : index
%c2_457 = arith.constant 2 : index
%c56_458 = arith.constant 56 : index
%c3_459 = arith.constant 3 : index
%c56_460 = arith.constant 56 : index
%657 = tensor.empty() : tensor<1x256x56x56xf32>
%658 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_453 : tensor<1x256x56x56xi8>) outs(%657 : tensor<1x256x56x56xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend to i32 and subtract the zero point.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %655
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
// Convert to f32 and multiply by the scale.
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %653
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x256x56x56xf32>
%cast_461 = tensor.cast %658 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
// Quantize stage: converts the 256x64x1x1 f32 tensor %18 (defined earlier in this
// function) to i8 elementwise as
//   out = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
// with scale taken from %659 (1.562500e-02 = 1/64) and zero_point from %660 (0).
%659 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%660 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_462, %662, %664 and the index constants are not referenced within this stage.
%int12_462 = torch.constant.int 12
%661 = torch.aten.item %659 : !torch.vtensor<[],f32> -> !torch.float
%662 = torch_c.to_f64 %661
%663 = torch.aten.item %660 : !torch.vtensor<[],si8> -> !torch.int
%664 = torch_c.to_i64 %663
%c1_463 = arith.constant 1 : index
%c0_464 = arith.constant 0 : index
%c256_465 = arith.constant 256 : index
%c1_466 = arith.constant 1 : index
%c64_467 = arith.constant 64 : index
%665 = tensor.empty() : tensor<256x64x1x1xi8>
%666 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%18 : tensor<256x64x1x1xf32>) outs(%665 : tensor<256x64x1x1xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32 and scale as f32.
%5774 = torch_c.to_i64 %663
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %661
%5777 = arith.truncf %5776 : f64 to f32
// Scale, round, and shift by the zero point.
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to [-128, 127], then convert to i8.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256x64x1x1xi8>
// Both casts below have identical source and result types.
%cast_468 = tensor.cast %666 : tensor<256x64x1x1xi8> to tensor<256x64x1x1xi8>
%cast_469 = tensor.cast %cast_468 : tensor<256x64x1x1xi8> to tensor<256x64x1x1xi8>
// Dequantize stage: converts the 256x64x1x1 i8 tensor %cast_469 back to f32 as
//   out = (in - zero_point) * scale
// with scale taken from %667 (1.562500e-02 = 1/64) and zero_point from %668 (0).
%667 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%668 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %670, %672 and the index constants are not referenced within this stage.
%669 = torch.aten.item %667 : !torch.vtensor<[],f32> -> !torch.float
%670 = torch_c.to_f64 %669
%671 = torch.aten.item %668 : !torch.vtensor<[],si8> -> !torch.int
%672 = torch_c.to_i64 %671
%cast_470 = tensor.cast %cast_469 : tensor<256x64x1x1xi8> to tensor<256x64x1x1xi8>
%c1_471 = arith.constant 1 : index
%c0_472 = arith.constant 0 : index
%c256_473 = arith.constant 256 : index
%c1_474 = arith.constant 1 : index
%c64_475 = arith.constant 64 : index
%673 = tensor.empty() : tensor<256x64x1x1xf32>
%674 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_470 : tensor<256x64x1x1xi8>) outs(%673 : tensor<256x64x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend to i32 and subtract the zero point.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %671
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
// Convert to f32 and multiply by the scale.
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %669
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<256x64x1x1xf32>
%cast_476 = tensor.cast %674 : tensor<256x64x1x1xf32> to tensor<256x64x1x1xf32>
// Quantize stage: converts the 256-element f32 tensor %20 (defined earlier in this
// function) to i8 elementwise as
//   out = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
// with scale taken from %675 (3.125000e-02 = 1/32) and zero_point from %676 (0).
%675 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%676 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_477, %678, %680 and the index constants are not referenced within this stage.
%int12_477 = torch.constant.int 12
%677 = torch.aten.item %675 : !torch.vtensor<[],f32> -> !torch.float
%678 = torch_c.to_f64 %677
%679 = torch.aten.item %676 : !torch.vtensor<[],si8> -> !torch.int
%680 = torch_c.to_i64 %679
%c1_478 = arith.constant 1 : index
%c0_479 = arith.constant 0 : index
%c256_480 = arith.constant 256 : index
%681 = tensor.empty() : tensor<256xi8>
%682 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%20 : tensor<256xf32>) outs(%681 : tensor<256xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32 and scale as f32.
%5774 = torch_c.to_i64 %679
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %677
%5777 = arith.truncf %5776 : f64 to f32
// Scale, round, and shift by the zero point.
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to [-128, 127], then convert to i8.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256xi8>
// Both casts below have identical source and result types.
%cast_481 = tensor.cast %682 : tensor<256xi8> to tensor<256xi8>
%cast_482 = tensor.cast %cast_481 : tensor<256xi8> to tensor<256xi8>
// Dequantize stage: converts the 256-element i8 tensor %cast_482 back to f32 as
//   out = (in - zero_point) * scale
// with scale taken from %683 (3.125000e-02 = 1/32) and zero_point from %684 (0).
%683 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%684 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %686, %688 and the index constants are not referenced within this stage.
%685 = torch.aten.item %683 : !torch.vtensor<[],f32> -> !torch.float
%686 = torch_c.to_f64 %685
%687 = torch.aten.item %684 : !torch.vtensor<[],si8> -> !torch.int
%688 = torch_c.to_i64 %687
%cast_483 = tensor.cast %cast_482 : tensor<256xi8> to tensor<256xi8>
%c1_484 = arith.constant 1 : index
%c0_485 = arith.constant 0 : index
%c256_486 = arith.constant 256 : index
%689 = tensor.empty() : tensor<256xf32>
%690 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_483 : tensor<256xi8>) outs(%689 : tensor<256xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend to i32 and subtract the zero point.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %687
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
// Convert to f32 and multiply by the scale.
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %685
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<256xf32>
%cast_487 = tensor.cast %690 : tensor<256xf32> to tensor<256xf32>
%int0_488 = torch.constant.int 0
%int0_489 = torch.constant.int 0
%int1_490 = torch.constant.int 1
%int1_491 = torch.constant.int 1
%int1_492 = torch.constant.int 1
%int1_493 = torch.constant.int 1
%int0_494 = torch.constant.int 0
%691 = torch.prim.ListConstruct %int0_488, %int0_489 : (!torch.int, !torch.int) -> !torch.list<int>
%692 = torch.prim.ListConstruct %int1_490, %int1_491 : (!torch.int, !torch.int) -> !torch.list<int>
%693 = torch.prim.ListConstruct %int1_492, %int1_493 : (!torch.int, !torch.int) -> !torch.list<int>
%694 = torch.prim.ListConstruct %int0_494, %int0_494 : (!torch.int, !torch.int) -> !torch.list<int>
%false_495 = torch.constant.bool false
%int1_496 = torch.constant.int 1
%695 = torch_c.to_i64 %int1_496
%696 = torch_c.to_i64 %int0_488
%697 = torch_c.to_i64 %int0_489
%698 = torch_c.to_i64 %int0_494
%699 = torch_c.to_i64 %int0_494
%c0_497 = arith.constant 0 : index
%c1_498 = arith.constant 1 : index
%c1_499 = arith.constant 1 : index
%c64_500 = arith.constant 64 : index
%c2_501 = arith.constant 2 : index
%c56_502 = arith.constant 56 : index
%c3_503 = arith.constant 3 : index
%c56_504 = arith.constant 56 : index
%c0_505 = arith.constant 0 : index
%c256_506 = arith.constant 256 : index
%c1_507 = arith.constant 1 : index
%c64_508 = arith.constant 64 : index
%c2_509 = arith.constant 2 : index
%c1_510 = arith.constant 1 : index
%c3_511 = arith.constant 3 : index
%c1_512 = arith.constant 1 : index
%700 = arith.index_cast %695 : i64 to index
%c0_513 = arith.constant 0 : index
%701 = arith.remsi %c64_500, %700 : index
%702 = arith.cmpi eq, %c0_513, %701 : index
cf.assert %702, "invalid: groups must divide input channel size evenly."
%c0_514 = arith.constant 0 : index
%703 = arith.remsi %c256_506, %700 : index
%704 = arith.cmpi eq, %c0_514, %703 : index
cf.assert %704, "invalid: groups must divide weight batch size evenly."
%c1_i64_515 = arith.constant 1 : i64
%c1_i64_516 = arith.constant 1 : i64
%c1_i64_517 = arith.constant 1 : i64
%c1_i64_518 = arith.constant 1 : i64
%cst_519 = arith.constant 0.000000e+00 : f32
%c0_520 = arith.constant 0 : index
%c1_521 = arith.constant 1 : index
%c1_522 = arith.constant 1 : index
%c64_523 = arith.constant 64 : index
%c2_524 = arith.constant 2 : index
%c56_525 = arith.constant 56 : index
%c3_526 = arith.constant 3 : index
%c56_527 = arith.constant 56 : index
%c0_i64_528 = arith.constant 0 : i64
%705 = arith.index_cast %c0_i64_528 : i64 to index
%706 = arith.index_cast %c0_i64_528 : i64 to index
%707 = arith.index_cast %696 : i64 to index
%708 = arith.index_cast %697 : i64 to index
// Pad the convolution input %cast_156 with the constant %cst_519 (0.0).
// The first two low/high amounts (%705, %706) are index casts of the i64 constant 0.
// The last two (%707, %708) come from %696/%697, i.e. %int0_488/%int0_489, which are
// defined above this excerpt -- presumably 0 as their names suggest; TODO confirm.
// The result type is fully dynamic even though the source shape is static.
%padded_529 = tensor.pad %cast_156 low[%705, %706, %707, %708] high[%705, %706, %707, %708] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_519 : f32
} : tensor<1x64x56x56xf32> to tensor<?x?x?x?xf32>
%709 = arith.index_cast %c1_510 : index to i64
%c1_i64_530 = arith.constant 1 : i64
%c2_i64_531 = arith.constant 2 : i64
%710 = arith.muli %696, %c2_i64_531 : i64
%711 = arith.index_cast %c56_502 : index to i64
%712 = arith.addi %711, %710 : i64
%713 = arith.subi %709, %c1_i64_530 : i64
%714 = arith.muli %c1_i64_515, %713 : i64
%715 = arith.subi %712, %714 : i64
%716 = arith.subi %715, %c1_i64_530 : i64
%717 = arith.floordivsi %716, %c1_i64_517 : i64
%718 = arith.addi %717, %c1_i64_530 : i64
%719 = arith.index_cast %718 : i64 to index
%720 = arith.index_cast %c1_512 : index to i64
%c1_i64_532 = arith.constant 1 : i64
%c2_i64_533 = arith.constant 2 : i64
%721 = arith.muli %697, %c2_i64_533 : i64
%722 = arith.index_cast %c56_504 : index to i64
%723 = arith.addi %722, %721 : i64
%724 = arith.subi %720, %c1_i64_532 : i64
%725 = arith.muli %c1_i64_516, %724 : i64
%726 = arith.subi %723, %725 : i64
%727 = arith.subi %726, %c1_i64_532 : i64
%728 = arith.floordivsi %727, %c1_i64_518 : i64
%729 = arith.addi %728, %c1_i64_532 : i64
%730 = arith.index_cast %729 : i64 to index
%731 = tensor.empty(%719, %730) : tensor<1x256x?x?xf32>
// Broadcast the 256-element vector %cast_487 across the output tensor: #map3 reads the
// input at index d1 only, so every (d0, d2, d3) position receives the same per-d1 value.
// The result is passed as the `outs` (initial accumulator) operand of the following
// linalg.conv_2d_nchw_fchw.
%732 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_487 : tensor<256xf32>) outs(%731 : tensor<1x256x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x256x?x?xf32>
%733 = arith.floordivsi %c64_500, %700 : index
%734 = arith.floordivsi %c256_506, %700 : index
%c0_534 = arith.constant 0 : index
%c1_535 = arith.constant 1 : index
%735 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_529, %cast_476 : tensor<?x?x?x?xf32>, tensor<256x64x1x1xf32>) outs(%732 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
%cast_536 = tensor.cast %735 : tensor<1x256x?x?xf32> to tensor<1x256x56x56xf32>
%736 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%737 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_537 = torch.constant.int 12
%738 = torch.aten.item %736 : !torch.vtensor<[],f32> -> !torch.float
%739 = torch_c.to_f64 %738
%740 = torch.aten.item %737 : !torch.vtensor<[],si8> -> !torch.int
%741 = torch_c.to_i64 %740
%c1_538 = arith.constant 1 : index
%c1_539 = arith.constant 1 : index
%c256_540 = arith.constant 256 : index
%c2_541 = arith.constant 2 : index
%c56_542 = arith.constant 56 : index
%c3_543 = arith.constant 3 : index
%c56_544 = arith.constant 56 : index
%742 = tensor.empty() : tensor<1x256x56x56xi8>
// Quantize the f32 tensor %cast_536 to i8, element by element:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is %739 (f64 view of %738, literal 1.5625e-02) and zero_point is %741 (i64 view
// of %740, literal 0). Both conversions are computed once above this op and captured
// here instead of being re-materialized for every element.
// Rounding uses math.roundeven (ties to even), which is the rounding mode of ONNX
// QuantizeLinear / torch.quantize_per_tensor; math.round rounds ties away from zero and
// yields off-by-one results whenever x / scale lands exactly on k + 0.5.
%743 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_536 : tensor<1x256x56x56xf32>) outs(%742 : tensor<1x256x56x56xi8>) {
^bb0(%in: f32, %out: i8):
  // Scalar parameters as f32.
  %5774 = arith.sitofp %741 : i64 to f32
  %5775 = arith.truncf %739 : f64 to f32
  // Scale, round half to even, then shift by the zero point.
  %5776 = arith.divf %in, %5775 : f32
  %5777 = math.roundeven %5776 : f32
  %5778 = arith.addf %5777, %5774 : f32
  // Saturate to the signed 8-bit range before the float-to-int conversion.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5779 = arith.cmpf ult, %5778, %cst_6197 : f32
  %5780 = arith.cmpf ugt, %5778, %cst_6198 : f32
  %5781 = arith.select %5779, %cst_6197, %5778 : f32
  %5782 = arith.select %5780, %cst_6198, %5781 : f32
  %5783 = arith.fptosi %5782 : f32 to i8
  linalg.yield %5783 : i8
} -> tensor<1x256x56x56xi8>
%cast_545 = tensor.cast %743 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
%cast_546 = tensor.cast %cast_545 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
%744 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%745 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%746 = torch.aten.item %744 : !torch.vtensor<[],f32> -> !torch.float
%747 = torch_c.to_f64 %746
%748 = torch.aten.item %745 : !torch.vtensor<[],si8> -> !torch.int
%749 = torch_c.to_i64 %748
%cast_547 = tensor.cast %cast_546 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
%c1_548 = arith.constant 1 : index
%c1_549 = arith.constant 1 : index
%c256_550 = arith.constant 256 : index
%c2_551 = arith.constant 2 : index
%c56_552 = arith.constant 56 : index
%c3_553 = arith.constant 3 : index
%c56_554 = arith.constant 56 : index
%750 = tensor.empty() : tensor<1x256x56x56xf32>
// Dequantize the i8 tensor %cast_547 back to f32, element by element:
//   x = f32(i32(q) - zero_point) * scale
// zero_point is %749 (i64 view of %748) and scale is %747 (f64 view of %746); both are
// computed once above this op.
%751 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_547 : tensor<1x256x56x56xi8>) outs(%750 : tensor<1x256x56x56xf32>) {
^bb0(%in: i8, %out: f32):
  // Narrow the scalar parameters to the element-wise working types.
  %5774 = arith.truncf %747 : f64 to f32
  %5775 = arith.trunci %749 : i64 to i32
  // Widen the sample so the zero-point subtraction cannot wrap in i8.
  %5776 = arith.extsi %in : i8 to i32
  %5777 = arith.subi %5776, %5775 : i32
  %5778 = arith.sitofp %5777 : i32 to f32
  %5779 = arith.mulf %5778, %5774 : f32
  linalg.yield %5779 : f32
} -> tensor<1x256x56x56xf32>
%cast_555 = tensor.cast %751 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
%int1_556 = torch.constant.int 1
%752 = torch_c.to_i64 %int1_556
%c1_557 = arith.constant 1 : index
%c1_558 = arith.constant 1 : index
%c256_559 = arith.constant 256 : index
%c2_560 = arith.constant 2 : index
%c56_561 = arith.constant 56 : index
%c3_562 = arith.constant 3 : index
%c56_563 = arith.constant 56 : index
%c1_564 = arith.constant 1 : index
%c256_565 = arith.constant 256 : index
%753 = arith.cmpi eq, %c256_559, %c256_565 : index
cf.assert %753, "mismatched size for broadcast"
%c2_566 = arith.constant 2 : index
%c56_567 = arith.constant 56 : index
%754 = arith.cmpi eq, %c56_561, %c56_567 : index
cf.assert %754, "mismatched size for broadcast"
%c3_568 = arith.constant 3 : index
%c56_569 = arith.constant 56 : index
%755 = arith.cmpi eq, %c56_563, %c56_569 : index
cf.assert %755, "mismatched size for broadcast"
%756 = tensor.empty() : tensor<1x256x56x56xf32>
// Element-wise scaled add of two 1x256x56x56 f32 tensors:
//   out = %cast_461 + %cast_555 * alpha
// alpha is %752, the i64 form of %int1_556 (constant 1), converted to f32 inside the
// region. %cast_461 is defined above this excerpt; %cast_555 is the dequantized
// convolution result.
%757 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_461, %cast_555 : tensor<1x256x56x56xf32>, tensor<1x256x56x56xf32>) outs(%756 : tensor<1x256x56x56xf32>) {
^bb0(%in: f32, %in_6197: f32, %out: f32):
// alpha as f32.
%5774 = arith.sitofp %752 : i64 to f32
%5775 = arith.mulf %in_6197, %5774 : f32
%5776 = arith.addf %in, %5775 : f32
linalg.yield %5776 : f32
} -> tensor<1x256x56x56xf32>
%cast_570 = tensor.cast %757 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
%c1_571 = arith.constant 1 : index
%c1_572 = arith.constant 1 : index
%c256_573 = arith.constant 256 : index
%c2_574 = arith.constant 2 : index
%c56_575 = arith.constant 56 : index
%c3_576 = arith.constant 3 : index
%c56_577 = arith.constant 56 : index
%758 = tensor.empty() : tensor<1x256x56x56xf32>
// Element-wise ReLU over %cast_570: yields the input when it compares `ugt` against 0.0,
// otherwise yields 0.0.
%759 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_570 : tensor<1x256x56x56xf32>) outs(%758 : tensor<1x256x56x56xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// `ugt` is the unordered-or-greater predicate, so a NaN input also selects %in.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x256x56x56xf32>
%cast_578 = tensor.cast %759 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
%760 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%761 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_579 = torch.constant.int 12
%762 = torch.aten.item %760 : !torch.vtensor<[],f32> -> !torch.float
%763 = torch_c.to_f64 %762
%764 = torch.aten.item %761 : !torch.vtensor<[],si8> -> !torch.int
%765 = torch_c.to_i64 %764
%c1_580 = arith.constant 1 : index
%c1_581 = arith.constant 1 : index
%c256_582 = arith.constant 256 : index
%c2_583 = arith.constant 2 : index
%c56_584 = arith.constant 56 : index
%c3_585 = arith.constant 3 : index
%c56_586 = arith.constant 56 : index
%766 = tensor.empty() : tensor<1x256x56x56xi8>
// Quantize the f32 tensor %cast_578 to i8, element by element:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is %763 (f64 view of %762, literal 1.5625e-02) and zero_point is %765 (i64 view
// of %764, literal 0). Both conversions are computed once above this op and captured
// here instead of being re-materialized for every element.
// Rounding uses math.roundeven (ties to even), which is the rounding mode of ONNX
// QuantizeLinear / torch.quantize_per_tensor; math.round rounds ties away from zero and
// yields off-by-one results whenever x / scale lands exactly on k + 0.5.
%767 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_578 : tensor<1x256x56x56xf32>) outs(%766 : tensor<1x256x56x56xi8>) {
^bb0(%in: f32, %out: i8):
  // Scalar parameters as f32.
  %5774 = arith.sitofp %765 : i64 to f32
  %5775 = arith.truncf %763 : f64 to f32
  // Scale, round half to even, then shift by the zero point.
  %5776 = arith.divf %in, %5775 : f32
  %5777 = math.roundeven %5776 : f32
  %5778 = arith.addf %5777, %5774 : f32
  // Saturate to the signed 8-bit range before the float-to-int conversion.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5779 = arith.cmpf ult, %5778, %cst_6197 : f32
  %5780 = arith.cmpf ugt, %5778, %cst_6198 : f32
  %5781 = arith.select %5779, %cst_6197, %5778 : f32
  %5782 = arith.select %5780, %cst_6198, %5781 : f32
  %5783 = arith.fptosi %5782 : f32 to i8
  linalg.yield %5783 : i8
} -> tensor<1x256x56x56xi8>
%cast_587 = tensor.cast %767 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
%cast_588 = tensor.cast %cast_587 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
%768 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%769 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%770 = torch.aten.item %768 : !torch.vtensor<[],f32> -> !torch.float
%771 = torch_c.to_f64 %770
%772 = torch.aten.item %769 : !torch.vtensor<[],si8> -> !torch.int
%773 = torch_c.to_i64 %772
%cast_589 = tensor.cast %cast_588 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
%c1_590 = arith.constant 1 : index
%c1_591 = arith.constant 1 : index
%c256_592 = arith.constant 256 : index
%c2_593 = arith.constant 2 : index
%c56_594 = arith.constant 56 : index
%c3_595 = arith.constant 3 : index
%c56_596 = arith.constant 56 : index
%774 = tensor.empty() : tensor<1x256x56x56xf32>
// Dequantize the i8 tensor %cast_589 back to f32, element by element:
//   x = f32(i32(q) - zero_point) * scale
// zero_point is %773 (i64 view of %772) and scale is %771 (f64 view of %770); both are
// computed once above this op.
%775 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_589 : tensor<1x256x56x56xi8>) outs(%774 : tensor<1x256x56x56xf32>) {
^bb0(%in: i8, %out: f32):
  // Narrow the scalar parameters to the element-wise working types.
  %5774 = arith.truncf %771 : f64 to f32
  %5775 = arith.trunci %773 : i64 to i32
  // Widen the sample so the zero-point subtraction cannot wrap in i8.
  %5776 = arith.extsi %in : i8 to i32
  %5777 = arith.subi %5776, %5775 : i32
  %5778 = arith.sitofp %5777 : i32 to f32
  %5779 = arith.mulf %5778, %5774 : f32
  linalg.yield %5779 : f32
} -> tensor<1x256x56x56xf32>
%cast_597 = tensor.cast %775 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
%776 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%777 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_598 = torch.constant.int 12
%778 = torch.aten.item %776 : !torch.vtensor<[],f32> -> !torch.float
%779 = torch_c.to_f64 %778
%780 = torch.aten.item %777 : !torch.vtensor<[],si8> -> !torch.int
%781 = torch_c.to_i64 %780
%c1_599 = arith.constant 1 : index
%c0_600 = arith.constant 0 : index
%c64_601 = arith.constant 64 : index
%c1_602 = arith.constant 1 : index
%c256_603 = arith.constant 256 : index
%782 = tensor.empty() : tensor<64x256x1x1xi8>
// Quantize the f32 tensor %22 (64x256x1x1) to i8, element by element:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is %779 (f64 view of %778, literal 3.90625e-03) and zero_point is %781 (i64 view
// of %780, literal 0). Both conversions are computed once above this op and captured
// here instead of being re-materialized for every element.
// Rounding uses math.roundeven (ties to even), which is the rounding mode of ONNX
// QuantizeLinear / torch.quantize_per_tensor; math.round rounds ties away from zero and
// yields off-by-one results whenever x / scale lands exactly on k + 0.5.
%783 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%22 : tensor<64x256x1x1xf32>) outs(%782 : tensor<64x256x1x1xi8>) {
^bb0(%in: f32, %out: i8):
  // Scalar parameters as f32.
  %5774 = arith.sitofp %781 : i64 to f32
  %5775 = arith.truncf %779 : f64 to f32
  // Scale, round half to even, then shift by the zero point.
  %5776 = arith.divf %in, %5775 : f32
  %5777 = math.roundeven %5776 : f32
  %5778 = arith.addf %5777, %5774 : f32
  // Saturate to the signed 8-bit range before the float-to-int conversion.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5779 = arith.cmpf ult, %5778, %cst_6197 : f32
  %5780 = arith.cmpf ugt, %5778, %cst_6198 : f32
  %5781 = arith.select %5779, %cst_6197, %5778 : f32
  %5782 = arith.select %5780, %cst_6198, %5781 : f32
  %5783 = arith.fptosi %5782 : f32 to i8
  linalg.yield %5783 : i8
} -> tensor<64x256x1x1xi8>
%cast_604 = tensor.cast %783 : tensor<64x256x1x1xi8> to tensor<64x256x1x1xi8>
%cast_605 = tensor.cast %cast_604 : tensor<64x256x1x1xi8> to tensor<64x256x1x1xi8>
%784 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%785 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%786 = torch.aten.item %784 : !torch.vtensor<[],f32> -> !torch.float
%787 = torch_c.to_f64 %786
%788 = torch.aten.item %785 : !torch.vtensor<[],si8> -> !torch.int
%789 = torch_c.to_i64 %788
%cast_606 = tensor.cast %cast_605 : tensor<64x256x1x1xi8> to tensor<64x256x1x1xi8>
%c1_607 = arith.constant 1 : index
%c0_608 = arith.constant 0 : index
%c64_609 = arith.constant 64 : index
%c1_610 = arith.constant 1 : index
%c256_611 = arith.constant 256 : index
%790 = tensor.empty() : tensor<64x256x1x1xf32>
// Dequantize the i8 tensor %cast_606 (64x256x1x1) back to f32, element by element:
//   x = f32(i32(q) - zero_point) * scale
// zero_point is %789 (i64 view of %788) and scale is %787 (f64 view of %786); both are
// computed once above this op.
%791 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_606 : tensor<64x256x1x1xi8>) outs(%790 : tensor<64x256x1x1xf32>) {
^bb0(%in: i8, %out: f32):
  // Narrow the scalar parameters to the element-wise working types.
  %5774 = arith.truncf %787 : f64 to f32
  %5775 = arith.trunci %789 : i64 to i32
  // Widen the sample so the zero-point subtraction cannot wrap in i8.
  %5776 = arith.extsi %in : i8 to i32
  %5777 = arith.subi %5776, %5775 : i32
  %5778 = arith.sitofp %5777 : i32 to f32
  %5779 = arith.mulf %5778, %5774 : f32
  linalg.yield %5779 : f32
} -> tensor<64x256x1x1xf32>
%cast_612 = tensor.cast %791 : tensor<64x256x1x1xf32> to tensor<64x256x1x1xf32>
%792 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%793 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_613 = torch.constant.int 12
%794 = torch.aten.item %792 : !torch.vtensor<[],f32> -> !torch.float
%795 = torch_c.to_f64 %794
%796 = torch.aten.item %793 : !torch.vtensor<[],si8> -> !torch.int
%797 = torch_c.to_i64 %796
%c1_614 = arith.constant 1 : index
%c0_615 = arith.constant 0 : index
%c64_616 = arith.constant 64 : index
%798 = tensor.empty() : tensor<64xi8>
// Quantize the f32 vector %24 (64 elements) to i8, element by element:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is %795 (f64 view of %794, literal 7.8125e-03) and zero_point is %797 (i64 view
// of %796, literal 0). Both conversions are computed once above this op and captured
// here instead of being re-materialized for every element.
// Rounding uses math.roundeven (ties to even), which is the rounding mode of ONNX
// QuantizeLinear / torch.quantize_per_tensor; math.round rounds ties away from zero and
// yields off-by-one results whenever x / scale lands exactly on k + 0.5.
%799 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%24 : tensor<64xf32>) outs(%798 : tensor<64xi8>) {
^bb0(%in: f32, %out: i8):
  // Scalar parameters as f32.
  %5774 = arith.sitofp %797 : i64 to f32
  %5775 = arith.truncf %795 : f64 to f32
  // Scale, round half to even, then shift by the zero point.
  %5776 = arith.divf %in, %5775 : f32
  %5777 = math.roundeven %5776 : f32
  %5778 = arith.addf %5777, %5774 : f32
  // Saturate to the signed 8-bit range before the float-to-int conversion.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5779 = arith.cmpf ult, %5778, %cst_6197 : f32
  %5780 = arith.cmpf ugt, %5778, %cst_6198 : f32
  %5781 = arith.select %5779, %cst_6197, %5778 : f32
  %5782 = arith.select %5780, %cst_6198, %5781 : f32
  %5783 = arith.fptosi %5782 : f32 to i8
  linalg.yield %5783 : i8
} -> tensor<64xi8>
%cast_617 = tensor.cast %799 : tensor<64xi8> to tensor<64xi8>
%cast_618 = tensor.cast %cast_617 : tensor<64xi8> to tensor<64xi8>
%800 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%801 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%802 = torch.aten.item %800 : !torch.vtensor<[],f32> -> !torch.float
%803 = torch_c.to_f64 %802
%804 = torch.aten.item %801 : !torch.vtensor<[],si8> -> !torch.int
%805 = torch_c.to_i64 %804
%cast_619 = tensor.cast %cast_618 : tensor<64xi8> to tensor<64xi8>
%c1_620 = arith.constant 1 : index
%c0_621 = arith.constant 0 : index
%c64_622 = arith.constant 64 : index
%806 = tensor.empty() : tensor<64xf32>
// Dequantize the i8 vector %cast_619 (64 elements) back to f32, element by element:
//   x = f32(i32(q) - zero_point) * scale
// zero_point is %805 (i64 view of %804) and scale is %803 (f64 view of %802); both are
// computed once above this op.
%807 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_619 : tensor<64xi8>) outs(%806 : tensor<64xf32>) {
^bb0(%in: i8, %out: f32):
  // Narrow the scalar parameters to the element-wise working types.
  %5774 = arith.truncf %803 : f64 to f32
  %5775 = arith.trunci %805 : i64 to i32
  // Widen the sample so the zero-point subtraction cannot wrap in i8.
  %5776 = arith.extsi %in : i8 to i32
  %5777 = arith.subi %5776, %5775 : i32
  %5778 = arith.sitofp %5777 : i32 to f32
  %5779 = arith.mulf %5778, %5774 : f32
  linalg.yield %5779 : f32
} -> tensor<64xf32>
%cast_623 = tensor.cast %807 : tensor<64xf32> to tensor<64xf32>
%int0_624 = torch.constant.int 0
%int0_625 = torch.constant.int 0
%int1_626 = torch.constant.int 1
%int1_627 = torch.constant.int 1
%int1_628 = torch.constant.int 1
%int1_629 = torch.constant.int 1
%int0_630 = torch.constant.int 0
%808 = torch.prim.ListConstruct %int0_624, %int0_625 : (!torch.int, !torch.int) -> !torch.list<int>
%809 = torch.prim.ListConstruct %int1_626, %int1_627 : (!torch.int, !torch.int) -> !torch.list<int>
%810 = torch.prim.ListConstruct %int1_628, %int1_629 : (!torch.int, !torch.int) -> !torch.list<int>
%811 = torch.prim.ListConstruct %int0_630, %int0_630 : (!torch.int, !torch.int) -> !torch.list<int>
%false_631 = torch.constant.bool false
%int1_632 = torch.constant.int 1
%812 = torch_c.to_i64 %int1_632
%813 = torch_c.to_i64 %int0_624
%814 = torch_c.to_i64 %int0_625
%815 = torch_c.to_i64 %int0_630
%816 = torch_c.to_i64 %int0_630
%c0_633 = arith.constant 0 : index
%c1_634 = arith.constant 1 : index
%c1_635 = arith.constant 1 : index
%c256_636 = arith.constant 256 : index
%c2_637 = arith.constant 2 : index
%c56_638 = arith.constant 56 : index
%c3_639 = arith.constant 3 : index
%c56_640 = arith.constant 56 : index
%c0_641 = arith.constant 0 : index
%c64_642 = arith.constant 64 : index
%c1_643 = arith.constant 1 : index
%c256_644 = arith.constant 256 : index
%c2_645 = arith.constant 2 : index
%c1_646 = arith.constant 1 : index
%c3_647 = arith.constant 3 : index
%c1_648 = arith.constant 1 : index
%817 = arith.index_cast %812 : i64 to index
%c0_649 = arith.constant 0 : index
%818 = arith.remsi %c256_636, %817 : index
%819 = arith.cmpi eq, %c0_649, %818 : index
cf.assert %819, "invalid: groups must divide input channel size evenly."
%c0_650 = arith.constant 0 : index
%820 = arith.remsi %c64_642, %817 : index
%821 = arith.cmpi eq, %c0_650, %820 : index
cf.assert %821, "invalid: groups must divide weight batch size evenly."
%c1_i64_651 = arith.constant 1 : i64
%c1_i64_652 = arith.constant 1 : i64
%c1_i64_653 = arith.constant 1 : i64
%c1_i64_654 = arith.constant 1 : i64
%cst_655 = arith.constant 0.000000e+00 : f32
%c0_656 = arith.constant 0 : index
%c1_657 = arith.constant 1 : index
%c1_658 = arith.constant 1 : index
%c256_659 = arith.constant 256 : index
%c2_660 = arith.constant 2 : index
%c56_661 = arith.constant 56 : index
%c3_662 = arith.constant 3 : index
%c56_663 = arith.constant 56 : index
%c0_i64_664 = arith.constant 0 : i64
%822 = arith.index_cast %c0_i64_664 : i64 to index
%823 = arith.index_cast %c0_i64_664 : i64 to index
%824 = arith.index_cast %813 : i64 to index
%825 = arith.index_cast %814 : i64 to index
// Pad the convolution input %cast_597 with the constant %cst_655 (0.0).
// All four low/high amounts are zero here: %822/%823 are index casts of the i64
// constant 0, and %824/%825 come from %813/%814, the i64 forms of %int0_624/%int0_625
// (both constant 0). The op therefore adds no elements; it only changes the static
// 1x256x56x56 type into the fully dynamic tensor<?x?x?x?xf32>.
%padded_665 = tensor.pad %cast_597 low[%822, %823, %824, %825] high[%822, %823, %824, %825] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_655 : f32
} : tensor<1x256x56x56xf32> to tensor<?x?x?x?xf32>
%826 = arith.index_cast %c1_646 : index to i64
%c1_i64_666 = arith.constant 1 : i64
%c2_i64_667 = arith.constant 2 : i64
%827 = arith.muli %813, %c2_i64_667 : i64
%828 = arith.index_cast %c56_638 : index to i64
%829 = arith.addi %828, %827 : i64
%830 = arith.subi %826, %c1_i64_666 : i64
%831 = arith.muli %c1_i64_651, %830 : i64
%832 = arith.subi %829, %831 : i64
%833 = arith.subi %832, %c1_i64_666 : i64
%834 = arith.floordivsi %833, %c1_i64_653 : i64
%835 = arith.addi %834, %c1_i64_666 : i64
%836 = arith.index_cast %835 : i64 to index
%837 = arith.index_cast %c1_648 : index to i64
%c1_i64_668 = arith.constant 1 : i64
%c2_i64_669 = arith.constant 2 : i64
%838 = arith.muli %814, %c2_i64_669 : i64
%839 = arith.index_cast %c56_640 : index to i64
%840 = arith.addi %839, %838 : i64
%841 = arith.subi %837, %c1_i64_668 : i64
%842 = arith.muli %c1_i64_652, %841 : i64
%843 = arith.subi %840, %842 : i64
%844 = arith.subi %843, %c1_i64_668 : i64
%845 = arith.floordivsi %844, %c1_i64_654 : i64
%846 = arith.addi %845, %c1_i64_668 : i64
%847 = arith.index_cast %846 : i64 to index
%848 = tensor.empty(%836, %847) : tensor<1x64x?x?xf32>
// Broadcast the 64-element vector %cast_623 across the output tensor: #map3 reads the
// input at index d1 only, so every (d0, d2, d3) position receives the same per-d1 value.
// The result is passed as the `outs` (initial accumulator) operand of the following
// linalg.conv_2d_nchw_fchw.
%849 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_623 : tensor<64xf32>) outs(%848 : tensor<1x64x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x64x?x?xf32>
%850 = arith.floordivsi %c256_636, %817 : index
%851 = arith.floordivsi %c64_642, %817 : index
%c0_670 = arith.constant 0 : index
%c1_671 = arith.constant 1 : index
%852 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_665, %cast_612 : tensor<?x?x?x?xf32>, tensor<64x256x1x1xf32>) outs(%849 : tensor<1x64x?x?xf32>) -> tensor<1x64x?x?xf32>
%cast_672 = tensor.cast %852 : tensor<1x64x?x?xf32> to tensor<1x64x56x56xf32>
%c1_673 = arith.constant 1 : index
%c1_674 = arith.constant 1 : index
%c64_675 = arith.constant 64 : index
%c2_676 = arith.constant 2 : index
%c56_677 = arith.constant 56 : index
%c3_678 = arith.constant 3 : index
%c56_679 = arith.constant 56 : index
%853 = tensor.empty() : tensor<1x64x56x56xf32>
// Element-wise ReLU over %cast_672: yields the input when it compares `ugt` against 0.0,
// otherwise yields 0.0.
%854 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_672 : tensor<1x64x56x56xf32>) outs(%853 : tensor<1x64x56x56xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// `ugt` is the unordered-or-greater predicate, so a NaN input also selects %in.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x64x56x56xf32>
%cast_680 = tensor.cast %854 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
%855 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%856 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_681 = torch.constant.int 12
%857 = torch.aten.item %855 : !torch.vtensor<[],f32> -> !torch.float
%858 = torch_c.to_f64 %857
%859 = torch.aten.item %856 : !torch.vtensor<[],si8> -> !torch.int
%860 = torch_c.to_i64 %859
%c1_682 = arith.constant 1 : index
%c1_683 = arith.constant 1 : index
%c64_684 = arith.constant 64 : index
%c2_685 = arith.constant 2 : index
%c56_686 = arith.constant 56 : index
%c3_687 = arith.constant 3 : index
%c56_688 = arith.constant 56 : index
%861 = tensor.empty() : tensor<1x64x56x56xi8>
// Quantize the f32 tensor %cast_680 to i8, element by element:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is %858 (f64 view of %857, literal 1.5625e-02) and zero_point is %860 (i64 view
// of %859, literal 0). Both conversions are computed once above this op and captured
// here instead of being re-materialized for every element.
// Rounding uses math.roundeven (ties to even), which is the rounding mode of ONNX
// QuantizeLinear / torch.quantize_per_tensor; math.round rounds ties away from zero and
// yields off-by-one results whenever x / scale lands exactly on k + 0.5.
%862 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_680 : tensor<1x64x56x56xf32>) outs(%861 : tensor<1x64x56x56xi8>) {
^bb0(%in: f32, %out: i8):
  // Scalar parameters as f32.
  %5774 = arith.sitofp %860 : i64 to f32
  %5775 = arith.truncf %858 : f64 to f32
  // Scale, round half to even, then shift by the zero point.
  %5776 = arith.divf %in, %5775 : f32
  %5777 = math.roundeven %5776 : f32
  %5778 = arith.addf %5777, %5774 : f32
  // Saturate to the signed 8-bit range before the float-to-int conversion.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5779 = arith.cmpf ult, %5778, %cst_6197 : f32
  %5780 = arith.cmpf ugt, %5778, %cst_6198 : f32
  %5781 = arith.select %5779, %cst_6197, %5778 : f32
  %5782 = arith.select %5780, %cst_6198, %5781 : f32
  %5783 = arith.fptosi %5782 : f32 to i8
  linalg.yield %5783 : i8
} -> tensor<1x64x56x56xi8>
%cast_689 = tensor.cast %862 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
%cast_690 = tensor.cast %cast_689 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
%863 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%864 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%865 = torch.aten.item %863 : !torch.vtensor<[],f32> -> !torch.float
%866 = torch_c.to_f64 %865
%867 = torch.aten.item %864 : !torch.vtensor<[],si8> -> !torch.int
%868 = torch_c.to_i64 %867
%cast_691 = tensor.cast %cast_690 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
%c1_692 = arith.constant 1 : index
%c1_693 = arith.constant 1 : index
%c64_694 = arith.constant 64 : index
%c2_695 = arith.constant 2 : index
%c56_696 = arith.constant 56 : index
%c3_697 = arith.constant 3 : index
%c56_698 = arith.constant 56 : index
%869 = tensor.empty() : tensor<1x64x56x56xf32>
// Dequantize the i8 tensor %cast_691 back to f32, element by element:
//   x = f32(i32(q) - zero_point) * scale
// zero_point is %868 (i64 view of %867) and scale is %866 (f64 view of %865); both are
// computed once above this op.
%870 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_691 : tensor<1x64x56x56xi8>) outs(%869 : tensor<1x64x56x56xf32>) {
^bb0(%in: i8, %out: f32):
  // Narrow the scalar parameters to the element-wise working types.
  %5774 = arith.truncf %866 : f64 to f32
  %5775 = arith.trunci %868 : i64 to i32
  // Widen the sample so the zero-point subtraction cannot wrap in i8.
  %5776 = arith.extsi %in : i8 to i32
  %5777 = arith.subi %5776, %5775 : i32
  %5778 = arith.sitofp %5777 : i32 to f32
  %5779 = arith.mulf %5778, %5774 : f32
  linalg.yield %5779 : f32
} -> tensor<1x64x56x56xf32>
%cast_699 = tensor.cast %870 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
%871 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%872 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_700 = torch.constant.int 12
%873 = torch.aten.item %871 : !torch.vtensor<[],f32> -> !torch.float
%874 = torch_c.to_f64 %873
%875 = torch.aten.item %872 : !torch.vtensor<[],si8> -> !torch.int
%876 = torch_c.to_i64 %875
%c1_701 = arith.constant 1 : index
%c0_702 = arith.constant 0 : index
%c64_703 = arith.constant 64 : index
%c1_704 = arith.constant 1 : index
%c64_705 = arith.constant 64 : index
%c2_706 = arith.constant 2 : index
%c3_707 = arith.constant 3 : index
%c3_708 = arith.constant 3 : index
%c3_709 = arith.constant 3 : index
%877 = tensor.empty() : tensor<64x64x3x3xi8>
// Quantize the f32 tensor %26 (64x64x3x3) to i8, element by element:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is %874 (f64 view of %873, literal 3.90625e-03) and zero_point is %876 (i64 view
// of %875, literal 0). Both conversions are computed once above this op and captured
// here instead of being re-materialized for every element.
// Rounding uses math.roundeven (ties to even), which is the rounding mode of ONNX
// QuantizeLinear / torch.quantize_per_tensor; math.round rounds ties away from zero and
// yields off-by-one results whenever x / scale lands exactly on k + 0.5.
%878 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%26 : tensor<64x64x3x3xf32>) outs(%877 : tensor<64x64x3x3xi8>) {
^bb0(%in: f32, %out: i8):
  // Scalar parameters as f32.
  %5774 = arith.sitofp %876 : i64 to f32
  %5775 = arith.truncf %874 : f64 to f32
  // Scale, round half to even, then shift by the zero point.
  %5776 = arith.divf %in, %5775 : f32
  %5777 = math.roundeven %5776 : f32
  %5778 = arith.addf %5777, %5774 : f32
  // Saturate to the signed 8-bit range before the float-to-int conversion.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5779 = arith.cmpf ult, %5778, %cst_6197 : f32
  %5780 = arith.cmpf ugt, %5778, %cst_6198 : f32
  %5781 = arith.select %5779, %cst_6197, %5778 : f32
  %5782 = arith.select %5780, %cst_6198, %5781 : f32
  %5783 = arith.fptosi %5782 : f32 to i8
  linalg.yield %5783 : i8
} -> tensor<64x64x3x3xi8>
%cast_710 = tensor.cast %878 : tensor<64x64x3x3xi8> to tensor<64x64x3x3xi8>
%cast_711 = tensor.cast %cast_710 : tensor<64x64x3x3xi8> to tensor<64x64x3x3xi8>
%879 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%880 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%881 = torch.aten.item %879 : !torch.vtensor<[],f32> -> !torch.float
%882 = torch_c.to_f64 %881
%883 = torch.aten.item %880 : !torch.vtensor<[],si8> -> !torch.int
%884 = torch_c.to_i64 %883
%cast_712 = tensor.cast %cast_711 : tensor<64x64x3x3xi8> to tensor<64x64x3x3xi8>
%c1_713 = arith.constant 1 : index
%c0_714 = arith.constant 0 : index
%c64_715 = arith.constant 64 : index
%c1_716 = arith.constant 1 : index
%c64_717 = arith.constant 64 : index
%c2_718 = arith.constant 2 : index
%c3_719 = arith.constant 3 : index
%c3_720 = arith.constant 3 : index
%c3_721 = arith.constant 3 : index
%885 = tensor.empty() : tensor<64x64x3x3xf32>
// Dequantize the i8 tensor %cast_712 (64x64x3x3) back to f32, element by element:
//   x = f32(i32(q) - zero_point) * scale
// zero_point is %884 (i64 view of %883) and scale is %882 (f64 view of %881); both are
// computed once above this op.
%886 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_712 : tensor<64x64x3x3xi8>) outs(%885 : tensor<64x64x3x3xf32>) {
^bb0(%in: i8, %out: f32):
  // Narrow the scalar parameters to the element-wise working types.
  %5774 = arith.truncf %882 : f64 to f32
  %5775 = arith.trunci %884 : i64 to i32
  // Widen the sample so the zero-point subtraction cannot wrap in i8.
  %5776 = arith.extsi %in : i8 to i32
  %5777 = arith.subi %5776, %5775 : i32
  %5778 = arith.sitofp %5777 : i32 to f32
  %5779 = arith.mulf %5778, %5774 : f32
  linalg.yield %5779 : f32
} -> tensor<64x64x3x3xf32>
%cast_722 = tensor.cast %886 : tensor<64x64x3x3xf32> to tensor<64x64x3x3xf32>
%887 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%888 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_723 = torch.constant.int 12
%889 = torch.aten.item %887 : !torch.vtensor<[],f32> -> !torch.float
%890 = torch_c.to_f64 %889
%891 = torch.aten.item %888 : !torch.vtensor<[],si8> -> !torch.int
%892 = torch_c.to_i64 %891
%c1_724 = arith.constant 1 : index
%c0_725 = arith.constant 0 : index
%c64_726 = arith.constant 64 : index
%893 = tensor.empty() : tensor<64xi8>
// Quantize the f32 vector %28 (64 elements) to i8, element by element:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is %890 (f64 view of %889, literal 7.8125e-03) and zero_point is %892 (i64 view
// of %891, literal 0). Both conversions are computed once above this op and captured
// here instead of being re-materialized for every element.
// Rounding uses math.roundeven (ties to even), which is the rounding mode of ONNX
// QuantizeLinear / torch.quantize_per_tensor; math.round rounds ties away from zero and
// yields off-by-one results whenever x / scale lands exactly on k + 0.5.
%894 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%28 : tensor<64xf32>) outs(%893 : tensor<64xi8>) {
^bb0(%in: f32, %out: i8):
  // Scalar parameters as f32.
  %5774 = arith.sitofp %892 : i64 to f32
  %5775 = arith.truncf %890 : f64 to f32
  // Scale, round half to even, then shift by the zero point.
  %5776 = arith.divf %in, %5775 : f32
  %5777 = math.roundeven %5776 : f32
  %5778 = arith.addf %5777, %5774 : f32
  // Saturate to the signed 8-bit range before the float-to-int conversion.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5779 = arith.cmpf ult, %5778, %cst_6197 : f32
  %5780 = arith.cmpf ugt, %5778, %cst_6198 : f32
  %5781 = arith.select %5779, %cst_6197, %5778 : f32
  %5782 = arith.select %5780, %cst_6198, %5781 : f32
  %5783 = arith.fptosi %5782 : f32 to i8
  linalg.yield %5783 : i8
} -> tensor<64xi8>
%cast_727 = tensor.cast %894 : tensor<64xi8> to tensor<64xi8>
%cast_728 = tensor.cast %cast_727 : tensor<64xi8> to tensor<64xi8>
%895 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%896 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%897 = torch.aten.item %895 : !torch.vtensor<[],f32> -> !torch.float
%898 = torch_c.to_f64 %897
%899 = torch.aten.item %896 : !torch.vtensor<[],si8> -> !torch.int
%900 = torch_c.to_i64 %899
%cast_729 = tensor.cast %cast_728 : tensor<64xi8> to tensor<64xi8>
%c1_730 = arith.constant 1 : index
%c0_731 = arith.constant 0 : index
%c64_732 = arith.constant 64 : index
%901 = tensor.empty() : tensor<64xf32>
// Dequantize the i8 vector %cast_729 (64 elements) back to f32, element by element:
//   x = f32(i32(q) - zero_point) * scale
// zero_point is %900 (i64 view of %899) and scale is %898 (f64 view of %897); both are
// computed once above this op.
%902 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_729 : tensor<64xi8>) outs(%901 : tensor<64xf32>) {
^bb0(%in: i8, %out: f32):
  // Narrow the scalar parameters to the element-wise working types.
  %5774 = arith.truncf %898 : f64 to f32
  %5775 = arith.trunci %900 : i64 to i32
  // Widen the sample so the zero-point subtraction cannot wrap in i8.
  %5776 = arith.extsi %in : i8 to i32
  %5777 = arith.subi %5776, %5775 : i32
  %5778 = arith.sitofp %5777 : i32 to f32
  %5779 = arith.mulf %5778, %5774 : f32
  linalg.yield %5779 : f32
} -> tensor<64xf32>
%cast_733 = tensor.cast %902 : tensor<64xf32> to tensor<64xf32>
// 2-D convolution (NCHW input, FCHW weights) with a per-channel bias:
//   input   %cast_699 : 1x64x56x56 f32
//   weights %cast_722 : 64x64x3x3 f32
//   bias    %cast_733 : 64 f32
// Spatial padding is 1 (%int1_734 / %int1_735), strides and dilations are 1, groups is 1 (%int1_742).
// The result is cast back to the static shape 1x64x56x56 (%cast_782).
%int1_734 = torch.constant.int 1
%int1_735 = torch.constant.int 1
%int1_736 = torch.constant.int 1
%int1_737 = torch.constant.int 1
%int1_738 = torch.constant.int 1
%int1_739 = torch.constant.int 1
%int0_740 = torch.constant.int 0
// Torch-level integer lists; none of %903..%906 is consumed by an op visible in this section.
// NOTE(review): %904/%905/%906 are presumably the stride, dilation and output-padding lists of the original torch convolution — confirm.
%903 = torch.prim.ListConstruct %int1_734, %int1_735 : (!torch.int, !torch.int) -> !torch.list<int>
%904 = torch.prim.ListConstruct %int1_736, %int1_737 : (!torch.int, !torch.int) -> !torch.list<int>
%905 = torch.prim.ListConstruct %int1_738, %int1_739 : (!torch.int, !torch.int) -> !torch.list<int>
%906 = torch.prim.ListConstruct %int0_740, %int0_740 : (!torch.int, !torch.int) -> !torch.list<int>
%false_741 = torch.constant.bool false
%int1_742 = torch.constant.int 1
// %907 = groups, %908 / %909 = padding for dims 2 / 3.
%907 = torch_c.to_i64 %int1_742
%908 = torch_c.to_i64 %int1_734
%909 = torch_c.to_i64 %int1_735
%910 = torch_c.to_i64 %int0_740
%911 = torch_c.to_i64 %int0_740
%c0_743 = arith.constant 0 : index
%c1_744 = arith.constant 1 : index
%c1_745 = arith.constant 1 : index
%c64_746 = arith.constant 64 : index
%c2_747 = arith.constant 2 : index
%c56_748 = arith.constant 56 : index
%c3_749 = arith.constant 3 : index
%c56_750 = arith.constant 56 : index
%c0_751 = arith.constant 0 : index
%c64_752 = arith.constant 64 : index
%c1_753 = arith.constant 1 : index
%c64_754 = arith.constant 64 : index
%c2_755 = arith.constant 2 : index
%c3_756 = arith.constant 3 : index
%c3_757 = arith.constant 3 : index
%c3_758 = arith.constant 3 : index
// Runtime checks that groups divides the input channel count (64) and the weight batch size (64).
%912 = arith.index_cast %907 : i64 to index
%c0_759 = arith.constant 0 : index
%913 = arith.remsi %c64_746, %912 : index
%914 = arith.cmpi eq, %c0_759, %913 : index
cf.assert %914, "invalid: groups must divide input channel size evenly."
%c0_760 = arith.constant 0 : index
%915 = arith.remsi %c64_752, %912 : index
%916 = arith.cmpi eq, %c0_760, %915 : index
cf.assert %916, "invalid: groups must divide weight batch size evenly."
%c1_i64_761 = arith.constant 1 : i64
%c1_i64_762 = arith.constant 1 : i64
%c1_i64_763 = arith.constant 1 : i64
%c1_i64_764 = arith.constant 1 : i64
%cst_765 = arith.constant 0.000000e+00 : f32
%c0_766 = arith.constant 0 : index
%c1_767 = arith.constant 1 : index
%c1_768 = arith.constant 1 : index
%c64_769 = arith.constant 64 : index
%c2_770 = arith.constant 2 : index
%c56_771 = arith.constant 56 : index
%c3_772 = arith.constant 3 : index
%c56_773 = arith.constant 56 : index
%c0_i64_774 = arith.constant 0 : i64
// Zero-pad the input: no padding on dims 0 and 1, %908 / %909 on both sides of dims 2 / 3.
%917 = arith.index_cast %c0_i64_774 : i64 to index
%918 = arith.index_cast %c0_i64_774 : i64 to index
%919 = arith.index_cast %908 : i64 to index
%920 = arith.index_cast %909 : i64 to index
%padded_775 = tensor.pad %cast_699 low[%917, %918, %919, %920] high[%917, %918, %919, %920] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_765 : f32
} : tensor<1x64x56x56xf32> to tensor<?x?x?x?xf32>
// Output size along dim 2:
//   (56 + 2*pad - %c1_i64_761*(3 - 1) - 1) floordiv %c1_i64_763 + 1, which evaluates to 56 here.
// NOTE(review): %c1_i64_761 and %c1_i64_763 are presumably the dilation and stride — confirm.
%921 = arith.index_cast %c3_756 : index to i64
%c1_i64_776 = arith.constant 1 : i64
%c2_i64_777 = arith.constant 2 : i64
%922 = arith.muli %908, %c2_i64_777 : i64
%923 = arith.index_cast %c56_748 : index to i64
%924 = arith.addi %923, %922 : i64
%925 = arith.subi %921, %c1_i64_776 : i64
%926 = arith.muli %c1_i64_761, %925 : i64
%927 = arith.subi %924, %926 : i64
%928 = arith.subi %927, %c1_i64_776 : i64
%929 = arith.floordivsi %928, %c1_i64_763 : i64
%930 = arith.addi %929, %c1_i64_776 : i64
%931 = arith.index_cast %930 : i64 to index
// Same computation for dim 3.
%932 = arith.index_cast %c3_758 : index to i64
%c1_i64_778 = arith.constant 1 : i64
%c2_i64_779 = arith.constant 2 : i64
%933 = arith.muli %909, %c2_i64_779 : i64
%934 = arith.index_cast %c56_750 : index to i64
%935 = arith.addi %934, %933 : i64
%936 = arith.subi %932, %c1_i64_778 : i64
%937 = arith.muli %c1_i64_762, %936 : i64
%938 = arith.subi %935, %937 : i64
%939 = arith.subi %938, %c1_i64_778 : i64
%940 = arith.floordivsi %939, %c1_i64_764 : i64
%941 = arith.addi %940, %c1_i64_778 : i64
%942 = arith.index_cast %941 : i64 to index
// Broadcast the bias along dim 1 (#map3) into the output tensor that the convolution uses as its outs operand.
%943 = tensor.empty(%931, %942) : tensor<1x64x?x?xf32>
%944 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_733 : tensor<64xf32>) outs(%943 : tensor<1x64x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x64x?x?xf32>
// %945 / %946 (channels per group) are not consumed by any op visible in this section.
%945 = arith.floordivsi %c64_746, %912 : index
%946 = arith.floordivsi %c64_752, %912 : index
%c0_780 = arith.constant 0 : index
%c1_781 = arith.constant 1 : index
%947 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_775, %cast_722 : tensor<?x?x?x?xf32>, tensor<64x64x3x3xf32>) outs(%944 : tensor<1x64x?x?xf32>) -> tensor<1x64x?x?xf32>
%cast_782 = tensor.cast %947 : tensor<1x64x?x?xf32> to tensor<1x64x56x56xf32>
// Elementwise ReLU on %cast_782 (1x64x56x56 f32): out = (in > 0) ? in : 0.
// The index constants below are not referenced by any op visible in this section.
%c1_783 = arith.constant 1 : index
%c1_784 = arith.constant 1 : index
%c64_785 = arith.constant 64 : index
%c2_786 = arith.constant 2 : index
%c56_787 = arith.constant 56 : index
%c3_788 = arith.constant 3 : index
%c56_789 = arith.constant 56 : index
%948 = tensor.empty() : tensor<1x64x56x56xf32>
%949 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_782 : tensor<1x64x56x56xf32>) outs(%948 : tensor<1x64x56x56xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// `ugt` is the unordered greater-than predicate, so a NaN input selects %in and is passed through.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x64x56x56xf32>
%cast_790 = tensor.cast %949 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
// Quantize the f32 activation %cast_790 (1x64x56x56) to i8, elementwise:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 1.5625e-02 (%950) and zero_point = 0 (%951).
%950 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%951 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_791, %953 and %955 are not consumed by any op visible in this section.
// NOTE(review): 12 is presumably the torch dtype code of the quantized type — confirm.
%int12_791 = torch.constant.int 12
%952 = torch.aten.item %950 : !torch.vtensor<[],f32> -> !torch.float
%953 = torch_c.to_f64 %952
%954 = torch.aten.item %951 : !torch.vtensor<[],si8> -> !torch.int
%955 = torch_c.to_i64 %954
%c1_792 = arith.constant 1 : index
%c1_793 = arith.constant 1 : index
%c64_794 = arith.constant 64 : index
%c2_795 = arith.constant 2 : index
%c56_796 = arith.constant 56 : index
%c3_797 = arith.constant 3 : index
%c56_798 = arith.constant 56 : index
%956 = tensor.empty() : tensor<1x64x56x56xi8>
%957 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_790 : tensor<1x64x56x56xf32>) outs(%956 : tensor<1x64x56x56xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32 and scale narrowed from f64 to f32.
%5774 = torch_c.to_i64 %954
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %952
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round half to even, the rounding mode ONNX QuantizeLinear specifies;
// math.round would round ties away from zero and give a different result on exact .5 values.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range. Both comparisons are unordered, so a NaN value ends up as 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x64x56x56xi8>
%cast_799 = tensor.cast %957 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
%cast_800 = tensor.cast %cast_799 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
// Dequantize the i8 activation %cast_800 (1x64x56x56) back to f32, elementwise:
//   out = f32(sext_i32(in) - zero_point) * scale
// with scale = 1.5625e-02 (%958) and zero_point = 0 (%959).
%958 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%959 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %961 and %963 are not consumed by any op visible in this section; the region re-derives both scalars.
%960 = torch.aten.item %958 : !torch.vtensor<[],f32> -> !torch.float
%961 = torch_c.to_f64 %960
%962 = torch.aten.item %959 : !torch.vtensor<[],si8> -> !torch.int
%963 = torch_c.to_i64 %962
%cast_801 = tensor.cast %cast_800 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
%c1_802 = arith.constant 1 : index
%c1_803 = arith.constant 1 : index
%c64_804 = arith.constant 64 : index
%c2_805 = arith.constant 2 : index
%c56_806 = arith.constant 56 : index
%c3_807 = arith.constant 3 : index
%c56_808 = arith.constant 56 : index
%964 = tensor.empty() : tensor<1x64x56x56xf32>
%965 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_801 : tensor<1x64x56x56xi8>) outs(%964 : tensor<1x64x56x56xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend the element and subtract the zero point in i32.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %962
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// The scale is narrowed from f64 to f32 before the multiply.
%5779 = torch_c.to_f64 %960
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x64x56x56xf32>
%cast_809 = tensor.cast %965 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
// Quantize the f32 tensor %30 (256x64x1x1) to i8, elementwise:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 3.90625e-03 (%966) and zero_point = 0 (%967).
// The input is indexed through #map4, which pins the two trailing unit dims to index 0.
%966 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%967 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_810, %969 and %971 are not consumed by any op visible in this section.
// NOTE(review): 12 is presumably the torch dtype code of the quantized type — confirm.
%int12_810 = torch.constant.int 12
%968 = torch.aten.item %966 : !torch.vtensor<[],f32> -> !torch.float
%969 = torch_c.to_f64 %968
%970 = torch.aten.item %967 : !torch.vtensor<[],si8> -> !torch.int
%971 = torch_c.to_i64 %970
%c1_811 = arith.constant 1 : index
%c0_812 = arith.constant 0 : index
%c256_813 = arith.constant 256 : index
%c1_814 = arith.constant 1 : index
%c64_815 = arith.constant 64 : index
%972 = tensor.empty() : tensor<256x64x1x1xi8>
%973 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%30 : tensor<256x64x1x1xf32>) outs(%972 : tensor<256x64x1x1xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32 and scale narrowed from f64 to f32.
%5774 = torch_c.to_i64 %970
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %968
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round half to even, the rounding mode ONNX QuantizeLinear specifies;
// math.round would round ties away from zero and give a different result on exact .5 values.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range. Both comparisons are unordered, so a NaN value ends up as 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256x64x1x1xi8>
%cast_816 = tensor.cast %973 : tensor<256x64x1x1xi8> to tensor<256x64x1x1xi8>
%cast_817 = tensor.cast %cast_816 : tensor<256x64x1x1xi8> to tensor<256x64x1x1xi8>
// Dequantize the i8 tensor %cast_817 (256x64x1x1) back to f32, elementwise:
//   out = f32(sext_i32(in) - zero_point) * scale
// with scale = 3.90625e-03 (%974) and zero_point = 0 (%975).
%974 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%975 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %977 and %979 are not consumed by any op visible in this section; the region re-derives both scalars.
%976 = torch.aten.item %974 : !torch.vtensor<[],f32> -> !torch.float
%977 = torch_c.to_f64 %976
%978 = torch.aten.item %975 : !torch.vtensor<[],si8> -> !torch.int
%979 = torch_c.to_i64 %978
%cast_818 = tensor.cast %cast_817 : tensor<256x64x1x1xi8> to tensor<256x64x1x1xi8>
%c1_819 = arith.constant 1 : index
%c0_820 = arith.constant 0 : index
%c256_821 = arith.constant 256 : index
%c1_822 = arith.constant 1 : index
%c64_823 = arith.constant 64 : index
%980 = tensor.empty() : tensor<256x64x1x1xf32>
%981 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_818 : tensor<256x64x1x1xi8>) outs(%980 : tensor<256x64x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend the element and subtract the zero point in i32.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %978
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// The scale is narrowed from f64 to f32 before the multiply.
%5779 = torch_c.to_f64 %976
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<256x64x1x1xf32>
%cast_824 = tensor.cast %981 : tensor<256x64x1x1xf32> to tensor<256x64x1x1xf32>
// Quantize the f32 vector %32 (256 elements) to i8, elementwise:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 7.8125e-03 (%982) and zero_point = 0 (%983).
%982 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%983 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_825, %985 and %987 are not consumed by any op visible in this section.
// NOTE(review): 12 is presumably the torch dtype code of the quantized type — confirm.
%int12_825 = torch.constant.int 12
%984 = torch.aten.item %982 : !torch.vtensor<[],f32> -> !torch.float
%985 = torch_c.to_f64 %984
%986 = torch.aten.item %983 : !torch.vtensor<[],si8> -> !torch.int
%987 = torch_c.to_i64 %986
%c1_826 = arith.constant 1 : index
%c0_827 = arith.constant 0 : index
%c256_828 = arith.constant 256 : index
%988 = tensor.empty() : tensor<256xi8>
%989 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%32 : tensor<256xf32>) outs(%988 : tensor<256xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32 and scale narrowed from f64 to f32.
%5774 = torch_c.to_i64 %986
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %984
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round half to even, the rounding mode ONNX QuantizeLinear specifies;
// math.round would round ties away from zero and give a different result on exact .5 values.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range. Both comparisons are unordered, so a NaN value ends up as 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256xi8>
%cast_829 = tensor.cast %989 : tensor<256xi8> to tensor<256xi8>
%cast_830 = tensor.cast %cast_829 : tensor<256xi8> to tensor<256xi8>
// Dequantize the i8 vector %cast_830 (256 elements) back to f32, elementwise:
//   out = f32(sext_i32(in) - zero_point) * scale
// with scale = 7.8125e-03 (%990) and zero_point = 0 (%991).
%990 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%991 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %993 and %995 are not consumed by any op visible in this section; the region re-derives both scalars.
%992 = torch.aten.item %990 : !torch.vtensor<[],f32> -> !torch.float
%993 = torch_c.to_f64 %992
%994 = torch.aten.item %991 : !torch.vtensor<[],si8> -> !torch.int
%995 = torch_c.to_i64 %994
%cast_831 = tensor.cast %cast_830 : tensor<256xi8> to tensor<256xi8>
%c1_832 = arith.constant 1 : index
%c0_833 = arith.constant 0 : index
%c256_834 = arith.constant 256 : index
%996 = tensor.empty() : tensor<256xf32>
%997 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_831 : tensor<256xi8>) outs(%996 : tensor<256xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend the element and subtract the zero point in i32.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %994
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// The scale is narrowed from f64 to f32 before the multiply.
%5779 = torch_c.to_f64 %992
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<256xf32>
%cast_835 = tensor.cast %997 : tensor<256xf32> to tensor<256xf32>
// 2-D convolution (NCHW input, FCHW weights) with a per-channel bias:
//   input   %cast_809 : 1x64x56x56 f32
//   weights %cast_824 : 256x64x1x1 f32
//   bias    %cast_835 : 256 f32
// Spatial padding is 0 (%int0_836 / %int0_837), strides and dilations are 1, groups is 1 (%int1_844).
// The result is cast back to the static shape 1x256x56x56 (%cast_884).
%int0_836 = torch.constant.int 0
%int0_837 = torch.constant.int 0
%int1_838 = torch.constant.int 1
%int1_839 = torch.constant.int 1
%int1_840 = torch.constant.int 1
%int1_841 = torch.constant.int 1
%int0_842 = torch.constant.int 0
// Torch-level integer lists; none of %998..%1001 is consumed by an op visible in this section.
// NOTE(review): %999/%1000/%1001 are presumably the stride, dilation and output-padding lists of the original torch convolution — confirm.
%998 = torch.prim.ListConstruct %int0_836, %int0_837 : (!torch.int, !torch.int) -> !torch.list<int>
%999 = torch.prim.ListConstruct %int1_838, %int1_839 : (!torch.int, !torch.int) -> !torch.list<int>
%1000 = torch.prim.ListConstruct %int1_840, %int1_841 : (!torch.int, !torch.int) -> !torch.list<int>
%1001 = torch.prim.ListConstruct %int0_842, %int0_842 : (!torch.int, !torch.int) -> !torch.list<int>
%false_843 = torch.constant.bool false
%int1_844 = torch.constant.int 1
// %1002 = groups, %1003 / %1004 = padding for dims 2 / 3.
%1002 = torch_c.to_i64 %int1_844
%1003 = torch_c.to_i64 %int0_836
%1004 = torch_c.to_i64 %int0_837
%1005 = torch_c.to_i64 %int0_842
%1006 = torch_c.to_i64 %int0_842
%c0_845 = arith.constant 0 : index
%c1_846 = arith.constant 1 : index
%c1_847 = arith.constant 1 : index
%c64_848 = arith.constant 64 : index
%c2_849 = arith.constant 2 : index
%c56_850 = arith.constant 56 : index
%c3_851 = arith.constant 3 : index
%c56_852 = arith.constant 56 : index
%c0_853 = arith.constant 0 : index
%c256_854 = arith.constant 256 : index
%c1_855 = arith.constant 1 : index
%c64_856 = arith.constant 64 : index
%c2_857 = arith.constant 2 : index
%c1_858 = arith.constant 1 : index
%c3_859 = arith.constant 3 : index
%c1_860 = arith.constant 1 : index
// Runtime checks that groups divides the input channel count (64) and the weight batch size (256).
%1007 = arith.index_cast %1002 : i64 to index
%c0_861 = arith.constant 0 : index
%1008 = arith.remsi %c64_848, %1007 : index
%1009 = arith.cmpi eq, %c0_861, %1008 : index
cf.assert %1009, "invalid: groups must divide input channel size evenly."
%c0_862 = arith.constant 0 : index
%1010 = arith.remsi %c256_854, %1007 : index
%1011 = arith.cmpi eq, %c0_862, %1010 : index
cf.assert %1011, "invalid: groups must divide weight batch size evenly."
%c1_i64_863 = arith.constant 1 : i64
%c1_i64_864 = arith.constant 1 : i64
%c1_i64_865 = arith.constant 1 : i64
%c1_i64_866 = arith.constant 1 : i64
%cst_867 = arith.constant 0.000000e+00 : f32
%c0_868 = arith.constant 0 : index
%c1_869 = arith.constant 1 : index
%c1_870 = arith.constant 1 : index
%c64_871 = arith.constant 64 : index
%c2_872 = arith.constant 2 : index
%c56_873 = arith.constant 56 : index
%c3_874 = arith.constant 3 : index
%c56_875 = arith.constant 56 : index
%c0_i64_876 = arith.constant 0 : i64
// Pad op with all-zero low/high amounts here (%1003 and %1004 are 0); it still yields a dynamically shaped tensor.
%1012 = arith.index_cast %c0_i64_876 : i64 to index
%1013 = arith.index_cast %c0_i64_876 : i64 to index
%1014 = arith.index_cast %1003 : i64 to index
%1015 = arith.index_cast %1004 : i64 to index
%padded_877 = tensor.pad %cast_809 low[%1012, %1013, %1014, %1015] high[%1012, %1013, %1014, %1015] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_867 : f32
} : tensor<1x64x56x56xf32> to tensor<?x?x?x?xf32>
// Output size along dim 2:
//   (56 + 2*pad - %c1_i64_863*(1 - 1) - 1) floordiv %c1_i64_865 + 1, which evaluates to 56 here.
// NOTE(review): %c1_i64_863 and %c1_i64_865 are presumably the dilation and stride — confirm.
%1016 = arith.index_cast %c1_858 : index to i64
%c1_i64_878 = arith.constant 1 : i64
%c2_i64_879 = arith.constant 2 : i64
%1017 = arith.muli %1003, %c2_i64_879 : i64
%1018 = arith.index_cast %c56_850 : index to i64
%1019 = arith.addi %1018, %1017 : i64
%1020 = arith.subi %1016, %c1_i64_878 : i64
%1021 = arith.muli %c1_i64_863, %1020 : i64
%1022 = arith.subi %1019, %1021 : i64
%1023 = arith.subi %1022, %c1_i64_878 : i64
%1024 = arith.floordivsi %1023, %c1_i64_865 : i64
%1025 = arith.addi %1024, %c1_i64_878 : i64
%1026 = arith.index_cast %1025 : i64 to index
// Same computation for dim 3.
%1027 = arith.index_cast %c1_860 : index to i64
%c1_i64_880 = arith.constant 1 : i64
%c2_i64_881 = arith.constant 2 : i64
%1028 = arith.muli %1004, %c2_i64_881 : i64
%1029 = arith.index_cast %c56_852 : index to i64
%1030 = arith.addi %1029, %1028 : i64
%1031 = arith.subi %1027, %c1_i64_880 : i64
%1032 = arith.muli %c1_i64_864, %1031 : i64
%1033 = arith.subi %1030, %1032 : i64
%1034 = arith.subi %1033, %c1_i64_880 : i64
%1035 = arith.floordivsi %1034, %c1_i64_866 : i64
%1036 = arith.addi %1035, %c1_i64_880 : i64
%1037 = arith.index_cast %1036 : i64 to index
// Broadcast the bias along dim 1 (#map3) into the output tensor that the convolution uses as its outs operand.
%1038 = tensor.empty(%1026, %1037) : tensor<1x256x?x?xf32>
%1039 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_835 : tensor<256xf32>) outs(%1038 : tensor<1x256x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x256x?x?xf32>
// %1040 / %1041 (channels per group) are not consumed by any op visible in this section.
%1040 = arith.floordivsi %c64_848, %1007 : index
%1041 = arith.floordivsi %c256_854, %1007 : index
%c0_882 = arith.constant 0 : index
%c1_883 = arith.constant 1 : index
%1042 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_877, %cast_824 : tensor<?x?x?x?xf32>, tensor<256x64x1x1xf32>) outs(%1039 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
%cast_884 = tensor.cast %1042 : tensor<1x256x?x?xf32> to tensor<1x256x56x56xf32>
// Quantize the f32 convolution result %cast_884 (1x256x56x56) to i8, elementwise:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 7.8125e-03 (%1043) and zero_point = 0 (%1044).
%1043 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1044 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_885, %1046 and %1048 are not consumed by any op visible in this section.
// NOTE(review): 12 is presumably the torch dtype code of the quantized type — confirm.
%int12_885 = torch.constant.int 12
%1045 = torch.aten.item %1043 : !torch.vtensor<[],f32> -> !torch.float
%1046 = torch_c.to_f64 %1045
%1047 = torch.aten.item %1044 : !torch.vtensor<[],si8> -> !torch.int
%1048 = torch_c.to_i64 %1047
%c1_886 = arith.constant 1 : index
%c1_887 = arith.constant 1 : index
%c256_888 = arith.constant 256 : index
%c2_889 = arith.constant 2 : index
%c56_890 = arith.constant 56 : index
%c3_891 = arith.constant 3 : index
%c56_892 = arith.constant 56 : index
%1049 = tensor.empty() : tensor<1x256x56x56xi8>
%1050 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_884 : tensor<1x256x56x56xf32>) outs(%1049 : tensor<1x256x56x56xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32 and scale narrowed from f64 to f32.
%5774 = torch_c.to_i64 %1047
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %1045
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round half to even, the rounding mode ONNX QuantizeLinear specifies;
// math.round would round ties away from zero and give a different result on exact .5 values.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range. Both comparisons are unordered, so a NaN value ends up as 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x256x56x56xi8>
%cast_893 = tensor.cast %1050 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
%cast_894 = tensor.cast %cast_893 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
// Dequantize the i8 tensor %cast_894 (1x256x56x56) back to f32, elementwise:
//   out = f32(sext_i32(in) - zero_point) * scale
// with scale = 7.8125e-03 (%1051) and zero_point = 0 (%1052).
%1051 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1052 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %1054 and %1056 are not consumed by any op visible in this section; the region re-derives both scalars.
%1053 = torch.aten.item %1051 : !torch.vtensor<[],f32> -> !torch.float
%1054 = torch_c.to_f64 %1053
%1055 = torch.aten.item %1052 : !torch.vtensor<[],si8> -> !torch.int
%1056 = torch_c.to_i64 %1055
%cast_895 = tensor.cast %cast_894 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
%c1_896 = arith.constant 1 : index
%c1_897 = arith.constant 1 : index
%c256_898 = arith.constant 256 : index
%c2_899 = arith.constant 2 : index
%c56_900 = arith.constant 56 : index
%c3_901 = arith.constant 3 : index
%c56_902 = arith.constant 56 : index
%1057 = tensor.empty() : tensor<1x256x56x56xf32>
%1058 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_895 : tensor<1x256x56x56xi8>) outs(%1057 : tensor<1x256x56x56xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend the element and subtract the zero point in i32.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %1055
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// The scale is narrowed from f64 to f32 before the multiply.
%5779 = torch_c.to_f64 %1053
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x256x56x56xf32>
%cast_903 = tensor.cast %1058 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
// Elementwise add of two 1x256x56x56 f32 tensors with a scalar multiplier on the second operand:
//   out = %cast_903 + %cast_597 * alpha, where alpha = 1 (%int1_904).
// %cast_597 is defined above this section.
%int1_904 = torch.constant.int 1
%1059 = torch_c.to_i64 %int1_904
%c1_905 = arith.constant 1 : index
%c1_906 = arith.constant 1 : index
%c256_907 = arith.constant 256 : index
%c2_908 = arith.constant 2 : index
%c56_909 = arith.constant 56 : index
%c3_910 = arith.constant 3 : index
%c56_911 = arith.constant 56 : index
%c1_912 = arith.constant 1 : index
%c256_913 = arith.constant 256 : index
// Broadcast-compatibility checks; each one compares two equal constants (256 vs 256, 56 vs 56, 56 vs 56).
%1060 = arith.cmpi eq, %c256_907, %c256_913 : index
cf.assert %1060, "mismatched size for broadcast"
%c2_914 = arith.constant 2 : index
%c56_915 = arith.constant 56 : index
%1061 = arith.cmpi eq, %c56_909, %c56_915 : index
cf.assert %1061, "mismatched size for broadcast"
%c3_916 = arith.constant 3 : index
%c56_917 = arith.constant 56 : index
%1062 = arith.cmpi eq, %c56_911, %c56_917 : index
cf.assert %1062, "mismatched size for broadcast"
%1063 = tensor.empty() : tensor<1x256x56x56xf32>
%1064 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_903, %cast_597 : tensor<1x256x56x56xf32>, tensor<1x256x56x56xf32>) outs(%1063 : tensor<1x256x56x56xf32>) {
^bb0(%in: f32, %in_6197: f32, %out: f32):
// alpha is converted from i64 to f32 and applied to the second operand only.
%5774 = arith.sitofp %1059 : i64 to f32
%5775 = arith.mulf %in_6197, %5774 : f32
%5776 = arith.addf %in, %5775 : f32
linalg.yield %5776 : f32
} -> tensor<1x256x56x56xf32>
%cast_918 = tensor.cast %1064 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
// Elementwise ReLU on %cast_918 (1x256x56x56 f32): out = (in > 0) ? in : 0.
// The index constants below are not referenced by any op visible in this section.
%c1_919 = arith.constant 1 : index
%c1_920 = arith.constant 1 : index
%c256_921 = arith.constant 256 : index
%c2_922 = arith.constant 2 : index
%c56_923 = arith.constant 56 : index
%c3_924 = arith.constant 3 : index
%c56_925 = arith.constant 56 : index
%1065 = tensor.empty() : tensor<1x256x56x56xf32>
%1066 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_918 : tensor<1x256x56x56xf32>) outs(%1065 : tensor<1x256x56x56xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// `ugt` is the unordered greater-than predicate, so a NaN input selects %in and is passed through.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x256x56x56xf32>
%cast_926 = tensor.cast %1066 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
// Quantize the f32 activation %cast_926 (1x256x56x56) to i8, elementwise:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 1.5625e-02 (%1067) and zero_point = 0 (%1068).
%1067 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%1068 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_927, %1070 and %1072 are not consumed by any op visible in this section.
// NOTE(review): 12 is presumably the torch dtype code of the quantized type — confirm.
%int12_927 = torch.constant.int 12
%1069 = torch.aten.item %1067 : !torch.vtensor<[],f32> -> !torch.float
%1070 = torch_c.to_f64 %1069
%1071 = torch.aten.item %1068 : !torch.vtensor<[],si8> -> !torch.int
%1072 = torch_c.to_i64 %1071
%c1_928 = arith.constant 1 : index
%c1_929 = arith.constant 1 : index
%c256_930 = arith.constant 256 : index
%c2_931 = arith.constant 2 : index
%c56_932 = arith.constant 56 : index
%c3_933 = arith.constant 3 : index
%c56_934 = arith.constant 56 : index
%1073 = tensor.empty() : tensor<1x256x56x56xi8>
%1074 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_926 : tensor<1x256x56x56xf32>) outs(%1073 : tensor<1x256x56x56xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32 and scale narrowed from f64 to f32.
%5774 = torch_c.to_i64 %1071
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %1069
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round half to even, the rounding mode ONNX QuantizeLinear specifies;
// math.round would round ties away from zero and give a different result on exact .5 values.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range. Both comparisons are unordered, so a NaN value ends up as 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x256x56x56xi8>
%cast_935 = tensor.cast %1074 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
%cast_936 = tensor.cast %cast_935 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
// Dequantize the i8 activation %cast_936 (1x256x56x56) back to f32, elementwise:
//   out = f32(sext_i32(in) - zero_point) * scale
// with scale = 1.5625e-02 (%1075) and zero_point = 0 (%1076).
%1075 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%1076 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %1078 and %1080 are not consumed by any op visible in this section; the region re-derives both scalars.
%1077 = torch.aten.item %1075 : !torch.vtensor<[],f32> -> !torch.float
%1078 = torch_c.to_f64 %1077
%1079 = torch.aten.item %1076 : !torch.vtensor<[],si8> -> !torch.int
%1080 = torch_c.to_i64 %1079
%cast_937 = tensor.cast %cast_936 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
%c1_938 = arith.constant 1 : index
%c1_939 = arith.constant 1 : index
%c256_940 = arith.constant 256 : index
%c2_941 = arith.constant 2 : index
%c56_942 = arith.constant 56 : index
%c3_943 = arith.constant 3 : index
%c56_944 = arith.constant 56 : index
%1081 = tensor.empty() : tensor<1x256x56x56xf32>
%1082 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_937 : tensor<1x256x56x56xi8>) outs(%1081 : tensor<1x256x56x56xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend the element and subtract the zero point in i32.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %1079
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// The scale is narrowed from f64 to f32 before the multiply.
%5779 = torch_c.to_f64 %1077
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x256x56x56xf32>
%cast_945 = tensor.cast %1082 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
// Quantize the f32 tensor %34 (64x256x1x1) to i8, elementwise:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 3.90625e-03 (%1083) and zero_point = 0 (%1084).
// The input is indexed through #map4, which pins the two trailing unit dims to index 0.
%1083 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1084 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_946, %1086 and %1088 are not consumed by any op visible in this section.
// NOTE(review): 12 is presumably the torch dtype code of the quantized type — confirm.
%int12_946 = torch.constant.int 12
%1085 = torch.aten.item %1083 : !torch.vtensor<[],f32> -> !torch.float
%1086 = torch_c.to_f64 %1085
%1087 = torch.aten.item %1084 : !torch.vtensor<[],si8> -> !torch.int
%1088 = torch_c.to_i64 %1087
%c1_947 = arith.constant 1 : index
%c0_948 = arith.constant 0 : index
%c64_949 = arith.constant 64 : index
%c1_950 = arith.constant 1 : index
%c256_951 = arith.constant 256 : index
%1089 = tensor.empty() : tensor<64x256x1x1xi8>
%1090 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%34 : tensor<64x256x1x1xf32>) outs(%1089 : tensor<64x256x1x1xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32 and scale narrowed from f64 to f32.
%5774 = torch_c.to_i64 %1087
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %1085
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round half to even, the rounding mode ONNX QuantizeLinear specifies;
// math.round would round ties away from zero and give a different result on exact .5 values.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range. Both comparisons are unordered, so a NaN value ends up as 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<64x256x1x1xi8>
%cast_952 = tensor.cast %1090 : tensor<64x256x1x1xi8> to tensor<64x256x1x1xi8>
%cast_953 = tensor.cast %cast_952 : tensor<64x256x1x1xi8> to tensor<64x256x1x1xi8>
// Dequantize the i8 tensor %cast_953 (64x256x1x1) back to f32, elementwise:
//   out = f32(sext_i32(in) - zero_point) * scale
// with scale = 3.90625e-03 (%1091) and zero_point = 0 (%1092).
%1091 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1092 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %1094 and %1096 are not consumed by any op visible in this section; the region re-derives both scalars.
%1093 = torch.aten.item %1091 : !torch.vtensor<[],f32> -> !torch.float
%1094 = torch_c.to_f64 %1093
%1095 = torch.aten.item %1092 : !torch.vtensor<[],si8> -> !torch.int
%1096 = torch_c.to_i64 %1095
%cast_954 = tensor.cast %cast_953 : tensor<64x256x1x1xi8> to tensor<64x256x1x1xi8>
%c1_955 = arith.constant 1 : index
%c0_956 = arith.constant 0 : index
%c64_957 = arith.constant 64 : index
%c1_958 = arith.constant 1 : index
%c256_959 = arith.constant 256 : index
%1097 = tensor.empty() : tensor<64x256x1x1xf32>
%1098 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_954 : tensor<64x256x1x1xi8>) outs(%1097 : tensor<64x256x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend the element and subtract the zero point in i32.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %1095
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// The scale is narrowed from f64 to f32 before the multiply.
%5779 = torch_c.to_f64 %1093
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<64x256x1x1xf32>
%cast_960 = tensor.cast %1098 : tensor<64x256x1x1xf32> to tensor<64x256x1x1xf32>
// Quantize the f32 vector %36 (64 elements) to i8, elementwise:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 1.5625e-02 (%1099) and zero_point = 0 (%1100).
%1099 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%1100 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_961, %1102 and %1104 are not consumed by any op visible in this section.
// NOTE(review): 12 is presumably the torch dtype code of the quantized type — confirm.
%int12_961 = torch.constant.int 12
%1101 = torch.aten.item %1099 : !torch.vtensor<[],f32> -> !torch.float
%1102 = torch_c.to_f64 %1101
%1103 = torch.aten.item %1100 : !torch.vtensor<[],si8> -> !torch.int
%1104 = torch_c.to_i64 %1103
%c1_962 = arith.constant 1 : index
%c0_963 = arith.constant 0 : index
%c64_964 = arith.constant 64 : index
%1105 = tensor.empty() : tensor<64xi8>
%1106 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%36 : tensor<64xf32>) outs(%1105 : tensor<64xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32 and scale narrowed from f64 to f32.
%5774 = torch_c.to_i64 %1103
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %1101
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round half to even, the rounding mode ONNX QuantizeLinear specifies;
// math.round would round ties away from zero and give a different result on exact .5 values.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range. Both comparisons are unordered, so a NaN value ends up as 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<64xi8>
%cast_965 = tensor.cast %1106 : tensor<64xi8> to tensor<64xi8>
%cast_966 = tensor.cast %cast_965 : tensor<64xi8> to tensor<64xi8>
// Dequantize the i8 vector %cast_966 (64 elements) back to f32, elementwise:
//   out = f32(sext_i32(in) - zero_point) * scale
// with scale = 1.5625e-02 (%1107) and zero_point = 0 (%1108).
%1107 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%1108 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %1110 and %1112 are not consumed by any op visible in this section; the region re-derives both scalars.
%1109 = torch.aten.item %1107 : !torch.vtensor<[],f32> -> !torch.float
%1110 = torch_c.to_f64 %1109
%1111 = torch.aten.item %1108 : !torch.vtensor<[],si8> -> !torch.int
%1112 = torch_c.to_i64 %1111
%cast_967 = tensor.cast %cast_966 : tensor<64xi8> to tensor<64xi8>
%c1_968 = arith.constant 1 : index
%c0_969 = arith.constant 0 : index
%c64_970 = arith.constant 64 : index
%1113 = tensor.empty() : tensor<64xf32>
%1114 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_967 : tensor<64xi8>) outs(%1113 : tensor<64xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend the element and subtract the zero point in i32.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %1111
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// The scale is narrowed from f64 to f32 before the multiply.
%5779 = torch_c.to_f64 %1109
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<64xf32>
%cast_971 = tensor.cast %1114 : tensor<64xf32> to tensor<64xf32>
// 1x1 convolution of %cast_945 (1x256x56x56) with filter %cast_960 (64x256x1x1),
// strides = 1, dilations = 1. The output tensor is pre-filled with %cast_971 broadcast
// along dim 1, so the convolution accumulates on top of that per-channel value.
%int0_972 = torch.constant.int 0
%int0_973 = torch.constant.int 0
%int1_974 = torch.constant.int 1
%int1_975 = torch.constant.int 1
%int1_976 = torch.constant.int 1
%int1_977 = torch.constant.int 1
%int0_978 = torch.constant.int 0
// Integer lists built from the constants above. They are not consumed in the visible code;
// presumably padding / stride / dilation / output-padding of the original op — TODO confirm.
%1115 = torch.prim.ListConstruct %int0_972, %int0_973 : (!torch.int, !torch.int) -> !torch.list<int>
%1116 = torch.prim.ListConstruct %int1_974, %int1_975 : (!torch.int, !torch.int) -> !torch.list<int>
%1117 = torch.prim.ListConstruct %int1_976, %int1_977 : (!torch.int, !torch.int) -> !torch.list<int>
%1118 = torch.prim.ListConstruct %int0_978, %int0_978 : (!torch.int, !torch.int) -> !torch.list<int>
%false_979 = torch.constant.bool false
%int1_980 = torch.constant.int 1
// Builtin i64 views of the torch ints: %1119 = 1 (group count checked below),
// %1120 / %1121 = 0 (padding applied to dims 2 and 3 below).
%1119 = torch_c.to_i64 %int1_980
%1120 = torch_c.to_i64 %int0_972
%1121 = torch_c.to_i64 %int0_973
%1122 = torch_c.to_i64 %int0_978
%1123 = torch_c.to_i64 %int0_978
%c0_981 = arith.constant 0 : index
%c1_982 = arith.constant 1 : index
%c1_983 = arith.constant 1 : index
%c256_984 = arith.constant 256 : index
%c2_985 = arith.constant 2 : index
%c56_986 = arith.constant 56 : index
%c3_987 = arith.constant 3 : index
%c56_988 = arith.constant 56 : index
%c0_989 = arith.constant 0 : index
%c64_990 = arith.constant 64 : index
%c1_991 = arith.constant 1 : index
%c256_992 = arith.constant 256 : index
%c2_993 = arith.constant 2 : index
%c1_994 = arith.constant 1 : index
%c3_995 = arith.constant 3 : index
%c1_996 = arith.constant 1 : index
// Runtime checks: the group count (%1124) must divide both 256 and 64.
%1124 = arith.index_cast %1119 : i64 to index
%c0_997 = arith.constant 0 : index
%1125 = arith.remsi %c256_984, %1124 : index
%1126 = arith.cmpi eq, %c0_997, %1125 : index
cf.assert %1126, "invalid: groups must divide input channel size evenly."
%c0_998 = arith.constant 0 : index
%1127 = arith.remsi %c64_990, %1124 : index
%1128 = arith.cmpi eq, %c0_998, %1127 : index
cf.assert %1128, "invalid: groups must divide weight batch size evenly."
%c1_i64_999 = arith.constant 1 : i64
%c1_i64_1000 = arith.constant 1 : i64
%c1_i64_1001 = arith.constant 1 : i64
%c1_i64_1002 = arith.constant 1 : i64
%cst_1003 = arith.constant 0.000000e+00 : f32
%c0_1004 = arith.constant 0 : index
%c1_1005 = arith.constant 1 : index
%c1_1006 = arith.constant 1 : index
%c256_1007 = arith.constant 256 : index
%c2_1008 = arith.constant 2 : index
%c56_1009 = arith.constant 56 : index
%c3_1010 = arith.constant 3 : index
%c56_1011 = arith.constant 56 : index
%c0_i64_1012 = arith.constant 0 : i64
// Pad dims 2 and 3 by %1120 / %1121 (both 0 here) on each side with 0.0;
// dims 0 and 1 get zero padding. The result type is fully dynamic.
%1129 = arith.index_cast %c0_i64_1012 : i64 to index
%1130 = arith.index_cast %c0_i64_1012 : i64 to index
%1131 = arith.index_cast %1120 : i64 to index
%1132 = arith.index_cast %1121 : i64 to index
%padded_1013 = tensor.pad %cast_945 low[%1129, %1130, %1131, %1132] high[%1129, %1130, %1131, %1132] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_1003 : f32
} : tensor<1x256x56x56xf32> to tensor<?x?x?x?xf32>
// Output extent of dim 2:
//   floordiv(56 + 2*%1120 - %c1_i64_999*(%1133 - 1) - 1, %c1_i64_1001) + 1, with %1133 = 1.
// (%c1_i64_999 / %c1_i64_1001 are presumably dilation / stride — TODO confirm.)
%1133 = arith.index_cast %c1_994 : index to i64
%c1_i64_1014 = arith.constant 1 : i64
%c2_i64_1015 = arith.constant 2 : i64
%1134 = arith.muli %1120, %c2_i64_1015 : i64
%1135 = arith.index_cast %c56_986 : index to i64
%1136 = arith.addi %1135, %1134 : i64
%1137 = arith.subi %1133, %c1_i64_1014 : i64
%1138 = arith.muli %c1_i64_999, %1137 : i64
%1139 = arith.subi %1136, %1138 : i64
%1140 = arith.subi %1139, %c1_i64_1014 : i64
%1141 = arith.floordivsi %1140, %c1_i64_1001 : i64
%1142 = arith.addi %1141, %c1_i64_1014 : i64
%1143 = arith.index_cast %1142 : i64 to index
// Same computation for dim 3, using %1121, %c1_i64_1000 and %c1_i64_1002.
%1144 = arith.index_cast %c1_996 : index to i64
%c1_i64_1016 = arith.constant 1 : i64
%c2_i64_1017 = arith.constant 2 : i64
%1145 = arith.muli %1121, %c2_i64_1017 : i64
%1146 = arith.index_cast %c56_988 : index to i64
%1147 = arith.addi %1146, %1145 : i64
%1148 = arith.subi %1144, %c1_i64_1016 : i64
%1149 = arith.muli %c1_i64_1000, %1148 : i64
%1150 = arith.subi %1147, %1149 : i64
%1151 = arith.subi %1150, %c1_i64_1016 : i64
%1152 = arith.floordivsi %1151, %c1_i64_1002 : i64
%1153 = arith.addi %1152, %c1_i64_1016 : i64
%1154 = arith.index_cast %1153 : i64 to index
// Fill the 1x64x?x? output with %cast_971: #map3 reads the 64-element input by d1 only.
%1155 = tensor.empty(%1143, %1154) : tensor<1x64x?x?xf32>
%1156 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_971 : tensor<64xf32>) outs(%1155 : tensor<1x64x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x64x?x?xf32>
%1157 = arith.floordivsi %c256_984, %1124 : index
%1158 = arith.floordivsi %c64_990, %1124 : index
%c0_1018 = arith.constant 0 : index
%c1_1019 = arith.constant 1 : index
// The convolution itself, accumulating into the pre-filled output %1156.
%1159 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_1013, %cast_960 : tensor<?x?x?x?xf32>, tensor<64x256x1x1xf32>) outs(%1156 : tensor<1x64x?x?xf32>) -> tensor<1x64x?x?xf32>
// Recover the static shape of the result.
%cast_1020 = tensor.cast %1159 : tensor<1x64x?x?xf32> to tensor<1x64x56x56xf32>
// Elementwise ReLU over %cast_1020: yields x when x > 0 (or x is NaN), otherwise 0.0.
%c1_1021 = arith.constant 1 : index
%c1_1022 = arith.constant 1 : index
%c64_1023 = arith.constant 64 : index
%c2_1024 = arith.constant 2 : index
%c56_1025 = arith.constant 56 : index
%c3_1026 = arith.constant 3 : index
%c56_1027 = arith.constant 56 : index
%1160 = tensor.empty() : tensor<1x64x56x56xf32>
%1161 = linalg.generic {
    indexing_maps = [#map, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_1020 : tensor<1x64x56x56xf32>)
    outs(%1160 : tensor<1x64x56x56xf32>) {
^bb0(%x: f32, %unused: f32):
  %zero = arith.constant 0.000000e+00 : f32
  // ult(0, x) is the same predicate as ugt(x, 0): true when x > 0 or x is NaN.
  %keep = arith.cmpf ult, %zero, %x : f32
  %relu = arith.select %keep, %x, %zero : f32
  linalg.yield %relu : f32
} -> tensor<1x64x56x56xf32>
%cast_1028 = tensor.cast %1161 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
// Quantize %cast_1028 (1x64x56x56 f32) to i8:
//   q = clamp(round_half_to_even(x / scale) + zero_point, -128, 127)
// with scale = 7.8125e-03 and zero_point = 0.
%1162 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1163 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_1029 = torch.constant.int 12
%1164 = torch.aten.item %1162 : !torch.vtensor<[],f32> -> !torch.float
%1165 = torch_c.to_f64 %1164
%1166 = torch.aten.item %1163 : !torch.vtensor<[],si8> -> !torch.int
%1167 = torch_c.to_i64 %1166
%c1_1030 = arith.constant 1 : index
%c1_1031 = arith.constant 1 : index
%c64_1032 = arith.constant 64 : index
%c2_1033 = arith.constant 2 : index
%c56_1034 = arith.constant 56 : index
%c3_1035 = arith.constant 3 : index
%c56_1036 = arith.constant 56 : index
%1168 = tensor.empty() : tensor<1x64x56x56xi8>
%1169 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1028 : tensor<1x64x56x56xf32>) outs(%1168 : tensor<1x64x56x56xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32 and scale as f32.
%5774 = torch_c.to_i64 %1166
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %1164
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties must round to even (ONNX QuantizeLinear / PyTorch quantization semantics);
// math.round would round halves away from zero and shift exact .5 quotients by one step.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range. The unordered predicates are also true for NaN,
// so a NaN input ends up at the upper bound.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x64x56x56xi8>
%cast_1037 = tensor.cast %1169 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
%cast_1038 = tensor.cast %cast_1037 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
// Dequantize %cast_1038 (1x64x56x56 i8) to f32:
//   out = (sext(q) - zero_point) * scale, with scale = 7.8125e-03 and zero_point = 0.
%1170 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1171 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1172 = torch.aten.item %1170 : !torch.vtensor<[],f32> -> !torch.float
%1173 = torch_c.to_f64 %1172
%1174 = torch.aten.item %1171 : !torch.vtensor<[],si8> -> !torch.int
%1175 = torch_c.to_i64 %1174
%cast_1039 = tensor.cast %cast_1038 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
%c1_1040 = arith.constant 1 : index
%c1_1041 = arith.constant 1 : index
%c64_1042 = arith.constant 64 : index
%c2_1043 = arith.constant 2 : index
%c56_1044 = arith.constant 56 : index
%c3_1045 = arith.constant 3 : index
%c56_1046 = arith.constant 56 : index
%1176 = tensor.empty() : tensor<1x64x56x56xf32>
%1177 = linalg.generic {
    indexing_maps = [#map, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_1039 : tensor<1x64x56x56xi8>)
    outs(%1176 : tensor<1x64x56x56xf32>) {
^bb0(%q: i8, %unused: f32):
  // Scale: torch float -> f64 -> f32.
  %scale_f64 = torch_c.to_f64 %1172
  %scale = arith.truncf %scale_f64 : f64 to f32
  // Zero point: torch int -> i64 -> i32.
  %zp_i64 = torch_c.to_i64 %1174
  %zp = arith.trunci %zp_i64 : i64 to i32
  // Widen before subtracting so the difference cannot wrap in i8.
  %q_i32 = arith.extsi %q : i8 to i32
  %centered = arith.subi %q_i32, %zp : i32
  %centered_f32 = arith.sitofp %centered : i32 to f32
  %deq = arith.mulf %centered_f32, %scale : f32
  linalg.yield %deq : f32
} -> tensor<1x64x56x56xf32>
%cast_1047 = tensor.cast %1177 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
// Quantize %38 (64x64x3x3 f32) to i8:
//   q = clamp(round_half_to_even(x / scale) + zero_point, -128, 127)
// with scale = 3.90625e-03 and zero_point = 0.
%1178 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1179 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_1048 = torch.constant.int 12
%1180 = torch.aten.item %1178 : !torch.vtensor<[],f32> -> !torch.float
%1181 = torch_c.to_f64 %1180
%1182 = torch.aten.item %1179 : !torch.vtensor<[],si8> -> !torch.int
%1183 = torch_c.to_i64 %1182
%c1_1049 = arith.constant 1 : index
%c0_1050 = arith.constant 0 : index
%c64_1051 = arith.constant 64 : index
%c1_1052 = arith.constant 1 : index
%c64_1053 = arith.constant 64 : index
%c2_1054 = arith.constant 2 : index
%c3_1055 = arith.constant 3 : index
%c3_1056 = arith.constant 3 : index
%c3_1057 = arith.constant 3 : index
%1184 = tensor.empty() : tensor<64x64x3x3xi8>
%1185 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%38 : tensor<64x64x3x3xf32>) outs(%1184 : tensor<64x64x3x3xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32 and scale as f32.
%5774 = torch_c.to_i64 %1182
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %1180
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties must round to even (ONNX QuantizeLinear / PyTorch quantization semantics);
// math.round would round halves away from zero and shift exact .5 quotients by one step.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range. The unordered predicates are also true for NaN,
// so a NaN input ends up at the upper bound.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<64x64x3x3xi8>
%cast_1058 = tensor.cast %1185 : tensor<64x64x3x3xi8> to tensor<64x64x3x3xi8>
%cast_1059 = tensor.cast %cast_1058 : tensor<64x64x3x3xi8> to tensor<64x64x3x3xi8>
// Dequantize %cast_1059 (64x64x3x3 i8) to f32:
//   out = (sext(q) - zero_point) * scale, with scale = 3.90625e-03 and zero_point = 0.
%1186 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1187 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1188 = torch.aten.item %1186 : !torch.vtensor<[],f32> -> !torch.float
%1189 = torch_c.to_f64 %1188
%1190 = torch.aten.item %1187 : !torch.vtensor<[],si8> -> !torch.int
%1191 = torch_c.to_i64 %1190
%cast_1060 = tensor.cast %cast_1059 : tensor<64x64x3x3xi8> to tensor<64x64x3x3xi8>
%c1_1061 = arith.constant 1 : index
%c0_1062 = arith.constant 0 : index
%c64_1063 = arith.constant 64 : index
%c1_1064 = arith.constant 1 : index
%c64_1065 = arith.constant 64 : index
%c2_1066 = arith.constant 2 : index
%c3_1067 = arith.constant 3 : index
%c3_1068 = arith.constant 3 : index
%c3_1069 = arith.constant 3 : index
%1192 = tensor.empty() : tensor<64x64x3x3xf32>
%1193 = linalg.generic {
    indexing_maps = [#map1, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_1060 : tensor<64x64x3x3xi8>)
    outs(%1192 : tensor<64x64x3x3xf32>) {
^bb0(%q: i8, %unused: f32):
  // Scale: torch float -> f64 -> f32.
  %scale_f64 = torch_c.to_f64 %1188
  %scale = arith.truncf %scale_f64 : f64 to f32
  // Zero point: torch int -> i64 -> i32.
  %zp_i64 = torch_c.to_i64 %1190
  %zp = arith.trunci %zp_i64 : i64 to i32
  // Widen before subtracting so the difference cannot wrap in i8.
  %q_i32 = arith.extsi %q : i8 to i32
  %centered = arith.subi %q_i32, %zp : i32
  %centered_f32 = arith.sitofp %centered : i32 to f32
  %deq = arith.mulf %centered_f32, %scale : f32
  linalg.yield %deq : f32
} -> tensor<64x64x3x3xf32>
%cast_1070 = tensor.cast %1193 : tensor<64x64x3x3xf32> to tensor<64x64x3x3xf32>
// Quantize %40 (64 f32 values) to i8:
//   q = clamp(round_half_to_even(x / scale) + zero_point, -128, 127)
// with scale = 7.8125e-03 and zero_point = 0.
%1194 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1195 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_1071 = torch.constant.int 12
%1196 = torch.aten.item %1194 : !torch.vtensor<[],f32> -> !torch.float
%1197 = torch_c.to_f64 %1196
%1198 = torch.aten.item %1195 : !torch.vtensor<[],si8> -> !torch.int
%1199 = torch_c.to_i64 %1198
%c1_1072 = arith.constant 1 : index
%c0_1073 = arith.constant 0 : index
%c64_1074 = arith.constant 64 : index
%1200 = tensor.empty() : tensor<64xi8>
%1201 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%40 : tensor<64xf32>) outs(%1200 : tensor<64xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32 and scale as f32.
%5774 = torch_c.to_i64 %1198
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %1196
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties must round to even (ONNX QuantizeLinear / PyTorch quantization semantics);
// math.round would round halves away from zero and shift exact .5 quotients by one step.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range. The unordered predicates are also true for NaN,
// so a NaN input ends up at the upper bound.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<64xi8>
%cast_1075 = tensor.cast %1201 : tensor<64xi8> to tensor<64xi8>
%cast_1076 = tensor.cast %cast_1075 : tensor<64xi8> to tensor<64xi8>
// Dequantize %cast_1076 (64 i8 values) to f32:
//   out = (sext(q) - zero_point) * scale, with scale = 7.8125e-03 and zero_point = 0.
// The result (%cast_1081) seeds the output of the convolution that follows.
%1202 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1203 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1204 = torch.aten.item %1202 : !torch.vtensor<[],f32> -> !torch.float
%1205 = torch_c.to_f64 %1204
%1206 = torch.aten.item %1203 : !torch.vtensor<[],si8> -> !torch.int
%1207 = torch_c.to_i64 %1206
%cast_1077 = tensor.cast %cast_1076 : tensor<64xi8> to tensor<64xi8>
%c1_1078 = arith.constant 1 : index
%c0_1079 = arith.constant 0 : index
%c64_1080 = arith.constant 64 : index
%1208 = tensor.empty() : tensor<64xf32>
%1209 = linalg.generic {
    indexing_maps = [#map2, #map2],
    iterator_types = ["parallel"]
  } ins(%cast_1077 : tensor<64xi8>)
    outs(%1208 : tensor<64xf32>) {
^bb0(%q: i8, %unused: f32):
  // Scale: torch float -> f64 -> f32.
  %scale_f64 = torch_c.to_f64 %1204
  %scale = arith.truncf %scale_f64 : f64 to f32
  // Zero point: torch int -> i64 -> i32.
  %zp_i64 = torch_c.to_i64 %1206
  %zp = arith.trunci %zp_i64 : i64 to i32
  // Widen before subtracting so the difference cannot wrap in i8.
  %q_i32 = arith.extsi %q : i8 to i32
  %centered = arith.subi %q_i32, %zp : i32
  %centered_f32 = arith.sitofp %centered : i32 to f32
  %deq = arith.mulf %centered_f32, %scale : f32
  linalg.yield %deq : f32
} -> tensor<64xf32>
%cast_1081 = tensor.cast %1209 : tensor<64xf32> to tensor<64xf32>
// 3x3 convolution of %cast_1047 (1x64x56x56) with filter %cast_1070 (64x64x3x3),
// strides = 1, dilations = 1, padding of 1 on dims 2 and 3. The output tensor is pre-filled
// with %cast_1081 broadcast along dim 1, so the convolution accumulates on top of it.
%int1_1082 = torch.constant.int 1
%int1_1083 = torch.constant.int 1
%int1_1084 = torch.constant.int 1
%int1_1085 = torch.constant.int 1
%int1_1086 = torch.constant.int 1
%int1_1087 = torch.constant.int 1
%int0_1088 = torch.constant.int 0
// Integer lists built from the constants above. They are not consumed in the visible code;
// presumably padding / stride / dilation / output-padding of the original op — TODO confirm.
%1210 = torch.prim.ListConstruct %int1_1082, %int1_1083 : (!torch.int, !torch.int) -> !torch.list<int>
%1211 = torch.prim.ListConstruct %int1_1084, %int1_1085 : (!torch.int, !torch.int) -> !torch.list<int>
%1212 = torch.prim.ListConstruct %int1_1086, %int1_1087 : (!torch.int, !torch.int) -> !torch.list<int>
%1213 = torch.prim.ListConstruct %int0_1088, %int0_1088 : (!torch.int, !torch.int) -> !torch.list<int>
%false_1089 = torch.constant.bool false
%int1_1090 = torch.constant.int 1
// Builtin i64 views of the torch ints: %1214 = 1 (group count checked below),
// %1215 / %1216 = 1 (padding applied to dims 2 and 3 below).
%1214 = torch_c.to_i64 %int1_1090
%1215 = torch_c.to_i64 %int1_1082
%1216 = torch_c.to_i64 %int1_1083
%1217 = torch_c.to_i64 %int0_1088
%1218 = torch_c.to_i64 %int0_1088
%c0_1091 = arith.constant 0 : index
%c1_1092 = arith.constant 1 : index
%c1_1093 = arith.constant 1 : index
%c64_1094 = arith.constant 64 : index
%c2_1095 = arith.constant 2 : index
%c56_1096 = arith.constant 56 : index
%c3_1097 = arith.constant 3 : index
%c56_1098 = arith.constant 56 : index
%c0_1099 = arith.constant 0 : index
%c64_1100 = arith.constant 64 : index
%c1_1101 = arith.constant 1 : index
%c64_1102 = arith.constant 64 : index
%c2_1103 = arith.constant 2 : index
%c3_1104 = arith.constant 3 : index
%c3_1105 = arith.constant 3 : index
%c3_1106 = arith.constant 3 : index
// Runtime checks: the group count (%1219) must divide 64 (checked once per message).
%1219 = arith.index_cast %1214 : i64 to index
%c0_1107 = arith.constant 0 : index
%1220 = arith.remsi %c64_1094, %1219 : index
%1221 = arith.cmpi eq, %c0_1107, %1220 : index
cf.assert %1221, "invalid: groups must divide input channel size evenly."
%c0_1108 = arith.constant 0 : index
%1222 = arith.remsi %c64_1100, %1219 : index
%1223 = arith.cmpi eq, %c0_1108, %1222 : index
cf.assert %1223, "invalid: groups must divide weight batch size evenly."
%c1_i64_1109 = arith.constant 1 : i64
%c1_i64_1110 = arith.constant 1 : i64
%c1_i64_1111 = arith.constant 1 : i64
%c1_i64_1112 = arith.constant 1 : i64
%cst_1113 = arith.constant 0.000000e+00 : f32
%c0_1114 = arith.constant 0 : index
%c1_1115 = arith.constant 1 : index
%c1_1116 = arith.constant 1 : index
%c64_1117 = arith.constant 64 : index
%c2_1118 = arith.constant 2 : index
%c56_1119 = arith.constant 56 : index
%c3_1120 = arith.constant 3 : index
%c56_1121 = arith.constant 56 : index
%c0_i64_1122 = arith.constant 0 : i64
// Pad dims 2 and 3 by %1215 / %1216 (both 1 here) on each side with 0.0;
// dims 0 and 1 get zero padding. The result type is fully dynamic.
%1224 = arith.index_cast %c0_i64_1122 : i64 to index
%1225 = arith.index_cast %c0_i64_1122 : i64 to index
%1226 = arith.index_cast %1215 : i64 to index
%1227 = arith.index_cast %1216 : i64 to index
%padded_1123 = tensor.pad %cast_1047 low[%1224, %1225, %1226, %1227] high[%1224, %1225, %1226, %1227] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_1113 : f32
} : tensor<1x64x56x56xf32> to tensor<?x?x?x?xf32>
// Output extent of dim 2:
//   floordiv(56 + 2*%1215 - %c1_i64_1109*(%1228 - 1) - 1, %c1_i64_1111) + 1, with %1228 = 3.
// (%c1_i64_1109 / %c1_i64_1111 are presumably dilation / stride — TODO confirm.)
%1228 = arith.index_cast %c3_1104 : index to i64
%c1_i64_1124 = arith.constant 1 : i64
%c2_i64_1125 = arith.constant 2 : i64
%1229 = arith.muli %1215, %c2_i64_1125 : i64
%1230 = arith.index_cast %c56_1096 : index to i64
%1231 = arith.addi %1230, %1229 : i64
%1232 = arith.subi %1228, %c1_i64_1124 : i64
%1233 = arith.muli %c1_i64_1109, %1232 : i64
%1234 = arith.subi %1231, %1233 : i64
%1235 = arith.subi %1234, %c1_i64_1124 : i64
%1236 = arith.floordivsi %1235, %c1_i64_1111 : i64
%1237 = arith.addi %1236, %c1_i64_1124 : i64
%1238 = arith.index_cast %1237 : i64 to index
// Same computation for dim 3, using %1216, %c1_i64_1110 and %c1_i64_1112.
%1239 = arith.index_cast %c3_1106 : index to i64
%c1_i64_1126 = arith.constant 1 : i64
%c2_i64_1127 = arith.constant 2 : i64
%1240 = arith.muli %1216, %c2_i64_1127 : i64
%1241 = arith.index_cast %c56_1098 : index to i64
%1242 = arith.addi %1241, %1240 : i64
%1243 = arith.subi %1239, %c1_i64_1126 : i64
%1244 = arith.muli %c1_i64_1110, %1243 : i64
%1245 = arith.subi %1242, %1244 : i64
%1246 = arith.subi %1245, %c1_i64_1126 : i64
%1247 = arith.floordivsi %1246, %c1_i64_1112 : i64
%1248 = arith.addi %1247, %c1_i64_1126 : i64
%1249 = arith.index_cast %1248 : i64 to index
// Fill the 1x64x?x? output with %cast_1081: #map3 reads the 64-element input by d1 only.
%1250 = tensor.empty(%1238, %1249) : tensor<1x64x?x?xf32>
%1251 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1081 : tensor<64xf32>) outs(%1250 : tensor<1x64x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x64x?x?xf32>
%1252 = arith.floordivsi %c64_1094, %1219 : index
%1253 = arith.floordivsi %c64_1100, %1219 : index
%c0_1128 = arith.constant 0 : index
%c1_1129 = arith.constant 1 : index
// The convolution itself, accumulating into the pre-filled output %1251.
%1254 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_1123, %cast_1070 : tensor<?x?x?x?xf32>, tensor<64x64x3x3xf32>) outs(%1251 : tensor<1x64x?x?xf32>) -> tensor<1x64x?x?xf32>
// Recover the static shape of the result.
%cast_1130 = tensor.cast %1254 : tensor<1x64x?x?xf32> to tensor<1x64x56x56xf32>
// Elementwise ReLU over %cast_1130: yields x when x > 0 (or x is NaN), otherwise 0.0.
%c1_1131 = arith.constant 1 : index
%c1_1132 = arith.constant 1 : index
%c64_1133 = arith.constant 64 : index
%c2_1134 = arith.constant 2 : index
%c56_1135 = arith.constant 56 : index
%c3_1136 = arith.constant 3 : index
%c56_1137 = arith.constant 56 : index
%1255 = tensor.empty() : tensor<1x64x56x56xf32>
%1256 = linalg.generic {
    indexing_maps = [#map, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_1130 : tensor<1x64x56x56xf32>)
    outs(%1255 : tensor<1x64x56x56xf32>) {
^bb0(%x: f32, %unused: f32):
  %zero = arith.constant 0.000000e+00 : f32
  // ult(0, x) is the same predicate as ugt(x, 0): true when x > 0 or x is NaN.
  %keep = arith.cmpf ult, %zero, %x : f32
  %relu = arith.select %keep, %x, %zero : f32
  linalg.yield %relu : f32
} -> tensor<1x64x56x56xf32>
%cast_1138 = tensor.cast %1256 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
// Quantize %cast_1138 (1x64x56x56 f32) to i8:
//   q = clamp(round_half_to_even(x / scale) + zero_point, -128, 127)
// with scale = 1.5625e-02 and zero_point = 0.
%1257 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%1258 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_1139 = torch.constant.int 12
%1259 = torch.aten.item %1257 : !torch.vtensor<[],f32> -> !torch.float
%1260 = torch_c.to_f64 %1259
%1261 = torch.aten.item %1258 : !torch.vtensor<[],si8> -> !torch.int
%1262 = torch_c.to_i64 %1261
%c1_1140 = arith.constant 1 : index
%c1_1141 = arith.constant 1 : index
%c64_1142 = arith.constant 64 : index
%c2_1143 = arith.constant 2 : index
%c56_1144 = arith.constant 56 : index
%c3_1145 = arith.constant 3 : index
%c56_1146 = arith.constant 56 : index
%1263 = tensor.empty() : tensor<1x64x56x56xi8>
%1264 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1138 : tensor<1x64x56x56xf32>) outs(%1263 : tensor<1x64x56x56xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32 and scale as f32.
%5774 = torch_c.to_i64 %1261
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %1259
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties must round to even (ONNX QuantizeLinear / PyTorch quantization semantics);
// math.round would round halves away from zero and shift exact .5 quotients by one step.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range. The unordered predicates are also true for NaN,
// so a NaN input ends up at the upper bound.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x64x56x56xi8>
%cast_1147 = tensor.cast %1264 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
%cast_1148 = tensor.cast %cast_1147 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
// Dequantize %cast_1148 (1x64x56x56 i8) to f32:
//   out = (sext(q) - zero_point) * scale, with scale = 1.5625e-02 and zero_point = 0.
%1265 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%1266 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1267 = torch.aten.item %1265 : !torch.vtensor<[],f32> -> !torch.float
%1268 = torch_c.to_f64 %1267
%1269 = torch.aten.item %1266 : !torch.vtensor<[],si8> -> !torch.int
%1270 = torch_c.to_i64 %1269
%cast_1149 = tensor.cast %cast_1148 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
%c1_1150 = arith.constant 1 : index
%c1_1151 = arith.constant 1 : index
%c64_1152 = arith.constant 64 : index
%c2_1153 = arith.constant 2 : index
%c56_1154 = arith.constant 56 : index
%c3_1155 = arith.constant 3 : index
%c56_1156 = arith.constant 56 : index
%1271 = tensor.empty() : tensor<1x64x56x56xf32>
%1272 = linalg.generic {
    indexing_maps = [#map, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_1149 : tensor<1x64x56x56xi8>)
    outs(%1271 : tensor<1x64x56x56xf32>) {
^bb0(%q: i8, %unused: f32):
  // Scale: torch float -> f64 -> f32.
  %scale_f64 = torch_c.to_f64 %1267
  %scale = arith.truncf %scale_f64 : f64 to f32
  // Zero point: torch int -> i64 -> i32.
  %zp_i64 = torch_c.to_i64 %1269
  %zp = arith.trunci %zp_i64 : i64 to i32
  // Widen before subtracting so the difference cannot wrap in i8.
  %q_i32 = arith.extsi %q : i8 to i32
  %centered = arith.subi %q_i32, %zp : i32
  %centered_f32 = arith.sitofp %centered : i32 to f32
  %deq = arith.mulf %centered_f32, %scale : f32
  linalg.yield %deq : f32
} -> tensor<1x64x56x56xf32>
%cast_1157 = tensor.cast %1272 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
// Quantize %42 (256x64x1x1 f32) to i8:
//   q = clamp(round_half_to_even(x / scale) + zero_point, -128, 127)
// with scale = 3.90625e-03 and zero_point = 0.
%1273 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1274 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_1158 = torch.constant.int 12
%1275 = torch.aten.item %1273 : !torch.vtensor<[],f32> -> !torch.float
%1276 = torch_c.to_f64 %1275
%1277 = torch.aten.item %1274 : !torch.vtensor<[],si8> -> !torch.int
%1278 = torch_c.to_i64 %1277
%c1_1159 = arith.constant 1 : index
%c0_1160 = arith.constant 0 : index
%c256_1161 = arith.constant 256 : index
%c1_1162 = arith.constant 1 : index
%c64_1163 = arith.constant 64 : index
%1279 = tensor.empty() : tensor<256x64x1x1xi8>
%1280 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%42 : tensor<256x64x1x1xf32>) outs(%1279 : tensor<256x64x1x1xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32 and scale as f32.
%5774 = torch_c.to_i64 %1277
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %1275
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties must round to even (ONNX QuantizeLinear / PyTorch quantization semantics);
// math.round would round halves away from zero and shift exact .5 quotients by one step.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range. The unordered predicates are also true for NaN,
// so a NaN input ends up at the upper bound.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256x64x1x1xi8>
%cast_1164 = tensor.cast %1280 : tensor<256x64x1x1xi8> to tensor<256x64x1x1xi8>
%cast_1165 = tensor.cast %cast_1164 : tensor<256x64x1x1xi8> to tensor<256x64x1x1xi8>
// Dequantize %cast_1165 (256x64x1x1 i8) to f32:
//   out = (sext(q) - zero_point) * scale, with scale = 3.90625e-03 and zero_point = 0.
%1281 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1282 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1283 = torch.aten.item %1281 : !torch.vtensor<[],f32> -> !torch.float
%1284 = torch_c.to_f64 %1283
%1285 = torch.aten.item %1282 : !torch.vtensor<[],si8> -> !torch.int
%1286 = torch_c.to_i64 %1285
%cast_1166 = tensor.cast %cast_1165 : tensor<256x64x1x1xi8> to tensor<256x64x1x1xi8>
%c1_1167 = arith.constant 1 : index
%c0_1168 = arith.constant 0 : index
%c256_1169 = arith.constant 256 : index
%c1_1170 = arith.constant 1 : index
%c64_1171 = arith.constant 64 : index
%1287 = tensor.empty() : tensor<256x64x1x1xf32>
%1288 = linalg.generic {
    indexing_maps = [#map4, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_1166 : tensor<256x64x1x1xi8>)
    outs(%1287 : tensor<256x64x1x1xf32>) {
^bb0(%q: i8, %unused: f32):
  // Scale: torch float -> f64 -> f32.
  %scale_f64 = torch_c.to_f64 %1283
  %scale = arith.truncf %scale_f64 : f64 to f32
  // Zero point: torch int -> i64 -> i32.
  %zp_i64 = torch_c.to_i64 %1285
  %zp = arith.trunci %zp_i64 : i64 to i32
  // Widen before subtracting so the difference cannot wrap in i8.
  %q_i32 = arith.extsi %q : i8 to i32
  %centered = arith.subi %q_i32, %zp : i32
  %centered_f32 = arith.sitofp %centered : i32 to f32
  %deq = arith.mulf %centered_f32, %scale : f32
  linalg.yield %deq : f32
} -> tensor<256x64x1x1xf32>
%cast_1172 = tensor.cast %1288 : tensor<256x64x1x1xf32> to tensor<256x64x1x1xf32>
// Quantize %44 (256 f32 values) to i8:
//   q = clamp(round_half_to_even(x / scale) + zero_point, -128, 127)
// with scale = 7.8125e-03 and zero_point = 0.
%1289 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1290 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_1173 = torch.constant.int 12
%1291 = torch.aten.item %1289 : !torch.vtensor<[],f32> -> !torch.float
%1292 = torch_c.to_f64 %1291
%1293 = torch.aten.item %1290 : !torch.vtensor<[],si8> -> !torch.int
%1294 = torch_c.to_i64 %1293
%c1_1174 = arith.constant 1 : index
%c0_1175 = arith.constant 0 : index
%c256_1176 = arith.constant 256 : index
%1295 = tensor.empty() : tensor<256xi8>
%1296 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%44 : tensor<256xf32>) outs(%1295 : tensor<256xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32 and scale as f32.
%5774 = torch_c.to_i64 %1293
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %1291
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties must round to even (ONNX QuantizeLinear / PyTorch quantization semantics);
// math.round would round halves away from zero and shift exact .5 quotients by one step.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range. The unordered predicates are also true for NaN,
// so a NaN input ends up at the upper bound.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256xi8>
%cast_1177 = tensor.cast %1296 : tensor<256xi8> to tensor<256xi8>
%cast_1178 = tensor.cast %cast_1177 : tensor<256xi8> to tensor<256xi8>
// Dequantize %cast_1178 (256 i8 values) to f32:
//   out = (sext(q) - zero_point) * scale, with scale = 7.8125e-03 and zero_point = 0.
// The result (%cast_1183) seeds the output of the convolution that follows.
%1297 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1298 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1299 = torch.aten.item %1297 : !torch.vtensor<[],f32> -> !torch.float
%1300 = torch_c.to_f64 %1299
%1301 = torch.aten.item %1298 : !torch.vtensor<[],si8> -> !torch.int
%1302 = torch_c.to_i64 %1301
%cast_1179 = tensor.cast %cast_1178 : tensor<256xi8> to tensor<256xi8>
%c1_1180 = arith.constant 1 : index
%c0_1181 = arith.constant 0 : index
%c256_1182 = arith.constant 256 : index
%1303 = tensor.empty() : tensor<256xf32>
%1304 = linalg.generic {
    indexing_maps = [#map2, #map2],
    iterator_types = ["parallel"]
  } ins(%cast_1179 : tensor<256xi8>)
    outs(%1303 : tensor<256xf32>) {
^bb0(%q: i8, %unused: f32):
  // Scale: torch float -> f64 -> f32.
  %scale_f64 = torch_c.to_f64 %1299
  %scale = arith.truncf %scale_f64 : f64 to f32
  // Zero point: torch int -> i64 -> i32.
  %zp_i64 = torch_c.to_i64 %1301
  %zp = arith.trunci %zp_i64 : i64 to i32
  // Widen before subtracting so the difference cannot wrap in i8.
  %q_i32 = arith.extsi %q : i8 to i32
  %centered = arith.subi %q_i32, %zp : i32
  %centered_f32 = arith.sitofp %centered : i32 to f32
  %deq = arith.mulf %centered_f32, %scale : f32
  linalg.yield %deq : f32
} -> tensor<256xf32>
%cast_1183 = tensor.cast %1304 : tensor<256xf32> to tensor<256xf32>
// 1x1 convolution of %cast_1157 (1x64x56x56) with filter %cast_1172 (256x64x1x1),
// strides = 1, dilations = 1. The output tensor is pre-filled with %cast_1183 broadcast
// along dim 1, so the convolution accumulates on top of that per-channel value.
%int0_1184 = torch.constant.int 0
%int0_1185 = torch.constant.int 0
%int1_1186 = torch.constant.int 1
%int1_1187 = torch.constant.int 1
%int1_1188 = torch.constant.int 1
%int1_1189 = torch.constant.int 1
%int0_1190 = torch.constant.int 0
// Integer lists built from the constants above. They are not consumed in the visible code;
// presumably padding / stride / dilation / output-padding of the original op — TODO confirm.
%1305 = torch.prim.ListConstruct %int0_1184, %int0_1185 : (!torch.int, !torch.int) -> !torch.list<int>
%1306 = torch.prim.ListConstruct %int1_1186, %int1_1187 : (!torch.int, !torch.int) -> !torch.list<int>
%1307 = torch.prim.ListConstruct %int1_1188, %int1_1189 : (!torch.int, !torch.int) -> !torch.list<int>
%1308 = torch.prim.ListConstruct %int0_1190, %int0_1190 : (!torch.int, !torch.int) -> !torch.list<int>
%false_1191 = torch.constant.bool false
%int1_1192 = torch.constant.int 1
// Builtin i64 views of the torch ints: %1309 = 1 (group count checked below),
// %1310 / %1311 = 0 (padding applied to dims 2 and 3 below).
%1309 = torch_c.to_i64 %int1_1192
%1310 = torch_c.to_i64 %int0_1184
%1311 = torch_c.to_i64 %int0_1185
%1312 = torch_c.to_i64 %int0_1190
%1313 = torch_c.to_i64 %int0_1190
%c0_1193 = arith.constant 0 : index
%c1_1194 = arith.constant 1 : index
%c1_1195 = arith.constant 1 : index
%c64_1196 = arith.constant 64 : index
%c2_1197 = arith.constant 2 : index
%c56_1198 = arith.constant 56 : index
%c3_1199 = arith.constant 3 : index
%c56_1200 = arith.constant 56 : index
%c0_1201 = arith.constant 0 : index
%c256_1202 = arith.constant 256 : index
%c1_1203 = arith.constant 1 : index
%c64_1204 = arith.constant 64 : index
%c2_1205 = arith.constant 2 : index
%c1_1206 = arith.constant 1 : index
%c3_1207 = arith.constant 3 : index
%c1_1208 = arith.constant 1 : index
// Runtime checks: the group count (%1314) must divide both 64 and 256.
%1314 = arith.index_cast %1309 : i64 to index
%c0_1209 = arith.constant 0 : index
%1315 = arith.remsi %c64_1196, %1314 : index
%1316 = arith.cmpi eq, %c0_1209, %1315 : index
cf.assert %1316, "invalid: groups must divide input channel size evenly."
%c0_1210 = arith.constant 0 : index
%1317 = arith.remsi %c256_1202, %1314 : index
%1318 = arith.cmpi eq, %c0_1210, %1317 : index
cf.assert %1318, "invalid: groups must divide weight batch size evenly."
%c1_i64_1211 = arith.constant 1 : i64
%c1_i64_1212 = arith.constant 1 : i64
%c1_i64_1213 = arith.constant 1 : i64
%c1_i64_1214 = arith.constant 1 : i64
%cst_1215 = arith.constant 0.000000e+00 : f32
%c0_1216 = arith.constant 0 : index
%c1_1217 = arith.constant 1 : index
%c1_1218 = arith.constant 1 : index
%c64_1219 = arith.constant 64 : index
%c2_1220 = arith.constant 2 : index
%c56_1221 = arith.constant 56 : index
%c3_1222 = arith.constant 3 : index
%c56_1223 = arith.constant 56 : index
%c0_i64_1224 = arith.constant 0 : i64
// Pad dims 2 and 3 by %1310 / %1311 (both 0 here) on each side with 0.0;
// dims 0 and 1 get zero padding. The result type is fully dynamic.
%1319 = arith.index_cast %c0_i64_1224 : i64 to index
%1320 = arith.index_cast %c0_i64_1224 : i64 to index
%1321 = arith.index_cast %1310 : i64 to index
%1322 = arith.index_cast %1311 : i64 to index
%padded_1225 = tensor.pad %cast_1157 low[%1319, %1320, %1321, %1322] high[%1319, %1320, %1321, %1322] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_1215 : f32
} : tensor<1x64x56x56xf32> to tensor<?x?x?x?xf32>
// Output extent of dim 2:
//   floordiv(56 + 2*%1310 - %c1_i64_1211*(%1323 - 1) - 1, %c1_i64_1213) + 1, with %1323 = 1.
// (%c1_i64_1211 / %c1_i64_1213 are presumably dilation / stride — TODO confirm.)
%1323 = arith.index_cast %c1_1206 : index to i64
%c1_i64_1226 = arith.constant 1 : i64
%c2_i64_1227 = arith.constant 2 : i64
%1324 = arith.muli %1310, %c2_i64_1227 : i64
%1325 = arith.index_cast %c56_1198 : index to i64
%1326 = arith.addi %1325, %1324 : i64
%1327 = arith.subi %1323, %c1_i64_1226 : i64
%1328 = arith.muli %c1_i64_1211, %1327 : i64
%1329 = arith.subi %1326, %1328 : i64
%1330 = arith.subi %1329, %c1_i64_1226 : i64
%1331 = arith.floordivsi %1330, %c1_i64_1213 : i64
%1332 = arith.addi %1331, %c1_i64_1226 : i64
%1333 = arith.index_cast %1332 : i64 to index
// Same computation for dim 3, using %1311, %c1_i64_1212 and %c1_i64_1214.
%1334 = arith.index_cast %c1_1208 : index to i64
%c1_i64_1228 = arith.constant 1 : i64
%c2_i64_1229 = arith.constant 2 : i64
%1335 = arith.muli %1311, %c2_i64_1229 : i64
%1336 = arith.index_cast %c56_1200 : index to i64
%1337 = arith.addi %1336, %1335 : i64
%1338 = arith.subi %1334, %c1_i64_1228 : i64
%1339 = arith.muli %c1_i64_1212, %1338 : i64
%1340 = arith.subi %1337, %1339 : i64
%1341 = arith.subi %1340, %c1_i64_1228 : i64
%1342 = arith.floordivsi %1341, %c1_i64_1214 : i64
%1343 = arith.addi %1342, %c1_i64_1228 : i64
%1344 = arith.index_cast %1343 : i64 to index
// Fill the 1x256x?x? output with %cast_1183: #map3 reads the 256-element input by d1 only.
%1345 = tensor.empty(%1333, %1344) : tensor<1x256x?x?xf32>
%1346 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1183 : tensor<256xf32>) outs(%1345 : tensor<1x256x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x256x?x?xf32>
%1347 = arith.floordivsi %c64_1196, %1314 : index
%1348 = arith.floordivsi %c256_1202, %1314 : index
%c0_1230 = arith.constant 0 : index
%c1_1231 = arith.constant 1 : index
// The convolution itself, accumulating into the pre-filled output %1346.
%1349 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_1225, %cast_1172 : tensor<?x?x?x?xf32>, tensor<256x64x1x1xf32>) outs(%1346 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
// Recover the static shape of the result.
%cast_1232 = tensor.cast %1349 : tensor<1x256x?x?xf32> to tensor<1x256x56x56xf32>
// Per-tensor quantization of the f32 activation %cast_1232 to i8:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 7.8125e-03 (%1350) and zero_point = 0 (%1351).
%1350 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1351 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): 12 is presumably the torch dtype code for qint8; it has no visible use in this chunk.
%int12_1233 = torch.constant.int 12
%1352 = torch.aten.item %1350 : !torch.vtensor<[],f32> -> !torch.float
%1353 = torch_c.to_f64 %1352
%1354 = torch.aten.item %1351 : !torch.vtensor<[],si8> -> !torch.int
%1355 = torch_c.to_i64 %1354
// Index constants emitted by the lowering; none of them is referenced in the visible code.
%c1_1234 = arith.constant 1 : index
%c1_1235 = arith.constant 1 : index
%c256_1236 = arith.constant 256 : index
%c2_1237 = arith.constant 2 : index
%c56_1238 = arith.constant 56 : index
%c3_1239 = arith.constant 3 : index
%c56_1240 = arith.constant 56 : index
%1356 = tensor.empty() : tensor<1x256x56x56xi8>
%1357 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1232 : tensor<1x256x56x56xf32>) outs(%1356 : tensor<1x256x56x56xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point and scale are re-materialized inside the body as f32 scalars.
%5774 = torch_c.to_i64 %1354
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %1352
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties round to even. math.round would round ties away from zero, which
// disagrees with the round-half-to-even rule of quantize-linear semantics.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x256x56x56xi8>
// Identity casts (source and result types are the same).
%cast_1241 = tensor.cast %1357 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
%cast_1242 = tensor.cast %cast_1241 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
// Dequantize the i8 activation %cast_1242 back to f32:
//   y = (sext(q) - zero_point) * scale
// with scale = 7.8125e-03 (%1358) and zero_point = 0 (%1359).
%1358 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1359 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1360 = torch.aten.item %1358 : !torch.vtensor<[],f32> -> !torch.float
// %1361 and %1363 have no visible uses; the generic body re-derives both values itself.
%1361 = torch_c.to_f64 %1360
%1362 = torch.aten.item %1359 : !torch.vtensor<[],si8> -> !torch.int
%1363 = torch_c.to_i64 %1362
// Identity cast (same source and result type).
%cast_1243 = tensor.cast %cast_1242 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
// Index constants emitted by the lowering; none of them is referenced in the visible code.
%c1_1244 = arith.constant 1 : index
%c1_1245 = arith.constant 1 : index
%c256_1246 = arith.constant 256 : index
%c2_1247 = arith.constant 2 : index
%c56_1248 = arith.constant 56 : index
%c3_1249 = arith.constant 3 : index
%c56_1250 = arith.constant 56 : index
%1364 = tensor.empty() : tensor<1x256x56x56xf32>
%1365 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1243 : tensor<1x256x56x56xi8>) outs(%1364 : tensor<1x256x56x56xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 first so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %1362
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %1360
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x256x56x56xf32>
// Identity cast (same source and result type).
%cast_1251 = tensor.cast %1365 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
// Elementwise add with an alpha multiplier: out = %cast_1251 + alpha * %cast_945,
// where alpha = 1 (%int1_1252, converted to i64 as %1366).
%int1_1252 = torch.constant.int 1
%1366 = torch_c.to_i64 %int1_1252
%c1_1253 = arith.constant 1 : index
%c1_1254 = arith.constant 1 : index
%c256_1255 = arith.constant 256 : index
%c2_1256 = arith.constant 2 : index
%c56_1257 = arith.constant 56 : index
%c3_1258 = arith.constant 3 : index
%c56_1259 = arith.constant 56 : index
%c1_1260 = arith.constant 1 : index
%c256_1261 = arith.constant 256 : index
// Broadcast-compatibility checks. Each compare has two equal constant operands
// (256 vs 256, 56 vs 56, 56 vs 56), so these asserts always hold.
%1367 = arith.cmpi eq, %c256_1255, %c256_1261 : index
cf.assert %1367, "mismatched size for broadcast"
%c2_1262 = arith.constant 2 : index
%c56_1263 = arith.constant 56 : index
%1368 = arith.cmpi eq, %c56_1257, %c56_1263 : index
cf.assert %1368, "mismatched size for broadcast"
%c3_1264 = arith.constant 3 : index
%c56_1265 = arith.constant 56 : index
%1369 = arith.cmpi eq, %c56_1259, %c56_1265 : index
cf.assert %1369, "mismatched size for broadcast"
%1370 = tensor.empty() : tensor<1x256x56x56xf32>
%1371 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1251, %cast_945 : tensor<1x256x56x56xf32>, tensor<1x256x56x56xf32>) outs(%1370 : tensor<1x256x56x56xf32>) {
^bb0(%in: f32, %in_6197: f32, %out: f32):
// alpha is converted to f32 and applied to the second operand only.
%5774 = arith.sitofp %1366 : i64 to f32
%5775 = arith.mulf %in_6197, %5774 : f32
%5776 = arith.addf %in, %5775 : f32
linalg.yield %5776 : f32
} -> tensor<1x256x56x56xf32>
// Identity cast (same source and result type).
%cast_1266 = tensor.cast %1371 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
// ReLU over %cast_1266: out = in > 0 ? in : 0.
// Index constants emitted by the lowering; none of them is referenced in the visible code.
%c1_1267 = arith.constant 1 : index
%c1_1268 = arith.constant 1 : index
%c256_1269 = arith.constant 256 : index
%c2_1270 = arith.constant 2 : index
%c56_1271 = arith.constant 56 : index
%c3_1272 = arith.constant 3 : index
%c56_1273 = arith.constant 56 : index
%1372 = tensor.empty() : tensor<1x256x56x56xf32>
%1373 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1266 : tensor<1x256x56x56xf32>) outs(%1372 : tensor<1x256x56x56xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// `ugt` is the unordered comparison: it is also true when %in is NaN,
// so a NaN input is selected and passed through unchanged.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x256x56x56xf32>
// Identity cast (same source and result type).
%cast_1274 = tensor.cast %1373 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
// Per-tensor quantization of the f32 activation %cast_1274 to i8:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 1.5625e-02 (%1374) and zero_point = 0 (%1375).
%1374 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%1375 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): 12 is presumably the torch dtype code for qint8; it has no visible use in this chunk.
%int12_1275 = torch.constant.int 12
%1376 = torch.aten.item %1374 : !torch.vtensor<[],f32> -> !torch.float
%1377 = torch_c.to_f64 %1376
%1378 = torch.aten.item %1375 : !torch.vtensor<[],si8> -> !torch.int
%1379 = torch_c.to_i64 %1378
// Index constants emitted by the lowering; none of them is referenced in the visible code.
%c1_1276 = arith.constant 1 : index
%c1_1277 = arith.constant 1 : index
%c256_1278 = arith.constant 256 : index
%c2_1279 = arith.constant 2 : index
%c56_1280 = arith.constant 56 : index
%c3_1281 = arith.constant 3 : index
%c56_1282 = arith.constant 56 : index
%1380 = tensor.empty() : tensor<1x256x56x56xi8>
%1381 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1274 : tensor<1x256x56x56xf32>) outs(%1380 : tensor<1x256x56x56xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point and scale are re-materialized inside the body as f32 scalars.
%5774 = torch_c.to_i64 %1378
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %1376
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties round to even. math.round would round ties away from zero, which
// disagrees with the round-half-to-even rule of quantize-linear semantics.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x256x56x56xi8>
// Identity casts (source and result types are the same).
%cast_1283 = tensor.cast %1381 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
%cast_1284 = tensor.cast %cast_1283 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
// Dequantize the i8 activation %cast_1284 back to f32:
//   y = (sext(q) - zero_point) * scale
// with scale = 1.5625e-02 (%1382) and zero_point = 0 (%1383).
%1382 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%1383 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1384 = torch.aten.item %1382 : !torch.vtensor<[],f32> -> !torch.float
// %1385 and %1387 have no visible uses; the generic body re-derives both values itself.
%1385 = torch_c.to_f64 %1384
%1386 = torch.aten.item %1383 : !torch.vtensor<[],si8> -> !torch.int
%1387 = torch_c.to_i64 %1386
// Identity cast (same source and result type).
%cast_1285 = tensor.cast %cast_1284 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
// Index constants emitted by the lowering; none of them is referenced in the visible code.
%c1_1286 = arith.constant 1 : index
%c1_1287 = arith.constant 1 : index
%c256_1288 = arith.constant 256 : index
%c2_1289 = arith.constant 2 : index
%c56_1290 = arith.constant 56 : index
%c3_1291 = arith.constant 3 : index
%c56_1292 = arith.constant 56 : index
%1388 = tensor.empty() : tensor<1x256x56x56xf32>
%1389 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1285 : tensor<1x256x56x56xi8>) outs(%1388 : tensor<1x256x56x56xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 first so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %1386
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %1384
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x256x56x56xf32>
// Identity cast (same source and result type). %cast_1293 is the input of the tensor.pad feeding the next convolution.
%cast_1293 = tensor.cast %1389 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
// Per-tensor quantization of the f32 tensor %46 (128x256x1x1) to i8:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 3.90625e-03 (%1390) and zero_point = 0 (%1391).
%1390 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1391 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): 12 is presumably the torch dtype code for qint8; it has no visible use in this chunk.
%int12_1294 = torch.constant.int 12
%1392 = torch.aten.item %1390 : !torch.vtensor<[],f32> -> !torch.float
%1393 = torch_c.to_f64 %1392
%1394 = torch.aten.item %1391 : !torch.vtensor<[],si8> -> !torch.int
%1395 = torch_c.to_i64 %1394
// Index constants emitted by the lowering; none of them is referenced in the visible code.
%c1_1295 = arith.constant 1 : index
%c0_1296 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c1_1297 = arith.constant 1 : index
%c256_1298 = arith.constant 256 : index
%1396 = tensor.empty() : tensor<128x256x1x1xi8>
// #map4 reads the input at (d0, d1, 0, 0); the two trailing dimensions have extent 1.
%1397 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%46 : tensor<128x256x1x1xf32>) outs(%1396 : tensor<128x256x1x1xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point and scale are re-materialized inside the body as f32 scalars.
%5774 = torch_c.to_i64 %1394
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %1392
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties round to even. math.round would round ties away from zero, which
// disagrees with the round-half-to-even rule of quantize-linear semantics.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<128x256x1x1xi8>
// Identity casts (source and result types are the same).
%cast_1299 = tensor.cast %1397 : tensor<128x256x1x1xi8> to tensor<128x256x1x1xi8>
%cast_1300 = tensor.cast %cast_1299 : tensor<128x256x1x1xi8> to tensor<128x256x1x1xi8>
// Dequantize the i8 tensor %cast_1300 (128x256x1x1) back to f32:
//   y = (sext(q) - zero_point) * scale
// with scale = 3.90625e-03 (%1398) and zero_point = 0 (%1399).
%1398 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1399 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1400 = torch.aten.item %1398 : !torch.vtensor<[],f32> -> !torch.float
// %1401 and %1403 have no visible uses; the generic body re-derives both values itself.
%1401 = torch_c.to_f64 %1400
%1402 = torch.aten.item %1399 : !torch.vtensor<[],si8> -> !torch.int
%1403 = torch_c.to_i64 %1402
// Identity cast (same source and result type).
%cast_1301 = tensor.cast %cast_1300 : tensor<128x256x1x1xi8> to tensor<128x256x1x1xi8>
// Index constants emitted by the lowering; none of them is referenced in the visible code.
%c1_1302 = arith.constant 1 : index
%c0_1303 = arith.constant 0 : index
%c128_1304 = arith.constant 128 : index
%c1_1305 = arith.constant 1 : index
%c256_1306 = arith.constant 256 : index
%1404 = tensor.empty() : tensor<128x256x1x1xf32>
// #map4 reads the input at (d0, d1, 0, 0); the two trailing dimensions have extent 1.
%1405 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1301 : tensor<128x256x1x1xi8>) outs(%1404 : tensor<128x256x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 first so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %1402
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %1400
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<128x256x1x1xf32>
// Identity cast. %cast_1307 is the filter operand of the linalg.conv_2d_nchw_fchw that produces %1466.
%cast_1307 = tensor.cast %1405 : tensor<128x256x1x1xf32> to tensor<128x256x1x1xf32>
// Per-tensor quantization of the f32 vector %48 (128 elements) to i8:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 7.8125e-03 (%1406) and zero_point = 0 (%1407).
%1406 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1407 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): 12 is presumably the torch dtype code for qint8; it has no visible use in this chunk.
%int12_1308 = torch.constant.int 12
%1408 = torch.aten.item %1406 : !torch.vtensor<[],f32> -> !torch.float
%1409 = torch_c.to_f64 %1408
%1410 = torch.aten.item %1407 : !torch.vtensor<[],si8> -> !torch.int
%1411 = torch_c.to_i64 %1410
// Index constants emitted by the lowering; none of them is referenced in the visible code.
%c1_1309 = arith.constant 1 : index
%c0_1310 = arith.constant 0 : index
%c128_1311 = arith.constant 128 : index
%1412 = tensor.empty() : tensor<128xi8>
%1413 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%48 : tensor<128xf32>) outs(%1412 : tensor<128xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point and scale are re-materialized inside the body as f32 scalars.
%5774 = torch_c.to_i64 %1410
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %1408
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties round to even. math.round would round ties away from zero, which
// disagrees with the round-half-to-even rule of quantize-linear semantics.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<128xi8>
// Identity casts (source and result types are the same).
%cast_1312 = tensor.cast %1413 : tensor<128xi8> to tensor<128xi8>
%cast_1313 = tensor.cast %cast_1312 : tensor<128xi8> to tensor<128xi8>
// Dequantize the i8 vector %cast_1313 (128 elements) back to f32:
//   y = (sext(q) - zero_point) * scale
// with scale = 7.8125e-03 (%1414) and zero_point = 0 (%1415).
%1414 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1415 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1416 = torch.aten.item %1414 : !torch.vtensor<[],f32> -> !torch.float
// %1417 and %1419 have no visible uses; the generic body re-derives both values itself.
%1417 = torch_c.to_f64 %1416
%1418 = torch.aten.item %1415 : !torch.vtensor<[],si8> -> !torch.int
%1419 = torch_c.to_i64 %1418
// Identity cast (same source and result type).
%cast_1314 = tensor.cast %cast_1313 : tensor<128xi8> to tensor<128xi8>
// Index constants emitted by the lowering; none of them is referenced in the visible code.
%c1_1315 = arith.constant 1 : index
%c0_1316 = arith.constant 0 : index
%c128_1317 = arith.constant 128 : index
%1420 = tensor.empty() : tensor<128xf32>
%1421 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_1314 : tensor<128xi8>) outs(%1420 : tensor<128xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 first so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %1418
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %1416
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<128xf32>
// Identity cast. %cast_1318 is the per-channel vector broadcast into the convolution's initial output (%1463).
%cast_1318 = tensor.cast %1421 : tensor<128xf32> to tensor<128xf32>
// 2-D convolution (NCHW input, FCHW filter): input %cast_1293 (1x256x56x56),
// filter %cast_1307 (128x256x1x1), per-channel initial value %cast_1318 (128),
// producing %cast_1367 (1x128x56x56). Strides and dilations are 1 on the conv op.
%int0_1319 = torch.constant.int 0
%int0_1320 = torch.constant.int 0
%int1_1321 = torch.constant.int 1
%int1_1322 = torch.constant.int 1
%int1_1323 = torch.constant.int 1
%int1_1324 = torch.constant.int 1
%int0_1325 = torch.constant.int 0
// Integer lists from the original torch-level op. %1422 = [0, 0] holds the values
// later used as spatial padding; the other lists have no visible uses in this chunk.
%1422 = torch.prim.ListConstruct %int0_1319, %int0_1320 : (!torch.int, !torch.int) -> !torch.list<int>
%1423 = torch.prim.ListConstruct %int1_1321, %int1_1322 : (!torch.int, !torch.int) -> !torch.list<int>
%1424 = torch.prim.ListConstruct %int1_1323, %int1_1324 : (!torch.int, !torch.int) -> !torch.list<int>
%1425 = torch.prim.ListConstruct %int0_1325, %int0_1325 : (!torch.int, !torch.int) -> !torch.list<int>
%false_1326 = torch.constant.bool false
%int1_1327 = torch.constant.int 1
// %1426 is the group count (1) checked by the asserts below; %1427/%1428 are the
// H/W padding amounts (0) consumed by tensor.pad and the output-size arithmetic.
%1426 = torch_c.to_i64 %int1_1327
%1427 = torch_c.to_i64 %int0_1319
%1428 = torch_c.to_i64 %int0_1320
%1429 = torch_c.to_i64 %int0_1325
%1430 = torch_c.to_i64 %int0_1325
%c0_1328 = arith.constant 0 : index
%c1_1329 = arith.constant 1 : index
%c1_1330 = arith.constant 1 : index
%c256_1331 = arith.constant 256 : index
%c2_1332 = arith.constant 2 : index
%c56_1333 = arith.constant 56 : index
%c3_1334 = arith.constant 3 : index
%c56_1335 = arith.constant 56 : index
%c0_1336 = arith.constant 0 : index
%c128_1337 = arith.constant 128 : index
%c1_1338 = arith.constant 1 : index
%c256_1339 = arith.constant 256 : index
%c2_1340 = arith.constant 2 : index
%c1_1341 = arith.constant 1 : index
%c3_1342 = arith.constant 3 : index
%c1_1343 = arith.constant 1 : index
// Runtime checks that the group count divides 256 and 128 (the constants
// %c256_1331 and %c128_1337); with groups = 1 both remainders are 0.
%1431 = arith.index_cast %1426 : i64 to index
%c0_1344 = arith.constant 0 : index
%1432 = arith.remsi %c256_1331, %1431 : index
%1433 = arith.cmpi eq, %c0_1344, %1432 : index
cf.assert %1433, "invalid: groups must divide input channel size evenly."
%c0_1345 = arith.constant 0 : index
%1434 = arith.remsi %c128_1337, %1431 : index
%1435 = arith.cmpi eq, %c0_1345, %1434 : index
cf.assert %1435, "invalid: groups must divide weight batch size evenly."
// NOTE(review): %c1_i64_1346/1347 and %c1_i64_1348/1349 are presumably the per-axis
// dilation and stride; that reading follows from their position in the formula below.
%c1_i64_1346 = arith.constant 1 : i64
%c1_i64_1347 = arith.constant 1 : i64
%c1_i64_1348 = arith.constant 1 : i64
%c1_i64_1349 = arith.constant 1 : i64
%cst_1350 = arith.constant 0.000000e+00 : f32
%c0_1351 = arith.constant 0 : index
%c1_1352 = arith.constant 1 : index
%c1_1353 = arith.constant 1 : index
%c256_1354 = arith.constant 256 : index
%c2_1355 = arith.constant 2 : index
%c56_1356 = arith.constant 56 : index
%c3_1357 = arith.constant 3 : index
%c56_1358 = arith.constant 56 : index
%c0_i64_1359 = arith.constant 0 : i64
// Pad with 0.0: no padding on the first two dimensions, %1438/%1439 (both 0 here)
// on the last two. The result type is fully dynamic even though every amount is constant.
%1436 = arith.index_cast %c0_i64_1359 : i64 to index
%1437 = arith.index_cast %c0_i64_1359 : i64 to index
%1438 = arith.index_cast %1427 : i64 to index
%1439 = arith.index_cast %1428 : i64 to index
%padded_1360 = tensor.pad %cast_1293 low[%1436, %1437, %1438, %1439] high[%1436, %1437, %1438, %1439] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_1350 : f32
} : tensor<1x256x56x56xf32> to tensor<?x?x?x?xf32>
// Output extent along dimension 2:
//   (56 + 2*pad - %c1_i64_1346*(k - 1) - 1) floordiv %c1_i64_1348 + 1, with k = 1 (%c1_1341).
%1440 = arith.index_cast %c1_1341 : index to i64
%c1_i64_1361 = arith.constant 1 : i64
%c2_i64_1362 = arith.constant 2 : i64
%1441 = arith.muli %1427, %c2_i64_1362 : i64
%1442 = arith.index_cast %c56_1333 : index to i64
%1443 = arith.addi %1442, %1441 : i64
%1444 = arith.subi %1440, %c1_i64_1361 : i64
%1445 = arith.muli %c1_i64_1346, %1444 : i64
%1446 = arith.subi %1443, %1445 : i64
%1447 = arith.subi %1446, %c1_i64_1361 : i64
%1448 = arith.floordivsi %1447, %c1_i64_1348 : i64
%1449 = arith.addi %1448, %c1_i64_1361 : i64
%1450 = arith.index_cast %1449 : i64 to index
// Same computation for dimension 3, using %1428, %c1_i64_1347, %c1_i64_1349 and k = 1 (%c1_1343).
%1451 = arith.index_cast %c1_1343 : index to i64
%c1_i64_1363 = arith.constant 1 : i64
%c2_i64_1364 = arith.constant 2 : i64
%1452 = arith.muli %1428, %c2_i64_1364 : i64
%1453 = arith.index_cast %c56_1335 : index to i64
%1454 = arith.addi %1453, %1452 : i64
%1455 = arith.subi %1451, %c1_i64_1363 : i64
%1456 = arith.muli %c1_i64_1347, %1455 : i64
%1457 = arith.subi %1454, %1456 : i64
%1458 = arith.subi %1457, %c1_i64_1363 : i64
%1459 = arith.floordivsi %1458, %c1_i64_1349 : i64
%1460 = arith.addi %1459, %c1_i64_1363 : i64
%1461 = arith.index_cast %1460 : i64 to index
// Initialize the conv output by broadcasting %cast_1318 along d1 (#map3 maps to (d1)),
// so the convolution accumulates on top of the per-channel values.
%1462 = tensor.empty(%1450, %1461) : tensor<1x128x?x?xf32>
%1463 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1318 : tensor<128xf32>) outs(%1462 : tensor<1x128x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x128x?x?xf32>
// %1464/%1465 (256 and 128 divided by the group count) have no visible uses in this chunk.
%1464 = arith.floordivsi %c256_1331, %1431 : index
%1465 = arith.floordivsi %c128_1337, %1431 : index
%c0_1365 = arith.constant 0 : index
%c1_1366 = arith.constant 1 : index
%1466 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_1360, %cast_1307 : tensor<?x?x?x?xf32>, tensor<128x256x1x1xf32>) outs(%1463 : tensor<1x128x?x?xf32>) -> tensor<1x128x?x?xf32>
// Cast the dynamically shaped result back to the static 1x128x56x56 type.
%cast_1367 = tensor.cast %1466 : tensor<1x128x?x?xf32> to tensor<1x128x56x56xf32>
// ReLU over %cast_1367: out = in > 0 ? in : 0.
// Index constants emitted by the lowering; none of them is referenced in the visible code.
%c1_1368 = arith.constant 1 : index
%c1_1369 = arith.constant 1 : index
%c128_1370 = arith.constant 128 : index
%c2_1371 = arith.constant 2 : index
%c56_1372 = arith.constant 56 : index
%c3_1373 = arith.constant 3 : index
%c56_1374 = arith.constant 56 : index
%1467 = tensor.empty() : tensor<1x128x56x56xf32>
%1468 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1367 : tensor<1x128x56x56xf32>) outs(%1467 : tensor<1x128x56x56xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// `ugt` is the unordered comparison: it is also true when %in is NaN,
// so a NaN input is selected and passed through unchanged.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x128x56x56xf32>
// Identity cast (same source and result type).
%cast_1375 = tensor.cast %1468 : tensor<1x128x56x56xf32> to tensor<1x128x56x56xf32>
// Per-tensor quantization of the f32 activation %cast_1375 to i8:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 7.8125e-03 (%1469) and zero_point = 0 (%1470).
%1469 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1470 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): 12 is presumably the torch dtype code for qint8; it has no visible use in this chunk.
%int12_1376 = torch.constant.int 12
%1471 = torch.aten.item %1469 : !torch.vtensor<[],f32> -> !torch.float
%1472 = torch_c.to_f64 %1471
%1473 = torch.aten.item %1470 : !torch.vtensor<[],si8> -> !torch.int
%1474 = torch_c.to_i64 %1473
// Index constants emitted by the lowering; none of them is referenced in the visible code.
%c1_1377 = arith.constant 1 : index
%c1_1378 = arith.constant 1 : index
%c128_1379 = arith.constant 128 : index
%c2_1380 = arith.constant 2 : index
%c56_1381 = arith.constant 56 : index
%c3_1382 = arith.constant 3 : index
%c56_1383 = arith.constant 56 : index
%1475 = tensor.empty() : tensor<1x128x56x56xi8>
%1476 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1375 : tensor<1x128x56x56xf32>) outs(%1475 : tensor<1x128x56x56xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point and scale are re-materialized inside the body as f32 scalars.
%5774 = torch_c.to_i64 %1473
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %1471
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties round to even. math.round would round ties away from zero, which
// disagrees with the round-half-to-even rule of quantize-linear semantics.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x128x56x56xi8>
// Identity casts (source and result types are the same).
%cast_1384 = tensor.cast %1476 : tensor<1x128x56x56xi8> to tensor<1x128x56x56xi8>
%cast_1385 = tensor.cast %cast_1384 : tensor<1x128x56x56xi8> to tensor<1x128x56x56xi8>
// Dequantize the i8 activation %cast_1385 back to f32:
//   y = (sext(q) - zero_point) * scale
// with scale = 7.8125e-03 (%1477) and zero_point = 0 (%1478).
%1477 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1478 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1479 = torch.aten.item %1477 : !torch.vtensor<[],f32> -> !torch.float
// %1480 and %1482 have no visible uses; the generic body re-derives both values itself.
%1480 = torch_c.to_f64 %1479
%1481 = torch.aten.item %1478 : !torch.vtensor<[],si8> -> !torch.int
%1482 = torch_c.to_i64 %1481
// Identity cast (same source and result type).
%cast_1386 = tensor.cast %cast_1385 : tensor<1x128x56x56xi8> to tensor<1x128x56x56xi8>
// Index constants emitted by the lowering; none of them is referenced in the visible code.
%c1_1387 = arith.constant 1 : index
%c1_1388 = arith.constant 1 : index
%c128_1389 = arith.constant 128 : index
%c2_1390 = arith.constant 2 : index
%c56_1391 = arith.constant 56 : index
%c3_1392 = arith.constant 3 : index
%c56_1393 = arith.constant 56 : index
%1483 = tensor.empty() : tensor<1x128x56x56xf32>
%1484 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1386 : tensor<1x128x56x56xi8>) outs(%1483 : tensor<1x128x56x56xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 first so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %1481
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %1479
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x128x56x56xf32>
// Identity cast. %cast_1394 is the input of the tensor.pad that produces %padded_1470.
%cast_1394 = tensor.cast %1484 : tensor<1x128x56x56xf32> to tensor<1x128x56x56xf32>
// Per-tensor quantization of the f32 tensor %50 (128x128x3x3) to i8:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 3.90625e-03 (%1485) and zero_point = 0 (%1486).
%1485 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1486 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): 12 is presumably the torch dtype code for qint8; it has no visible use in this chunk.
%int12_1395 = torch.constant.int 12
%1487 = torch.aten.item %1485 : !torch.vtensor<[],f32> -> !torch.float
%1488 = torch_c.to_f64 %1487
%1489 = torch.aten.item %1486 : !torch.vtensor<[],si8> -> !torch.int
%1490 = torch_c.to_i64 %1489
// Index constants emitted by the lowering; none of them is referenced in the visible code.
%c1_1396 = arith.constant 1 : index
%c0_1397 = arith.constant 0 : index
%c128_1398 = arith.constant 128 : index
%c1_1399 = arith.constant 1 : index
%c128_1400 = arith.constant 128 : index
%c2_1401 = arith.constant 2 : index
%c3_1402 = arith.constant 3 : index
%c3_1403 = arith.constant 3 : index
%c3_1404 = arith.constant 3 : index
%1491 = tensor.empty() : tensor<128x128x3x3xi8>
%1492 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%50 : tensor<128x128x3x3xf32>) outs(%1491 : tensor<128x128x3x3xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point and scale are re-materialized inside the body as f32 scalars.
%5774 = torch_c.to_i64 %1489
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %1487
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties round to even. math.round would round ties away from zero, which
// disagrees with the round-half-to-even rule of quantize-linear semantics.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<128x128x3x3xi8>
// Identity casts (source and result types are the same).
%cast_1405 = tensor.cast %1492 : tensor<128x128x3x3xi8> to tensor<128x128x3x3xi8>
%cast_1406 = tensor.cast %cast_1405 : tensor<128x128x3x3xi8> to tensor<128x128x3x3xi8>
// Dequantize the i8 tensor %cast_1406 (128x128x3x3) back to f32:
//   y = (sext(q) - zero_point) * scale
// with scale = 3.90625e-03 (%1493) and zero_point = 0 (%1494).
%1493 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1494 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1495 = torch.aten.item %1493 : !torch.vtensor<[],f32> -> !torch.float
// %1496 and %1498 have no visible uses; the generic body re-derives both values itself.
%1496 = torch_c.to_f64 %1495
%1497 = torch.aten.item %1494 : !torch.vtensor<[],si8> -> !torch.int
%1498 = torch_c.to_i64 %1497
// Identity cast (same source and result type).
%cast_1407 = tensor.cast %cast_1406 : tensor<128x128x3x3xi8> to tensor<128x128x3x3xi8>
// Index constants emitted by the lowering; none of them is referenced in the visible code.
%c1_1408 = arith.constant 1 : index
%c0_1409 = arith.constant 0 : index
%c128_1410 = arith.constant 128 : index
%c1_1411 = arith.constant 1 : index
%c128_1412 = arith.constant 128 : index
%c2_1413 = arith.constant 2 : index
%c3_1414 = arith.constant 3 : index
%c3_1415 = arith.constant 3 : index
%c3_1416 = arith.constant 3 : index
%1499 = tensor.empty() : tensor<128x128x3x3xf32>
%1500 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1407 : tensor<128x128x3x3xi8>) outs(%1499 : tensor<128x128x3x3xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 first so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %1497
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %1495
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<128x128x3x3xf32>
// Identity cast (same source and result type).
%cast_1417 = tensor.cast %1500 : tensor<128x128x3x3xf32> to tensor<128x128x3x3xf32>
// Per-tensor quantization of the f32 vector %52 (128 elements) to i8:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 7.8125e-03 (%1501) and zero_point = 0 (%1502).
%1501 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1502 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): 12 is presumably the torch dtype code for qint8; it has no visible use in this chunk.
%int12_1418 = torch.constant.int 12
%1503 = torch.aten.item %1501 : !torch.vtensor<[],f32> -> !torch.float
%1504 = torch_c.to_f64 %1503
%1505 = torch.aten.item %1502 : !torch.vtensor<[],si8> -> !torch.int
%1506 = torch_c.to_i64 %1505
// Index constants emitted by the lowering; none of them is referenced in the visible code.
%c1_1419 = arith.constant 1 : index
%c0_1420 = arith.constant 0 : index
%c128_1421 = arith.constant 128 : index
%1507 = tensor.empty() : tensor<128xi8>
%1508 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%52 : tensor<128xf32>) outs(%1507 : tensor<128xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point and scale are re-materialized inside the body as f32 scalars.
%5774 = torch_c.to_i64 %1505
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %1503
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties round to even. math.round would round ties away from zero, which
// disagrees with the round-half-to-even rule of quantize-linear semantics.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<128xi8>
// Identity casts (source and result types are the same).
%cast_1422 = tensor.cast %1508 : tensor<128xi8> to tensor<128xi8>
%cast_1423 = tensor.cast %cast_1422 : tensor<128xi8> to tensor<128xi8>
// Dequantize the i8 vector %cast_1423 (128 elements) back to f32:
//   y = (sext(q) - zero_point) * scale
// with scale = 7.8125e-03 (%1509) and zero_point = 0 (%1510).
%1509 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1510 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1511 = torch.aten.item %1509 : !torch.vtensor<[],f32> -> !torch.float
// %1512 and %1514 have no visible uses; the generic body re-derives both values itself.
%1512 = torch_c.to_f64 %1511
%1513 = torch.aten.item %1510 : !torch.vtensor<[],si8> -> !torch.int
%1514 = torch_c.to_i64 %1513
// Identity cast (same source and result type).
%cast_1424 = tensor.cast %cast_1423 : tensor<128xi8> to tensor<128xi8>
// Index constants emitted by the lowering; none of them is referenced in the visible code.
%c1_1425 = arith.constant 1 : index
%c0_1426 = arith.constant 0 : index
%c128_1427 = arith.constant 128 : index
%1515 = tensor.empty() : tensor<128xf32>
%1516 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_1424 : tensor<128xi8>) outs(%1515 : tensor<128xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 first so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %1513
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %1511
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<128xf32>
// Identity cast (same source and result type).
%cast_1428 = tensor.cast %1516 : tensor<128xf32> to tensor<128xf32>
// 2-D convolution (NCHW input, FCHW filter): 128 -> 128 channels, 3x3 kernel,
// stride 2, dilation 1, zero padding 1 on H and W, group count 1.
//   input  : %cast_1394 (1x128x56x56 f32, defined before this section)
//   filter : %cast_1417 (128x128x3x3 f32, defined before this section)
//   bias   : %cast_1428 (128 f32), broadcast per channel into the accumulator
//   result : %cast_1477 (1x128x28x28 f32)
// Many of the constants below have no uses within this section; they look like
// leftovers of the torch -> linalg lowering.
%int1_1429 = torch.constant.int 1
%int1_1430 = torch.constant.int 1
%int1_1431 = torch.constant.int 1
%int1_1432 = torch.constant.int 1
%int2_1433 = torch.constant.int 2
%int2_1434 = torch.constant.int 2
%int0_1435 = torch.constant.int 0
// Torch-level attribute lists. The list values themselves are not used in this
// section; the padding amounts are read directly from %int1_1429 / %int1_1430.
%1517 = torch.prim.ListConstruct %int1_1429, %int1_1430 : (!torch.int, !torch.int) -> !torch.list<int>
%1518 = torch.prim.ListConstruct %int1_1431, %int1_1432 : (!torch.int, !torch.int) -> !torch.list<int>
%1519 = torch.prim.ListConstruct %int2_1433, %int2_1434 : (!torch.int, !torch.int) -> !torch.list<int>
%1520 = torch.prim.ListConstruct %int0_1435, %int0_1435 : (!torch.int, !torch.int) -> !torch.list<int>
%false_1436 = torch.constant.bool false
%int1_1437 = torch.constant.int 1
// %1521 is the group count (1); %1522 / %1523 are the H / W padding (1 each).
%1521 = torch_c.to_i64 %int1_1437
%1522 = torch_c.to_i64 %int1_1429
%1523 = torch_c.to_i64 %int1_1430
%1524 = torch_c.to_i64 %int0_1435
%1525 = torch_c.to_i64 %int0_1435
%c0_1438 = arith.constant 0 : index
%c1_1439 = arith.constant 1 : index
%c1_1440 = arith.constant 1 : index
%c128_1441 = arith.constant 128 : index
%c2_1442 = arith.constant 2 : index
%c56_1443 = arith.constant 56 : index
%c3_1444 = arith.constant 3 : index
%c56_1445 = arith.constant 56 : index
%c0_1446 = arith.constant 0 : index
%c128_1447 = arith.constant 128 : index
%c1_1448 = arith.constant 1 : index
%c128_1449 = arith.constant 128 : index
%c2_1450 = arith.constant 2 : index
%c3_1451 = arith.constant 3 : index
%c3_1452 = arith.constant 3 : index
%c3_1453 = arith.constant 3 : index
// Runtime checks: the 128 input channels and the 128 filters must both be
// divisible by the group count.
%1526 = arith.index_cast %1521 : i64 to index
%c0_1454 = arith.constant 0 : index
%1527 = arith.remsi %c128_1441, %1526 : index
%1528 = arith.cmpi eq, %c0_1454, %1527 : index
cf.assert %1528, "invalid: groups must divide input channel size evenly."
%c0_1455 = arith.constant 0 : index
%1529 = arith.remsi %c128_1447, %1526 : index
%1530 = arith.cmpi eq, %c0_1455, %1529 : index
cf.assert %1530, "invalid: groups must divide weight batch size evenly."
%c1_i64_1456 = arith.constant 1 : i64
%c1_i64_1457 = arith.constant 1 : i64
%c2_i64_1458 = arith.constant 2 : i64
%c2_i64_1459 = arith.constant 2 : i64
%cst_1460 = arith.constant 0.000000e+00 : f32
%c0_1461 = arith.constant 0 : index
%c1_1462 = arith.constant 1 : index
%c1_1463 = arith.constant 1 : index
%c128_1464 = arith.constant 128 : index
%c2_1465 = arith.constant 2 : index
%c56_1466 = arith.constant 56 : index
%c3_1467 = arith.constant 3 : index
%c56_1468 = arith.constant 56 : index
%c0_i64_1469 = arith.constant 0 : i64
// Pad amounts per dimension: 0 for N and C, %1533 / %1534 (= 1) for H / W,
// applied on both the low and the high side with the fill value 0.0.
%1531 = arith.index_cast %c0_i64_1469 : i64 to index
%1532 = arith.index_cast %c0_i64_1469 : i64 to index
%1533 = arith.index_cast %1522 : i64 to index
%1534 = arith.index_cast %1523 : i64 to index
%padded_1470 = tensor.pad %cast_1394 low[%1531, %1532, %1533, %1534] high[%1531, %1532, %1533, %1534] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_1460 : f32
} : tensor<1x128x56x56xf32> to tensor<?x?x?x?xf32>
// Output height:
//   floordiv(56 + 2*pad - 1*(3 - 1) - 1, 2) + 1 = floordiv(55, 2) + 1 = 28
%1535 = arith.index_cast %c3_1451 : index to i64
%c1_i64_1471 = arith.constant 1 : i64
%c2_i64_1472 = arith.constant 2 : i64
%1536 = arith.muli %1522, %c2_i64_1472 : i64
%1537 = arith.index_cast %c56_1443 : index to i64
%1538 = arith.addi %1537, %1536 : i64
%1539 = arith.subi %1535, %c1_i64_1471 : i64
%1540 = arith.muli %c1_i64_1456, %1539 : i64
%1541 = arith.subi %1538, %1540 : i64
%1542 = arith.subi %1541, %c1_i64_1471 : i64
%1543 = arith.floordivsi %1542, %c2_i64_1458 : i64
%1544 = arith.addi %1543, %c1_i64_1471 : i64
%1545 = arith.index_cast %1544 : i64 to index
// Output width: the same computation on the W dimension, also giving 28.
%1546 = arith.index_cast %c3_1453 : index to i64
%c1_i64_1473 = arith.constant 1 : i64
%c2_i64_1474 = arith.constant 2 : i64
%1547 = arith.muli %1523, %c2_i64_1474 : i64
%1548 = arith.index_cast %c56_1445 : index to i64
%1549 = arith.addi %1548, %1547 : i64
%1550 = arith.subi %1546, %c1_i64_1473 : i64
%1551 = arith.muli %c1_i64_1457, %1550 : i64
%1552 = arith.subi %1549, %1551 : i64
%1553 = arith.subi %1552, %c1_i64_1473 : i64
%1554 = arith.floordivsi %1553, %c2_i64_1459 : i64
%1555 = arith.addi %1554, %c1_i64_1473 : i64
%1556 = arith.index_cast %1555 : i64 to index
// Initialise the accumulator with the bias: #map3 indexes the 1-D bias by the
// channel dimension (d1) only, so each channel plane is filled with its bias.
%1557 = tensor.empty(%1545, %1556) : tensor<1x128x?x?xf32>
%1558 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1428 : tensor<128xf32>) outs(%1557 : tensor<1x128x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x128x?x?xf32>
%1559 = arith.floordivsi %c128_1441, %1526 : index
%1560 = arith.floordivsi %c128_1447, %1526 : index
%c0_1475 = arith.constant 0 : index
%c1_1476 = arith.constant 1 : index
// The convolution accumulates into %1558, i.e. on top of the broadcast bias.
%1561 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%padded_1470, %cast_1417 : tensor<?x?x?x?xf32>, tensor<128x128x3x3xf32>) outs(%1558 : tensor<1x128x?x?xf32>) -> tensor<1x128x?x?xf32>
// Refine the dynamic H / W dimensions to the static 28x28 computed above.
%cast_1477 = tensor.cast %1561 : tensor<1x128x?x?xf32> to tensor<1x128x28x28xf32>
// ReLU over the 1x128x28x28 convolution result: out = (in > 0) ? in : 0.
// The index constants below are not used within this section.
%c1_1478 = arith.constant 1 : index
%c1_1479 = arith.constant 1 : index
%c128_1480 = arith.constant 128 : index
%c2_1481 = arith.constant 2 : index
%c28 = arith.constant 28 : index
%c3_1482 = arith.constant 3 : index
%c28_1483 = arith.constant 28 : index
%1562 = tensor.empty() : tensor<1x128x28x28xf32>
%1563 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1477 : tensor<1x128x28x28xf32>) outs(%1562 : tensor<1x128x28x28xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// `ugt` is the unordered predicate: it is also true when %in is NaN, so a NaN
// input is yielded unchanged rather than replaced by 0.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x128x28x28xf32>
// Same-type cast (no-op).
%cast_1484 = tensor.cast %1563 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
// Quantize the ReLU output (%cast_1484, 1x128x28x28 f32) to i8 with
// scale 1.5625e-02 (= 1/64, %1564) and zero point 0 (%1565):
//   q = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
// %int12_1485, %1567, %1569 and the index constants are not used within this
// section; the generic body re-derives scale / zero point from %1566 / %1568.
%1564 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%1565 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_1485 = torch.constant.int 12
%1566 = torch.aten.item %1564 : !torch.vtensor<[],f32> -> !torch.float
%1567 = torch_c.to_f64 %1566
%1568 = torch.aten.item %1565 : !torch.vtensor<[],si8> -> !torch.int
%1569 = torch_c.to_i64 %1568
%c1_1486 = arith.constant 1 : index
%c1_1487 = arith.constant 1 : index
%c128_1488 = arith.constant 128 : index
%c2_1489 = arith.constant 2 : index
%c28_1490 = arith.constant 28 : index
%c3_1491 = arith.constant 3 : index
%c28_1492 = arith.constant 28 : index
%1570 = tensor.empty() : tensor<1x128x28x28xi8>
%1571 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1484 : tensor<1x128x28x28xf32>) outs(%1570 : tensor<1x128x28x28xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32 and scale narrowed from f64 to f32.
%5774 = torch_c.to_i64 %1568
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %1566
%5777 = arith.truncf %5776 : f64 to f32
// round(in / scale) + zero_point; math.round rounds halfway cases away from zero.
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to the signed 8-bit range [-128, 127] before the float -> int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x128x28x28xi8>
// Two same-type casts (no-ops).
%cast_1493 = tensor.cast %1571 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
%cast_1494 = tensor.cast %cast_1493 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
// Dequantize the i8 activations %cast_1494 back to f32 with
// scale 1.5625e-02 (= 1/64, %1572) and zero point 0 (%1573):
//   out = f32(sext(q) - zero_point) * scale
// %1575, %1577 and the index constants are not used within this section.
%1572 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%1573 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1574 = torch.aten.item %1572 : !torch.vtensor<[],f32> -> !torch.float
%1575 = torch_c.to_f64 %1574
%1576 = torch.aten.item %1573 : !torch.vtensor<[],si8> -> !torch.int
%1577 = torch_c.to_i64 %1576
%cast_1495 = tensor.cast %cast_1494 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
%c1_1496 = arith.constant 1 : index
%c1_1497 = arith.constant 1 : index
%c128_1498 = arith.constant 128 : index
%c2_1499 = arith.constant 2 : index
%c28_1500 = arith.constant 28 : index
%c3_1501 = arith.constant 3 : index
%c28_1502 = arith.constant 28 : index
%1578 = tensor.empty() : tensor<1x128x28x28xf32>
%1579 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1495 : tensor<1x128x28x28xi8>) outs(%1578 : tensor<1x128x28x28xf32>) {
^bb0(%in: i8, %out: f32):
// The zero-point subtraction is done in i32 after sign-extending the element.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %1576
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %1574
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x128x28x28xf32>
// Same-type cast (no-op); %cast_1503 is the input of the next convolution.
%cast_1503 = tensor.cast %1579 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
// Quantize the filter %54 (512x128x1x1 f32, defined before this section) to i8
// with scale 3.90625e-03 (= 1/256, %1580) and zero point 0 (%1581):
//   q = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
// %int12_1504, %1583, %1585 and the index constants are not used within this
// section.
%1580 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1581 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_1504 = torch.constant.int 12
%1582 = torch.aten.item %1580 : !torch.vtensor<[],f32> -> !torch.float
%1583 = torch_c.to_f64 %1582
%1584 = torch.aten.item %1581 : !torch.vtensor<[],si8> -> !torch.int
%1585 = torch_c.to_i64 %1584
%c1_1505 = arith.constant 1 : index
%c0_1506 = arith.constant 0 : index
%c512 = arith.constant 512 : index
%c1_1507 = arith.constant 1 : index
%c128_1508 = arith.constant 128 : index
%1586 = tensor.empty() : tensor<512x128x1x1xi8>
// #map4 reads the input at (d0, d1, 0, 0), matching its 1x1 spatial extent.
%1587 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%54 : tensor<512x128x1x1xf32>) outs(%1586 : tensor<512x128x1x1xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %1584
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %1582
%5777 = arith.truncf %5776 : f64 to f32
// round(in / scale) + zero_point; math.round rounds halfway cases away from zero.
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to [-128, 127] before converting to i8.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<512x128x1x1xi8>
// Two same-type casts (no-ops).
%cast_1509 = tensor.cast %1587 : tensor<512x128x1x1xi8> to tensor<512x128x1x1xi8>
%cast_1510 = tensor.cast %cast_1509 : tensor<512x128x1x1xi8> to tensor<512x128x1x1xi8>
// Dequantize the i8 filter %cast_1510 (512x128x1x1) back to f32 with
// scale 3.90625e-03 (= 1/256, %1588) and zero point 0 (%1589):
//   out = f32(sext(q) - zero_point) * scale
// %1591, %1593 and the index constants are not used within this section.
%1588 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1589 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1590 = torch.aten.item %1588 : !torch.vtensor<[],f32> -> !torch.float
%1591 = torch_c.to_f64 %1590
%1592 = torch.aten.item %1589 : !torch.vtensor<[],si8> -> !torch.int
%1593 = torch_c.to_i64 %1592
%cast_1511 = tensor.cast %cast_1510 : tensor<512x128x1x1xi8> to tensor<512x128x1x1xi8>
%c1_1512 = arith.constant 1 : index
%c0_1513 = arith.constant 0 : index
%c512_1514 = arith.constant 512 : index
%c1_1515 = arith.constant 1 : index
%c128_1516 = arith.constant 128 : index
%1594 = tensor.empty() : tensor<512x128x1x1xf32>
%1595 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1511 : tensor<512x128x1x1xi8>) outs(%1594 : tensor<512x128x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// The zero-point subtraction is done in i32 after sign-extending the element.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %1592
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %1590
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<512x128x1x1xf32>
// Same-type cast (no-op); %cast_1517 is the filter of the next convolution.
%cast_1517 = tensor.cast %1595 : tensor<512x128x1x1xf32> to tensor<512x128x1x1xf32>
// Quantize the 512-element f32 vector %56 (defined before this section; used
// further down as the convolution bias) to i8 with scale 7.8125e-03
// (= 1/128, %1596) and zero point 0 (%1597):
//   q = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
// %int12_1518, %1599, %1601 and the index constants are not used within this
// section.
%1596 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1597 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_1518 = torch.constant.int 12
%1598 = torch.aten.item %1596 : !torch.vtensor<[],f32> -> !torch.float
%1599 = torch_c.to_f64 %1598
%1600 = torch.aten.item %1597 : !torch.vtensor<[],si8> -> !torch.int
%1601 = torch_c.to_i64 %1600
%c1_1519 = arith.constant 1 : index
%c0_1520 = arith.constant 0 : index
%c512_1521 = arith.constant 512 : index
%1602 = tensor.empty() : tensor<512xi8>
%1603 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%56 : tensor<512xf32>) outs(%1602 : tensor<512xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %1600
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %1598
%5777 = arith.truncf %5776 : f64 to f32
// round(in / scale) + zero_point; math.round rounds halfway cases away from zero.
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to [-128, 127] before converting to i8.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<512xi8>
// Two same-type casts (no-ops).
%cast_1522 = tensor.cast %1603 : tensor<512xi8> to tensor<512xi8>
%cast_1523 = tensor.cast %cast_1522 : tensor<512xi8> to tensor<512xi8>
// Dequantize the 512-element i8 vector %cast_1523 back to f32 with
// scale 7.8125e-03 (= 1/128, %1604) and zero point 0 (%1605):
//   out = f32(sext(q) - zero_point) * scale
// %1607, %1609 and the index constants are not used within this section.
%1604 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1605 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1606 = torch.aten.item %1604 : !torch.vtensor<[],f32> -> !torch.float
%1607 = torch_c.to_f64 %1606
%1608 = torch.aten.item %1605 : !torch.vtensor<[],si8> -> !torch.int
%1609 = torch_c.to_i64 %1608
%cast_1524 = tensor.cast %cast_1523 : tensor<512xi8> to tensor<512xi8>
%c1_1525 = arith.constant 1 : index
%c0_1526 = arith.constant 0 : index
%c512_1527 = arith.constant 512 : index
%1610 = tensor.empty() : tensor<512xf32>
%1611 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_1524 : tensor<512xi8>) outs(%1610 : tensor<512xf32>) {
^bb0(%in: i8, %out: f32):
// The zero-point subtraction is done in i32 after sign-extending the element.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %1608
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %1606
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<512xf32>
// Same-type cast (no-op); %cast_1528 is the bias of the next convolution.
%cast_1528 = tensor.cast %1611 : tensor<512xf32> to tensor<512xf32>
// 2-D convolution (NCHW input, FCHW filter): 128 -> 512 channels, 1x1 kernel,
// stride 1, dilation 1, padding 0, group count 1.
//   input  : %cast_1503 (1x128x28x28 f32)
//   filter : %cast_1517 (512x128x1x1 f32)
//   bias   : %cast_1528 (512 f32), broadcast per channel into the accumulator
//   result : %cast_1577 (1x512x28x28 f32)
// Many of the constants below have no uses within this section; they look like
// leftovers of the torch -> linalg lowering.
%int0_1529 = torch.constant.int 0
%int0_1530 = torch.constant.int 0
%int1_1531 = torch.constant.int 1
%int1_1532 = torch.constant.int 1
%int1_1533 = torch.constant.int 1
%int1_1534 = torch.constant.int 1
%int0_1535 = torch.constant.int 0
// Torch-level attribute lists. The list values themselves are not used in this
// section; the padding amounts are read directly from %int0_1529 / %int0_1530.
%1612 = torch.prim.ListConstruct %int0_1529, %int0_1530 : (!torch.int, !torch.int) -> !torch.list<int>
%1613 = torch.prim.ListConstruct %int1_1531, %int1_1532 : (!torch.int, !torch.int) -> !torch.list<int>
%1614 = torch.prim.ListConstruct %int1_1533, %int1_1534 : (!torch.int, !torch.int) -> !torch.list<int>
%1615 = torch.prim.ListConstruct %int0_1535, %int0_1535 : (!torch.int, !torch.int) -> !torch.list<int>
%false_1536 = torch.constant.bool false
%int1_1537 = torch.constant.int 1
// %1616 is the group count (1); %1617 / %1618 are the H / W padding (0 each).
%1616 = torch_c.to_i64 %int1_1537
%1617 = torch_c.to_i64 %int0_1529
%1618 = torch_c.to_i64 %int0_1530
%1619 = torch_c.to_i64 %int0_1535
%1620 = torch_c.to_i64 %int0_1535
%c0_1538 = arith.constant 0 : index
%c1_1539 = arith.constant 1 : index
%c1_1540 = arith.constant 1 : index
%c128_1541 = arith.constant 128 : index
%c2_1542 = arith.constant 2 : index
%c28_1543 = arith.constant 28 : index
%c3_1544 = arith.constant 3 : index
%c28_1545 = arith.constant 28 : index
%c0_1546 = arith.constant 0 : index
%c512_1547 = arith.constant 512 : index
%c1_1548 = arith.constant 1 : index
%c128_1549 = arith.constant 128 : index
%c2_1550 = arith.constant 2 : index
%c1_1551 = arith.constant 1 : index
%c3_1552 = arith.constant 3 : index
%c1_1553 = arith.constant 1 : index
// Runtime checks: the 128 input channels and the 512 filters must both be
// divisible by the group count.
%1621 = arith.index_cast %1616 : i64 to index
%c0_1554 = arith.constant 0 : index
%1622 = arith.remsi %c128_1541, %1621 : index
%1623 = arith.cmpi eq, %c0_1554, %1622 : index
cf.assert %1623, "invalid: groups must divide input channel size evenly."
%c0_1555 = arith.constant 0 : index
%1624 = arith.remsi %c512_1547, %1621 : index
%1625 = arith.cmpi eq, %c0_1555, %1624 : index
cf.assert %1625, "invalid: groups must divide weight batch size evenly."
%c1_i64_1556 = arith.constant 1 : i64
%c1_i64_1557 = arith.constant 1 : i64
%c1_i64_1558 = arith.constant 1 : i64
%c1_i64_1559 = arith.constant 1 : i64
%cst_1560 = arith.constant 0.000000e+00 : f32
%c0_1561 = arith.constant 0 : index
%c1_1562 = arith.constant 1 : index
%c1_1563 = arith.constant 1 : index
%c128_1564 = arith.constant 128 : index
%c2_1565 = arith.constant 2 : index
%c28_1566 = arith.constant 28 : index
%c3_1567 = arith.constant 3 : index
%c28_1568 = arith.constant 28 : index
%c0_i64_1569 = arith.constant 0 : i64
// All pad amounts are 0 here, so the pad adds no elements; its result type is
// still fully dynamic.
%1626 = arith.index_cast %c0_i64_1569 : i64 to index
%1627 = arith.index_cast %c0_i64_1569 : i64 to index
%1628 = arith.index_cast %1617 : i64 to index
%1629 = arith.index_cast %1618 : i64 to index
%padded_1570 = tensor.pad %cast_1503 low[%1626, %1627, %1628, %1629] high[%1626, %1627, %1628, %1629] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_1560 : f32
} : tensor<1x128x28x28xf32> to tensor<?x?x?x?xf32>
// Output height:
//   floordiv(28 + 2*0 - 1*(1 - 1) - 1, 1) + 1 = 27 + 1 = 28
%1630 = arith.index_cast %c1_1551 : index to i64
%c1_i64_1571 = arith.constant 1 : i64
%c2_i64_1572 = arith.constant 2 : i64
%1631 = arith.muli %1617, %c2_i64_1572 : i64
%1632 = arith.index_cast %c28_1543 : index to i64
%1633 = arith.addi %1632, %1631 : i64
%1634 = arith.subi %1630, %c1_i64_1571 : i64
%1635 = arith.muli %c1_i64_1556, %1634 : i64
%1636 = arith.subi %1633, %1635 : i64
%1637 = arith.subi %1636, %c1_i64_1571 : i64
%1638 = arith.floordivsi %1637, %c1_i64_1558 : i64
%1639 = arith.addi %1638, %c1_i64_1571 : i64
%1640 = arith.index_cast %1639 : i64 to index
// Output width: the same computation on the W dimension, also giving 28.
%1641 = arith.index_cast %c1_1553 : index to i64
%c1_i64_1573 = arith.constant 1 : i64
%c2_i64_1574 = arith.constant 2 : i64
%1642 = arith.muli %1618, %c2_i64_1574 : i64
%1643 = arith.index_cast %c28_1545 : index to i64
%1644 = arith.addi %1643, %1642 : i64
%1645 = arith.subi %1641, %c1_i64_1573 : i64
%1646 = arith.muli %c1_i64_1557, %1645 : i64
%1647 = arith.subi %1644, %1646 : i64
%1648 = arith.subi %1647, %c1_i64_1573 : i64
%1649 = arith.floordivsi %1648, %c1_i64_1559 : i64
%1650 = arith.addi %1649, %c1_i64_1573 : i64
%1651 = arith.index_cast %1650 : i64 to index
// Initialise the accumulator with the bias: #map3 indexes the 1-D bias by the
// channel dimension (d1) only, so each channel plane is filled with its bias.
%1652 = tensor.empty(%1640, %1651) : tensor<1x512x?x?xf32>
%1653 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1528 : tensor<512xf32>) outs(%1652 : tensor<1x512x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x512x?x?xf32>
%1654 = arith.floordivsi %c128_1541, %1621 : index
%1655 = arith.floordivsi %c512_1547, %1621 : index
%c0_1575 = arith.constant 0 : index
%c1_1576 = arith.constant 1 : index
// The convolution accumulates into %1653, i.e. on top of the broadcast bias.
%1656 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_1570, %cast_1517 : tensor<?x?x?x?xf32>, tensor<512x128x1x1xf32>) outs(%1653 : tensor<1x512x?x?xf32>) -> tensor<1x512x?x?xf32>
// Refine the dynamic H / W dimensions to the static 28x28 computed above.
%cast_1577 = tensor.cast %1656 : tensor<1x512x?x?xf32> to tensor<1x512x28x28xf32>
// Quantize the convolution result %cast_1577 (1x512x28x28 f32) to i8 with
// scale 7.8125e-03 (= 1/128, %1657) and zero point 0 (%1658):
//   q = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
// %int12_1578, %1660, %1662 and the index constants are not used within this
// section.
%1657 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1658 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_1578 = torch.constant.int 12
%1659 = torch.aten.item %1657 : !torch.vtensor<[],f32> -> !torch.float
%1660 = torch_c.to_f64 %1659
%1661 = torch.aten.item %1658 : !torch.vtensor<[],si8> -> !torch.int
%1662 = torch_c.to_i64 %1661
%c1_1579 = arith.constant 1 : index
%c1_1580 = arith.constant 1 : index
%c512_1581 = arith.constant 512 : index
%c2_1582 = arith.constant 2 : index
%c28_1583 = arith.constant 28 : index
%c3_1584 = arith.constant 3 : index
%c28_1585 = arith.constant 28 : index
%1663 = tensor.empty() : tensor<1x512x28x28xi8>
%1664 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1577 : tensor<1x512x28x28xf32>) outs(%1663 : tensor<1x512x28x28xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %1661
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %1659
%5777 = arith.truncf %5776 : f64 to f32
// round(in / scale) + zero_point; math.round rounds halfway cases away from zero.
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to [-128, 127] before converting to i8.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x512x28x28xi8>
// Two same-type casts (no-ops).
%cast_1586 = tensor.cast %1664 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
%cast_1587 = tensor.cast %cast_1586 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
// Dequantize the i8 tensor %cast_1587 (1x512x28x28) back to f32 with
// scale 7.8125e-03 (= 1/128, %1665) and zero point 0 (%1666):
//   out = f32(sext(q) - zero_point) * scale
// %1668, %1670 and the index constants are not used within this section.
%1665 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1666 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1667 = torch.aten.item %1665 : !torch.vtensor<[],f32> -> !torch.float
%1668 = torch_c.to_f64 %1667
%1669 = torch.aten.item %1666 : !torch.vtensor<[],si8> -> !torch.int
%1670 = torch_c.to_i64 %1669
%cast_1588 = tensor.cast %cast_1587 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
%c1_1589 = arith.constant 1 : index
%c1_1590 = arith.constant 1 : index
%c512_1591 = arith.constant 512 : index
%c2_1592 = arith.constant 2 : index
%c28_1593 = arith.constant 28 : index
%c3_1594 = arith.constant 3 : index
%c28_1595 = arith.constant 28 : index
%1671 = tensor.empty() : tensor<1x512x28x28xf32>
%1672 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1588 : tensor<1x512x28x28xi8>) outs(%1671 : tensor<1x512x28x28xf32>) {
^bb0(%in: i8, %out: f32):
// The zero-point subtraction is done in i32 after sign-extending the element.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %1669
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %1667
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x512x28x28xf32>
// Same-type cast (no-op).
%cast_1596 = tensor.cast %1672 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
// Quantize the filter %58 (512x256x1x1 f32, defined before this section) to i8
// with scale 3.90625e-03 (= 1/256, %1673) and zero point 0 (%1674):
//   q = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
// %int12_1597, %1676, %1678 and the index constants are not used within this
// section.
%1673 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1674 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_1597 = torch.constant.int 12
%1675 = torch.aten.item %1673 : !torch.vtensor<[],f32> -> !torch.float
%1676 = torch_c.to_f64 %1675
%1677 = torch.aten.item %1674 : !torch.vtensor<[],si8> -> !torch.int
%1678 = torch_c.to_i64 %1677
%c1_1598 = arith.constant 1 : index
%c0_1599 = arith.constant 0 : index
%c512_1600 = arith.constant 512 : index
%c1_1601 = arith.constant 1 : index
%c256_1602 = arith.constant 256 : index
%1679 = tensor.empty() : tensor<512x256x1x1xi8>
// #map4 reads the input at (d0, d1, 0, 0), matching its 1x1 spatial extent.
%1680 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%58 : tensor<512x256x1x1xf32>) outs(%1679 : tensor<512x256x1x1xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %1677
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %1675
%5777 = arith.truncf %5776 : f64 to f32
// round(in / scale) + zero_point; math.round rounds halfway cases away from zero.
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to [-128, 127] before converting to i8.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<512x256x1x1xi8>
// Two same-type casts (no-ops).
%cast_1603 = tensor.cast %1680 : tensor<512x256x1x1xi8> to tensor<512x256x1x1xi8>
%cast_1604 = tensor.cast %cast_1603 : tensor<512x256x1x1xi8> to tensor<512x256x1x1xi8>
// Dequantize the i8 filter %cast_1604 (512x256x1x1) back to f32 with
// scale 3.90625e-03 (= 1/256, %1681) and zero point 0 (%1682):
//   out = f32(sext(q) - zero_point) * scale
// %1684, %1686 and the index constants are not used within this section.
%1681 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1682 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1683 = torch.aten.item %1681 : !torch.vtensor<[],f32> -> !torch.float
%1684 = torch_c.to_f64 %1683
%1685 = torch.aten.item %1682 : !torch.vtensor<[],si8> -> !torch.int
%1686 = torch_c.to_i64 %1685
%cast_1605 = tensor.cast %cast_1604 : tensor<512x256x1x1xi8> to tensor<512x256x1x1xi8>
%c1_1606 = arith.constant 1 : index
%c0_1607 = arith.constant 0 : index
%c512_1608 = arith.constant 512 : index
%c1_1609 = arith.constant 1 : index
%c256_1610 = arith.constant 256 : index
%1687 = tensor.empty() : tensor<512x256x1x1xf32>
%1688 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1605 : tensor<512x256x1x1xi8>) outs(%1687 : tensor<512x256x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// The zero-point subtraction is done in i32 after sign-extending the element.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %1685
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %1683
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<512x256x1x1xf32>
// Same-type cast (no-op); %cast_1611 is the filter of the next convolution.
%cast_1611 = tensor.cast %1688 : tensor<512x256x1x1xf32> to tensor<512x256x1x1xf32>
// Quantize the 512-element f32 vector %60 (defined before this section; used
// further down as the convolution bias) to i8 with scale 1.5625e-02
// (= 1/64, %1689) and zero point 0 (%1690):
//   q = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
// %int12_1612, %1692, %1694 and the index constants are not used within this
// section.
%1689 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%1690 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_1612 = torch.constant.int 12
%1691 = torch.aten.item %1689 : !torch.vtensor<[],f32> -> !torch.float
%1692 = torch_c.to_f64 %1691
%1693 = torch.aten.item %1690 : !torch.vtensor<[],si8> -> !torch.int
%1694 = torch_c.to_i64 %1693
%c1_1613 = arith.constant 1 : index
%c0_1614 = arith.constant 0 : index
%c512_1615 = arith.constant 512 : index
%1695 = tensor.empty() : tensor<512xi8>
%1696 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%60 : tensor<512xf32>) outs(%1695 : tensor<512xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %1693
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %1691
%5777 = arith.truncf %5776 : f64 to f32
// round(in / scale) + zero_point; math.round rounds halfway cases away from zero.
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to [-128, 127] before converting to i8.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<512xi8>
// Two same-type casts (no-ops).
%cast_1616 = tensor.cast %1696 : tensor<512xi8> to tensor<512xi8>
%cast_1617 = tensor.cast %cast_1616 : tensor<512xi8> to tensor<512xi8>
// Dequantize the 512-element i8 vector %cast_1617 back to f32 with
// scale 1.5625e-02 (= 1/64, %1697) and zero point 0 (%1698):
//   out = f32(sext(q) - zero_point) * scale
// %1700, %1702 and the index constants are not used within this section.
%1697 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%1698 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1699 = torch.aten.item %1697 : !torch.vtensor<[],f32> -> !torch.float
%1700 = torch_c.to_f64 %1699
%1701 = torch.aten.item %1698 : !torch.vtensor<[],si8> -> !torch.int
%1702 = torch_c.to_i64 %1701
%cast_1618 = tensor.cast %cast_1617 : tensor<512xi8> to tensor<512xi8>
%c1_1619 = arith.constant 1 : index
%c0_1620 = arith.constant 0 : index
%c512_1621 = arith.constant 512 : index
%1703 = tensor.empty() : tensor<512xf32>
%1704 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_1618 : tensor<512xi8>) outs(%1703 : tensor<512xf32>) {
^bb0(%in: i8, %out: f32):
// The zero-point subtraction is done in i32 after sign-extending the element.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %1701
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %1699
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<512xf32>
// Same-type cast (no-op); %cast_1622 is the bias of the next convolution.
%cast_1622 = tensor.cast %1704 : tensor<512xf32> to tensor<512xf32>
// 2-D convolution (NCHW input, FCHW filter): 256 -> 512 channels, 1x1 kernel,
// stride 2, dilation 1, padding 0, group count 1.
//   input  : %cast_1293 (1x256x56x56 f32, defined before this section)
//   filter : %cast_1611 (512x256x1x1 f32)
//   bias   : %cast_1622 (512 f32), broadcast per channel into the accumulator
//   result : %cast_1671 (1x512x28x28 f32)
// Many of the constants below have no uses within this section; they look like
// leftovers of the torch -> linalg lowering.
%int0_1623 = torch.constant.int 0
%int0_1624 = torch.constant.int 0
%int1_1625 = torch.constant.int 1
%int1_1626 = torch.constant.int 1
%int2_1627 = torch.constant.int 2
%int2_1628 = torch.constant.int 2
%int0_1629 = torch.constant.int 0
// Torch-level attribute lists. The list values themselves are not used in this
// section; the padding amounts are read directly from %int0_1623 / %int0_1624.
%1705 = torch.prim.ListConstruct %int0_1623, %int0_1624 : (!torch.int, !torch.int) -> !torch.list<int>
%1706 = torch.prim.ListConstruct %int1_1625, %int1_1626 : (!torch.int, !torch.int) -> !torch.list<int>
%1707 = torch.prim.ListConstruct %int2_1627, %int2_1628 : (!torch.int, !torch.int) -> !torch.list<int>
%1708 = torch.prim.ListConstruct %int0_1629, %int0_1629 : (!torch.int, !torch.int) -> !torch.list<int>
%false_1630 = torch.constant.bool false
%int1_1631 = torch.constant.int 1
// %1709 is the group count (1); %1710 / %1711 are the H / W padding (0 each).
%1709 = torch_c.to_i64 %int1_1631
%1710 = torch_c.to_i64 %int0_1623
%1711 = torch_c.to_i64 %int0_1624
%1712 = torch_c.to_i64 %int0_1629
%1713 = torch_c.to_i64 %int0_1629
%c0_1632 = arith.constant 0 : index
%c1_1633 = arith.constant 1 : index
%c1_1634 = arith.constant 1 : index
%c256_1635 = arith.constant 256 : index
%c2_1636 = arith.constant 2 : index
%c56_1637 = arith.constant 56 : index
%c3_1638 = arith.constant 3 : index
%c56_1639 = arith.constant 56 : index
%c0_1640 = arith.constant 0 : index
%c512_1641 = arith.constant 512 : index
%c1_1642 = arith.constant 1 : index
%c256_1643 = arith.constant 256 : index
%c2_1644 = arith.constant 2 : index
%c1_1645 = arith.constant 1 : index
%c3_1646 = arith.constant 3 : index
%c1_1647 = arith.constant 1 : index
// Runtime checks: the 256 input channels and the 512 filters must both be
// divisible by the group count.
%1714 = arith.index_cast %1709 : i64 to index
%c0_1648 = arith.constant 0 : index
%1715 = arith.remsi %c256_1635, %1714 : index
%1716 = arith.cmpi eq, %c0_1648, %1715 : index
cf.assert %1716, "invalid: groups must divide input channel size evenly."
%c0_1649 = arith.constant 0 : index
%1717 = arith.remsi %c512_1641, %1714 : index
%1718 = arith.cmpi eq, %c0_1649, %1717 : index
cf.assert %1718, "invalid: groups must divide weight batch size evenly."
%c1_i64_1650 = arith.constant 1 : i64
%c1_i64_1651 = arith.constant 1 : i64
%c2_i64_1652 = arith.constant 2 : i64
%c2_i64_1653 = arith.constant 2 : i64
%cst_1654 = arith.constant 0.000000e+00 : f32
%c0_1655 = arith.constant 0 : index
%c1_1656 = arith.constant 1 : index
%c1_1657 = arith.constant 1 : index
%c256_1658 = arith.constant 256 : index
%c2_1659 = arith.constant 2 : index
%c56_1660 = arith.constant 56 : index
%c3_1661 = arith.constant 3 : index
%c56_1662 = arith.constant 56 : index
%c0_i64_1663 = arith.constant 0 : i64
// All pad amounts are 0 here, so the pad adds no elements; its result type is
// still fully dynamic.
%1719 = arith.index_cast %c0_i64_1663 : i64 to index
%1720 = arith.index_cast %c0_i64_1663 : i64 to index
%1721 = arith.index_cast %1710 : i64 to index
%1722 = arith.index_cast %1711 : i64 to index
%padded_1664 = tensor.pad %cast_1293 low[%1719, %1720, %1721, %1722] high[%1719, %1720, %1721, %1722] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_1654 : f32
} : tensor<1x256x56x56xf32> to tensor<?x?x?x?xf32>
// Output height:
//   floordiv(56 + 2*0 - 1*(1 - 1) - 1, 2) + 1 = floordiv(55, 2) + 1 = 28
%1723 = arith.index_cast %c1_1645 : index to i64
%c1_i64_1665 = arith.constant 1 : i64
%c2_i64_1666 = arith.constant 2 : i64
%1724 = arith.muli %1710, %c2_i64_1666 : i64
%1725 = arith.index_cast %c56_1637 : index to i64
%1726 = arith.addi %1725, %1724 : i64
%1727 = arith.subi %1723, %c1_i64_1665 : i64
%1728 = arith.muli %c1_i64_1650, %1727 : i64
%1729 = arith.subi %1726, %1728 : i64
%1730 = arith.subi %1729, %c1_i64_1665 : i64
%1731 = arith.floordivsi %1730, %c2_i64_1652 : i64
%1732 = arith.addi %1731, %c1_i64_1665 : i64
%1733 = arith.index_cast %1732 : i64 to index
// Output width: the same computation on the W dimension, also giving 28.
%1734 = arith.index_cast %c1_1647 : index to i64
%c1_i64_1667 = arith.constant 1 : i64
%c2_i64_1668 = arith.constant 2 : i64
%1735 = arith.muli %1711, %c2_i64_1668 : i64
%1736 = arith.index_cast %c56_1639 : index to i64
%1737 = arith.addi %1736, %1735 : i64
%1738 = arith.subi %1734, %c1_i64_1667 : i64
%1739 = arith.muli %c1_i64_1651, %1738 : i64
%1740 = arith.subi %1737, %1739 : i64
%1741 = arith.subi %1740, %c1_i64_1667 : i64
%1742 = arith.floordivsi %1741, %c2_i64_1653 : i64
%1743 = arith.addi %1742, %c1_i64_1667 : i64
%1744 = arith.index_cast %1743 : i64 to index
// Initialise the accumulator with the bias: #map3 indexes the 1-D bias by the
// channel dimension (d1) only, so each channel plane is filled with its bias.
%1745 = tensor.empty(%1733, %1744) : tensor<1x512x?x?xf32>
%1746 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1622 : tensor<512xf32>) outs(%1745 : tensor<1x512x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x512x?x?xf32>
%1747 = arith.floordivsi %c256_1635, %1714 : index
%1748 = arith.floordivsi %c512_1641, %1714 : index
%c0_1669 = arith.constant 0 : index
%c1_1670 = arith.constant 1 : index
// The convolution accumulates into %1746, i.e. on top of the broadcast bias.
%1749 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%padded_1664, %cast_1611 : tensor<?x?x?x?xf32>, tensor<512x256x1x1xf32>) outs(%1746 : tensor<1x512x?x?xf32>) -> tensor<1x512x?x?xf32>
// Refine the dynamic H / W dimensions to the static 28x28 computed above.
%cast_1671 = tensor.cast %1749 : tensor<1x512x?x?xf32> to tensor<1x512x28x28xf32>
%1750 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%1751 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_1672 = torch.constant.int 12
%1752 = torch.aten.item %1750 : !torch.vtensor<[],f32> -> !torch.float
%1753 = torch_c.to_f64 %1752
%1754 = torch.aten.item %1751 : !torch.vtensor<[],si8> -> !torch.int
%1755 = torch_c.to_i64 %1754
%c1_1673 = arith.constant 1 : index
%c1_1674 = arith.constant 1 : index
%c512_1675 = arith.constant 512 : index
%c2_1676 = arith.constant 2 : index
%c28_1677 = arith.constant 28 : index
%c3_1678 = arith.constant 3 : index
%c28_1679 = arith.constant 28 : index
%1756 = tensor.empty() : tensor<1x512x28x28xi8>
// Element-wise affine quantization of %cast_1671 (f32) to signed 8-bit:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is %1752 (item of the 1.5625e-02 literal %1750) and zero_point is
// %1754 (item of the 0 literal %1751).
// Rounding uses math.roundeven: ONNX QuantizeLinear / torch.quantize_per_tensor
// round ties to the nearest even integer, whereas math.round sends exact .5
// ties away from zero and yields an off-by-one code for those inputs.
%1757 = linalg.generic {
    indexing_maps = [#map, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_1671 : tensor<1x512x28x28xf32>)
    outs(%1756 : tensor<1x512x28x28xi8>) {
^bb0(%x: f32, %unused_init: i8):
  // Zero point (i64 -> f32) and scale (f64 -> f32) in the element type.
  %zp_i64 = torch_c.to_i64 %1754
  %zp_f32 = arith.sitofp %zp_i64 : i64 to f32
  %scale_f64 = torch_c.to_f64 %1752
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  %scaled = arith.divf %x, %scale_f32 : f32
  %rounded = math.roundeven %scaled : f32
  %shifted = arith.addf %rounded, %zp_f32 : f32
  // Saturate to [-128, 127] before the float -> int conversion.
  %qmin = arith.constant -1.280000e+02 : f32
  %qmax = arith.constant 1.270000e+02 : f32
  %below = arith.cmpf ult, %shifted, %qmin : f32
  %above = arith.cmpf ugt, %shifted, %qmax : f32
  %lo_clamped = arith.select %below, %qmin, %shifted : f32
  %clamped = arith.select %above, %qmax, %lo_clamped : f32
  %q = arith.fptosi %clamped : f32 to i8
  linalg.yield %q : i8
} -> tensor<1x512x28x28xi8>
%cast_1680 = tensor.cast %1757 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
%cast_1681 = tensor.cast %cast_1680 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
%1758 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%1759 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1760 = torch.aten.item %1758 : !torch.vtensor<[],f32> -> !torch.float
%1761 = torch_c.to_f64 %1760
%1762 = torch.aten.item %1759 : !torch.vtensor<[],si8> -> !torch.int
%1763 = torch_c.to_i64 %1762
%cast_1682 = tensor.cast %cast_1681 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
%c1_1683 = arith.constant 1 : index
%c1_1684 = arith.constant 1 : index
%c512_1685 = arith.constant 512 : index
%c2_1686 = arith.constant 2 : index
%c28_1687 = arith.constant 28 : index
%c3_1688 = arith.constant 3 : index
%c28_1689 = arith.constant 28 : index
%1764 = tensor.empty() : tensor<1x512x28x28xf32>
// Element-wise dequantization of %cast_1682 (i8) back to f32:
//   x = (q - zero_point) * scale
// zero_point is %1762 (item of the 0 literal %1759) and scale is %1760
// (item of the 1.5625e-02 literal %1758). The subtraction is done in i32.
%1765 = linalg.generic {
    indexing_maps = [#map, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_1682 : tensor<1x512x28x28xi8>)
    outs(%1764 : tensor<1x512x28x28xf32>) {
^bb0(%q: i8, %unused_init: f32):
  %zp_i64 = torch_c.to_i64 %1762
  %zp_i32 = arith.trunci %zp_i64 : i64 to i32
  %scale_f64 = torch_c.to_f64 %1760
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  %q_i32 = arith.extsi %q : i8 to i32
  %centered = arith.subi %q_i32, %zp_i32 : i32
  %centered_f32 = arith.sitofp %centered : i32 to f32
  %real = arith.mulf %centered_f32, %scale_f32 : f32
  linalg.yield %real : f32
} -> tensor<1x512x28x28xf32>
%cast_1690 = tensor.cast %1765 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
%int1_1691 = torch.constant.int 1
%1766 = torch_c.to_i64 %int1_1691
%c1_1692 = arith.constant 1 : index
%c1_1693 = arith.constant 1 : index
%c512_1694 = arith.constant 512 : index
%c2_1695 = arith.constant 2 : index
%c28_1696 = arith.constant 28 : index
%c3_1697 = arith.constant 3 : index
%c28_1698 = arith.constant 28 : index
// Runtime broadcast-compatibility checks for the element-wise add %1771.
// Each check compares two index constants holding the same value, so all
// three conditions are statically true.
%c1_1699 = arith.constant 1 : index
%c512_1700 = arith.constant 512 : index
// Dimension 1: 512 == 512.
%1767 = arith.cmpi eq, %c512_1694, %c512_1700 : index
cf.assert %1767, "mismatched size for broadcast"
%c2_1701 = arith.constant 2 : index
%c28_1702 = arith.constant 28 : index
// Dimension 2: 28 == 28.
%1768 = arith.cmpi eq, %c28_1696, %c28_1702 : index
cf.assert %1768, "mismatched size for broadcast"
%c3_1703 = arith.constant 3 : index
%c28_1704 = arith.constant 28 : index
// Dimension 3: 28 == 28.
%1769 = arith.cmpi eq, %c28_1698, %c28_1704 : index
cf.assert %1769, "mismatched size for broadcast"
%1770 = tensor.empty() : tensor<1x512x28x28xf32>
// Element-wise add with a scalar multiplier on the second operand:
//   out = lhs + rhs * alpha
// alpha is %1766 (torch_c.to_i64 of the integer constant 1) converted to f32.
// lhs is %cast_1596 and rhs is %cast_1690 (the dequantized result %1765).
%1771 = linalg.generic {
    indexing_maps = [#map, #map, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_1596, %cast_1690 : tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>)
    outs(%1770 : tensor<1x512x28x28xf32>) {
^bb0(%lhs: f32, %rhs: f32, %unused_init: f32):
  %alpha = arith.sitofp %1766 : i64 to f32
  %rhs_scaled = arith.mulf %rhs, %alpha : f32
  %sum = arith.addf %lhs, %rhs_scaled : f32
  linalg.yield %sum : f32
} -> tensor<1x512x28x28xf32>
%cast_1705 = tensor.cast %1771 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
%c1_1706 = arith.constant 1 : index
%c1_1707 = arith.constant 1 : index
%c512_1708 = arith.constant 512 : index
%c2_1709 = arith.constant 2 : index
%c28_1710 = arith.constant 28 : index
%c3_1711 = arith.constant 3 : index
%c28_1712 = arith.constant 28 : index
%1772 = tensor.empty() : tensor<1x512x28x28xf32>
// Element-wise ReLU over %cast_1705: yields x when `x ugt 0.0`, else 0.0.
// The predicate is the unordered form, so it is also true when x is NaN and
// the NaN input is yielded unchanged.
%1773 = linalg.generic {
    indexing_maps = [#map, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_1705 : tensor<1x512x28x28xf32>)
    outs(%1772 : tensor<1x512x28x28xf32>) {
^bb0(%x: f32, %unused_init: f32):
  %zero = arith.constant 0.000000e+00 : f32
  %keep_x = arith.cmpf ugt, %x, %zero : f32
  %relu = arith.select %keep_x, %x, %zero : f32
  linalg.yield %relu : f32
} -> tensor<1x512x28x28xf32>
%cast_1713 = tensor.cast %1773 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
%1774 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%1775 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_1714 = torch.constant.int 12
%1776 = torch.aten.item %1774 : !torch.vtensor<[],f32> -> !torch.float
%1777 = torch_c.to_f64 %1776
%1778 = torch.aten.item %1775 : !torch.vtensor<[],si8> -> !torch.int
%1779 = torch_c.to_i64 %1778
%c1_1715 = arith.constant 1 : index
%c1_1716 = arith.constant 1 : index
%c512_1717 = arith.constant 512 : index
%c2_1718 = arith.constant 2 : index
%c28_1719 = arith.constant 28 : index
%c3_1720 = arith.constant 3 : index
%c28_1721 = arith.constant 28 : index
%1780 = tensor.empty() : tensor<1x512x28x28xi8>
// Element-wise affine quantization of %cast_1713 (f32) to signed 8-bit:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is %1776 (item of the 1.5625e-02 literal %1774) and zero_point is
// %1778 (item of the 0 literal %1775).
// Rounding uses math.roundeven: ONNX QuantizeLinear / torch.quantize_per_tensor
// round ties to the nearest even integer, whereas math.round sends exact .5
// ties away from zero and yields an off-by-one code for those inputs.
%1781 = linalg.generic {
    indexing_maps = [#map, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_1713 : tensor<1x512x28x28xf32>)
    outs(%1780 : tensor<1x512x28x28xi8>) {
^bb0(%x: f32, %unused_init: i8):
  // Zero point (i64 -> f32) and scale (f64 -> f32) in the element type.
  %zp_i64 = torch_c.to_i64 %1778
  %zp_f32 = arith.sitofp %zp_i64 : i64 to f32
  %scale_f64 = torch_c.to_f64 %1776
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  %scaled = arith.divf %x, %scale_f32 : f32
  %rounded = math.roundeven %scaled : f32
  %shifted = arith.addf %rounded, %zp_f32 : f32
  // Saturate to [-128, 127] before the float -> int conversion.
  %qmin = arith.constant -1.280000e+02 : f32
  %qmax = arith.constant 1.270000e+02 : f32
  %below = arith.cmpf ult, %shifted, %qmin : f32
  %above = arith.cmpf ugt, %shifted, %qmax : f32
  %lo_clamped = arith.select %below, %qmin, %shifted : f32
  %clamped = arith.select %above, %qmax, %lo_clamped : f32
  %q = arith.fptosi %clamped : f32 to i8
  linalg.yield %q : i8
} -> tensor<1x512x28x28xi8>
%cast_1722 = tensor.cast %1781 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
%cast_1723 = tensor.cast %cast_1722 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
%1782 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%1783 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1784 = torch.aten.item %1782 : !torch.vtensor<[],f32> -> !torch.float
%1785 = torch_c.to_f64 %1784
%1786 = torch.aten.item %1783 : !torch.vtensor<[],si8> -> !torch.int
%1787 = torch_c.to_i64 %1786
%cast_1724 = tensor.cast %cast_1723 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
%c1_1725 = arith.constant 1 : index
%c1_1726 = arith.constant 1 : index
%c512_1727 = arith.constant 512 : index
%c2_1728 = arith.constant 2 : index
%c28_1729 = arith.constant 28 : index
%c3_1730 = arith.constant 3 : index
%c28_1731 = arith.constant 28 : index
%1788 = tensor.empty() : tensor<1x512x28x28xf32>
// Element-wise dequantization of %cast_1724 (i8) back to f32:
//   x = (q - zero_point) * scale
// zero_point is %1786 (item of the 0 literal %1783) and scale is %1784
// (item of the 1.5625e-02 literal %1782). The subtraction is done in i32.
%1789 = linalg.generic {
    indexing_maps = [#map, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_1724 : tensor<1x512x28x28xi8>)
    outs(%1788 : tensor<1x512x28x28xf32>) {
^bb0(%q: i8, %unused_init: f32):
  %zp_i64 = torch_c.to_i64 %1786
  %zp_i32 = arith.trunci %zp_i64 : i64 to i32
  %scale_f64 = torch_c.to_f64 %1784
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  %q_i32 = arith.extsi %q : i8 to i32
  %centered = arith.subi %q_i32, %zp_i32 : i32
  %centered_f32 = arith.sitofp %centered : i32 to f32
  %real = arith.mulf %centered_f32, %scale_f32 : f32
  linalg.yield %real : f32
} -> tensor<1x512x28x28xf32>
%cast_1732 = tensor.cast %1789 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
%1790 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
%1791 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_1733 = torch.constant.int 12
%1792 = torch.aten.item %1790 : !torch.vtensor<[],f32> -> !torch.float
%1793 = torch_c.to_f64 %1792
%1794 = torch.aten.item %1791 : !torch.vtensor<[],si8> -> !torch.int
%1795 = torch_c.to_i64 %1794
%c1_1734 = arith.constant 1 : index
%c0_1735 = arith.constant 0 : index
%c128_1736 = arith.constant 128 : index
%c1_1737 = arith.constant 1 : index
%c512_1738 = arith.constant 512 : index
%1796 = tensor.empty() : tensor<128x512x1x1xi8>
// Element-wise affine quantization of the 128x512x1x1 f32 tensor %62 to
// signed 8-bit (the dequantized form feeds the filter operand of conv %1866):
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is %1792 (item of the 0.001953125 literal %1790) and zero_point is
// %1794 (item of the 0 literal %1791). #map4 reads the input at (d0, d1, 0, 0).
// Rounding uses math.roundeven: ONNX QuantizeLinear / torch.quantize_per_tensor
// round ties to the nearest even integer, whereas math.round sends exact .5
// ties away from zero and yields an off-by-one code for those inputs.
%1797 = linalg.generic {
    indexing_maps = [#map4, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%62 : tensor<128x512x1x1xf32>)
    outs(%1796 : tensor<128x512x1x1xi8>) {
^bb0(%x: f32, %unused_init: i8):
  // Zero point (i64 -> f32) and scale (f64 -> f32) in the element type.
  %zp_i64 = torch_c.to_i64 %1794
  %zp_f32 = arith.sitofp %zp_i64 : i64 to f32
  %scale_f64 = torch_c.to_f64 %1792
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  %scaled = arith.divf %x, %scale_f32 : f32
  %rounded = math.roundeven %scaled : f32
  %shifted = arith.addf %rounded, %zp_f32 : f32
  // Saturate to [-128, 127] before the float -> int conversion.
  %qmin = arith.constant -1.280000e+02 : f32
  %qmax = arith.constant 1.270000e+02 : f32
  %below = arith.cmpf ult, %shifted, %qmin : f32
  %above = arith.cmpf ugt, %shifted, %qmax : f32
  %lo_clamped = arith.select %below, %qmin, %shifted : f32
  %clamped = arith.select %above, %qmax, %lo_clamped : f32
  %q = arith.fptosi %clamped : f32 to i8
  linalg.yield %q : i8
} -> tensor<128x512x1x1xi8>
%cast_1739 = tensor.cast %1797 : tensor<128x512x1x1xi8> to tensor<128x512x1x1xi8>
%cast_1740 = tensor.cast %cast_1739 : tensor<128x512x1x1xi8> to tensor<128x512x1x1xi8>
%1798 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
%1799 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1800 = torch.aten.item %1798 : !torch.vtensor<[],f32> -> !torch.float
%1801 = torch_c.to_f64 %1800
%1802 = torch.aten.item %1799 : !torch.vtensor<[],si8> -> !torch.int
%1803 = torch_c.to_i64 %1802
%cast_1741 = tensor.cast %cast_1740 : tensor<128x512x1x1xi8> to tensor<128x512x1x1xi8>
%c1_1742 = arith.constant 1 : index
%c0_1743 = arith.constant 0 : index
%c128_1744 = arith.constant 128 : index
%c1_1745 = arith.constant 1 : index
%c512_1746 = arith.constant 512 : index
%1804 = tensor.empty() : tensor<128x512x1x1xf32>
// Element-wise dequantization of the i8 tensor %cast_1741 (128x512x1x1):
//   x = (q - zero_point) * scale
// zero_point is %1802 (item of the 0 literal %1799) and scale is %1800
// (item of the 0.001953125 literal %1798). The subtraction is done in i32.
%1805 = linalg.generic {
    indexing_maps = [#map4, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_1741 : tensor<128x512x1x1xi8>)
    outs(%1804 : tensor<128x512x1x1xf32>) {
^bb0(%q: i8, %unused_init: f32):
  %zp_i64 = torch_c.to_i64 %1802
  %zp_i32 = arith.trunci %zp_i64 : i64 to i32
  %scale_f64 = torch_c.to_f64 %1800
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  %q_i32 = arith.extsi %q : i8 to i32
  %centered = arith.subi %q_i32, %zp_i32 : i32
  %centered_f32 = arith.sitofp %centered : i32 to f32
  %real = arith.mulf %centered_f32, %scale_f32 : f32
  linalg.yield %real : f32
} -> tensor<128x512x1x1xf32>
%cast_1747 = tensor.cast %1805 : tensor<128x512x1x1xf32> to tensor<128x512x1x1xf32>
%1806 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1807 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_1748 = torch.constant.int 12
%1808 = torch.aten.item %1806 : !torch.vtensor<[],f32> -> !torch.float
%1809 = torch_c.to_f64 %1808
%1810 = torch.aten.item %1807 : !torch.vtensor<[],si8> -> !torch.int
%1811 = torch_c.to_i64 %1810
%c1_1749 = arith.constant 1 : index
%c0_1750 = arith.constant 0 : index
%c128_1751 = arith.constant 128 : index
%1812 = tensor.empty() : tensor<128xi8>
// Element-wise affine quantization of the 128-element f32 vector %64 to
// signed 8-bit:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is %1808 (item of the 3.90625e-03 literal %1806) and zero_point is
// %1810 (item of the 0 literal %1807).
// Rounding uses math.roundeven: ONNX QuantizeLinear / torch.quantize_per_tensor
// round ties to the nearest even integer, whereas math.round sends exact .5
// ties away from zero and yields an off-by-one code for those inputs.
%1813 = linalg.generic {
    indexing_maps = [#map2, #map2],
    iterator_types = ["parallel"]
  } ins(%64 : tensor<128xf32>)
    outs(%1812 : tensor<128xi8>) {
^bb0(%x: f32, %unused_init: i8):
  // Zero point (i64 -> f32) and scale (f64 -> f32) in the element type.
  %zp_i64 = torch_c.to_i64 %1810
  %zp_f32 = arith.sitofp %zp_i64 : i64 to f32
  %scale_f64 = torch_c.to_f64 %1808
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  %scaled = arith.divf %x, %scale_f32 : f32
  %rounded = math.roundeven %scaled : f32
  %shifted = arith.addf %rounded, %zp_f32 : f32
  // Saturate to [-128, 127] before the float -> int conversion.
  %qmin = arith.constant -1.280000e+02 : f32
  %qmax = arith.constant 1.270000e+02 : f32
  %below = arith.cmpf ult, %shifted, %qmin : f32
  %above = arith.cmpf ugt, %shifted, %qmax : f32
  %lo_clamped = arith.select %below, %qmin, %shifted : f32
  %clamped = arith.select %above, %qmax, %lo_clamped : f32
  %q = arith.fptosi %clamped : f32 to i8
  linalg.yield %q : i8
} -> tensor<128xi8>
%cast_1752 = tensor.cast %1813 : tensor<128xi8> to tensor<128xi8>
%cast_1753 = tensor.cast %cast_1752 : tensor<128xi8> to tensor<128xi8>
%1814 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1815 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1816 = torch.aten.item %1814 : !torch.vtensor<[],f32> -> !torch.float
%1817 = torch_c.to_f64 %1816
%1818 = torch.aten.item %1815 : !torch.vtensor<[],si8> -> !torch.int
%1819 = torch_c.to_i64 %1818
%cast_1754 = tensor.cast %cast_1753 : tensor<128xi8> to tensor<128xi8>
%c1_1755 = arith.constant 1 : index
%c0_1756 = arith.constant 0 : index
%c128_1757 = arith.constant 128 : index
%1820 = tensor.empty() : tensor<128xf32>
// Element-wise dequantization of the 128-element i8 vector %cast_1754:
//   x = (q - zero_point) * scale
// zero_point is %1818 (item of the 0 literal %1815) and scale is %1816
// (item of the 3.90625e-03 literal %1814). The subtraction is done in i32.
%1821 = linalg.generic {
    indexing_maps = [#map2, #map2],
    iterator_types = ["parallel"]
  } ins(%cast_1754 : tensor<128xi8>)
    outs(%1820 : tensor<128xf32>) {
^bb0(%q: i8, %unused_init: f32):
  %zp_i64 = torch_c.to_i64 %1818
  %zp_i32 = arith.trunci %zp_i64 : i64 to i32
  %scale_f64 = torch_c.to_f64 %1816
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  %q_i32 = arith.extsi %q : i8 to i32
  %centered = arith.subi %q_i32, %zp_i32 : i32
  %centered_f32 = arith.sitofp %centered : i32 to f32
  %real = arith.mulf %centered_f32, %scale_f32 : f32
  linalg.yield %real : f32
} -> tensor<128xf32>
%cast_1758 = tensor.cast %1821 : tensor<128xf32> to tensor<128xf32>
%int0_1759 = torch.constant.int 0
%int0_1760 = torch.constant.int 0
%int1_1761 = torch.constant.int 1
%int1_1762 = torch.constant.int 1
%int1_1763 = torch.constant.int 1
%int1_1764 = torch.constant.int 1
%int0_1765 = torch.constant.int 0
%1822 = torch.prim.ListConstruct %int0_1759, %int0_1760 : (!torch.int, !torch.int) -> !torch.list<int>
%1823 = torch.prim.ListConstruct %int1_1761, %int1_1762 : (!torch.int, !torch.int) -> !torch.list<int>
%1824 = torch.prim.ListConstruct %int1_1763, %int1_1764 : (!torch.int, !torch.int) -> !torch.list<int>
%1825 = torch.prim.ListConstruct %int0_1765, %int0_1765 : (!torch.int, !torch.int) -> !torch.list<int>
%false_1766 = torch.constant.bool false
%int1_1767 = torch.constant.int 1
%1826 = torch_c.to_i64 %int1_1767
%1827 = torch_c.to_i64 %int0_1759
%1828 = torch_c.to_i64 %int0_1760
%1829 = torch_c.to_i64 %int0_1765
%1830 = torch_c.to_i64 %int0_1765
%c0_1768 = arith.constant 0 : index
%c1_1769 = arith.constant 1 : index
%c1_1770 = arith.constant 1 : index
%c512_1771 = arith.constant 512 : index
%c2_1772 = arith.constant 2 : index
%c28_1773 = arith.constant 28 : index
%c3_1774 = arith.constant 3 : index
%c28_1775 = arith.constant 28 : index
%c0_1776 = arith.constant 0 : index
%c128_1777 = arith.constant 128 : index
%c1_1778 = arith.constant 1 : index
%c512_1779 = arith.constant 512 : index
%c2_1780 = arith.constant 2 : index
%c1_1781 = arith.constant 1 : index
%c3_1782 = arith.constant 3 : index
%c1_1783 = arith.constant 1 : index
// Group-count validity checks for the convolution %1866.
// %1826 is torch_c.to_i64 of the integer constant 1, so the divisor %1831 is 1
// and both remainders are 0.
%1831 = arith.index_cast %1826 : i64 to index
%c0_1784 = arith.constant 0 : index
// 512 (input channels) must be divisible by the group count.
%1832 = arith.remsi %c512_1771, %1831 : index
%1833 = arith.cmpi eq, %c0_1784, %1832 : index
cf.assert %1833, "invalid: groups must divide input channel size evenly."
%c0_1785 = arith.constant 0 : index
// 128 (filter count) must be divisible by the group count.
%1834 = arith.remsi %c128_1777, %1831 : index
%1835 = arith.cmpi eq, %c0_1785, %1834 : index
cf.assert %1835, "invalid: groups must divide weight batch size evenly."
%c1_i64_1786 = arith.constant 1 : i64
%c1_i64_1787 = arith.constant 1 : i64
%c1_i64_1788 = arith.constant 1 : i64
%c1_i64_1789 = arith.constant 1 : i64
%cst_1790 = arith.constant 0.000000e+00 : f32
%c0_1791 = arith.constant 0 : index
%c1_1792 = arith.constant 1 : index
%c1_1793 = arith.constant 1 : index
%c512_1794 = arith.constant 512 : index
%c2_1795 = arith.constant 2 : index
%c28_1796 = arith.constant 28 : index
%c3_1797 = arith.constant 3 : index
%c28_1798 = arith.constant 28 : index
%c0_i64_1799 = arith.constant 0 : i64
%1836 = arith.index_cast %c0_i64_1799 : i64 to index
%1837 = arith.index_cast %c0_i64_1799 : i64 to index
%1838 = arith.index_cast %1827 : i64 to index
%1839 = arith.index_cast %1828 : i64 to index
// Pad %cast_1732 with %cst_1790 (0.0) ahead of the convolution %1866.
// The low and high amounts are the same four values: %1836/%1837 are casts of
// the i64 constant 0, and %1838/%1839 are casts of %1827/%1828, which come
// from integer constants 0. Every pad amount is therefore 0; the op only
// changes the static 1x512x28x28 type into a fully dynamic one.
%padded_1800 = tensor.pad %cast_1732
    low[%1836, %1837, %1838, %1839]
    high[%1836, %1837, %1838, %1839] {
^bb0(%i0: index, %i1: index, %i2: index, %i3: index):
  tensor.yield %cst_1790 : f32
} : tensor<1x512x28x28xf32> to tensor<?x?x?x?xf32>
// Output spatial sizes for the convolution %1866, computed in i64 as
//   out = floordiv(in + 2*pad - m*(k - 1) - 1, s) + 1
// NOTE(review): by position in this formula m (%c1_i64_1786 / %c1_i64_1787)
// is the dilation and s (%c1_i64_1788 / %c1_i64_1789) the stride -- confirm
// against the generator.
//
// Dimension 2: k = 1, in = 28, pad = %1827 (from the integer constant 0).
%1840 = arith.index_cast %c1_1781 : index to i64
%c1_i64_1801 = arith.constant 1 : i64
%c2_i64_1802 = arith.constant 2 : i64
// 2 * pad
%1841 = arith.muli %1827, %c2_i64_1802 : i64
%1842 = arith.index_cast %c28_1773 : index to i64
// in + 2 * pad
%1843 = arith.addi %1842, %1841 : i64
// k - 1
%1844 = arith.subi %1840, %c1_i64_1801 : i64
// m * (k - 1)
%1845 = arith.muli %c1_i64_1786, %1844 : i64
%1846 = arith.subi %1843, %1845 : i64
%1847 = arith.subi %1846, %c1_i64_1801 : i64
// divide by s, then add 1; with the constants above this evaluates to 28
%1848 = arith.floordivsi %1847, %c1_i64_1788 : i64
%1849 = arith.addi %1848, %c1_i64_1801 : i64
%1850 = arith.index_cast %1849 : i64 to index
// Dimension 3: same computation with k = 1, in = 28, pad = %1828 (0).
%1851 = arith.index_cast %c1_1783 : index to i64
%c1_i64_1803 = arith.constant 1 : i64
%c2_i64_1804 = arith.constant 2 : i64
%1852 = arith.muli %1828, %c2_i64_1804 : i64
%1853 = arith.index_cast %c28_1775 : index to i64
%1854 = arith.addi %1853, %1852 : i64
%1855 = arith.subi %1851, %c1_i64_1803 : i64
%1856 = arith.muli %c1_i64_1787, %1855 : i64
%1857 = arith.subi %1854, %1856 : i64
%1858 = arith.subi %1857, %c1_i64_1803 : i64
%1859 = arith.floordivsi %1858, %c1_i64_1789 : i64
%1860 = arith.addi %1859, %c1_i64_1803 : i64
%1861 = arith.index_cast %1860 : i64 to index
// Uninitialized 1x128xHxW buffer sized by the two results above.
%1862 = tensor.empty(%1850, %1861) : tensor<1x128x?x?xf32>
// Broadcast the 128-element vector %cast_1758 (the dequantized result %1821)
// over the 1x128x?x? init tensor: #map3 reads the input at d1 only, so every
// (d0, d2, d3) position of channel d1 receives the same value. The result is
// the `outs` operand (initial accumulator) of linalg.conv_2d_nchw_fchw %1866.
%1863 = linalg.generic {
    indexing_maps = [#map3, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_1758 : tensor<128xf32>)
    outs(%1862 : tensor<1x128x?x?xf32>) {
^bb0(%per_channel: f32, %unused_init: f32):
  linalg.yield %per_channel : f32
} -> tensor<1x128x?x?xf32>
// Per-group channel counts: 512 and 128 divided by the group count %1831
// (index cast of %1826, which comes from the integer constant 1).
%1864 = arith.floordivsi %c512_1771, %1831 : index
%1865 = arith.floordivsi %c128_1777, %1831 : index
%c0_1805 = arith.constant 0 : index
%c1_1806 = arith.constant 1 : index
// 1x1 convolution with stride 1 and dilation 1, NCHW input / FCHW filter
// (128 filters over 512 input channels). It accumulates into %1863.
%1866 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_1800, %cast_1747 : tensor<?x?x?x?xf32>, tensor<128x512x1x1xf32>) outs(%1863 : tensor<1x128x?x?xf32>) -> tensor<1x128x?x?xf32>
// Re-attach the static 28x28 spatial extent to the dynamically shaped result.
%cast_1807 = tensor.cast %1866 : tensor<1x128x?x?xf32> to tensor<1x128x28x28xf32>
%c1_1808 = arith.constant 1 : index
%c1_1809 = arith.constant 1 : index
%c128_1810 = arith.constant 128 : index
%c2_1811 = arith.constant 2 : index
%c28_1812 = arith.constant 28 : index
%c3_1813 = arith.constant 3 : index
%c28_1814 = arith.constant 28 : index
%1867 = tensor.empty() : tensor<1x128x28x28xf32>
// Element-wise ReLU over %cast_1807: yields x when `x ugt 0.0`, else 0.0.
// The predicate is the unordered form, so it is also true when x is NaN and
// the NaN input is yielded unchanged.
%1868 = linalg.generic {
    indexing_maps = [#map, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_1807 : tensor<1x128x28x28xf32>)
    outs(%1867 : tensor<1x128x28x28xf32>) {
^bb0(%x: f32, %unused_init: f32):
  %zero = arith.constant 0.000000e+00 : f32
  %keep_x = arith.cmpf ugt, %x, %zero : f32
  %relu = arith.select %keep_x, %x, %zero : f32
  linalg.yield %relu : f32
} -> tensor<1x128x28x28xf32>
%cast_1815 = tensor.cast %1868 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
%1869 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%1870 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_1816 = torch.constant.int 12
%1871 = torch.aten.item %1869 : !torch.vtensor<[],f32> -> !torch.float
%1872 = torch_c.to_f64 %1871
%1873 = torch.aten.item %1870 : !torch.vtensor<[],si8> -> !torch.int
%1874 = torch_c.to_i64 %1873
%c1_1817 = arith.constant 1 : index
%c1_1818 = arith.constant 1 : index
%c128_1819 = arith.constant 128 : index
%c2_1820 = arith.constant 2 : index
%c28_1821 = arith.constant 28 : index
%c3_1822 = arith.constant 3 : index
%c28_1823 = arith.constant 28 : index
%1875 = tensor.empty() : tensor<1x128x28x28xi8>
// Element-wise affine quantization of %cast_1815 (f32) to signed 8-bit:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is %1871 (item of the 1.5625e-02 literal %1869) and zero_point is
// %1873 (item of the 0 literal %1870).
// Rounding uses math.roundeven: ONNX QuantizeLinear / torch.quantize_per_tensor
// round ties to the nearest even integer, whereas math.round sends exact .5
// ties away from zero and yields an off-by-one code for those inputs.
%1876 = linalg.generic {
    indexing_maps = [#map, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_1815 : tensor<1x128x28x28xf32>)
    outs(%1875 : tensor<1x128x28x28xi8>) {
^bb0(%x: f32, %unused_init: i8):
  // Zero point (i64 -> f32) and scale (f64 -> f32) in the element type.
  %zp_i64 = torch_c.to_i64 %1873
  %zp_f32 = arith.sitofp %zp_i64 : i64 to f32
  %scale_f64 = torch_c.to_f64 %1871
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  %scaled = arith.divf %x, %scale_f32 : f32
  %rounded = math.roundeven %scaled : f32
  %shifted = arith.addf %rounded, %zp_f32 : f32
  // Saturate to [-128, 127] before the float -> int conversion.
  %qmin = arith.constant -1.280000e+02 : f32
  %qmax = arith.constant 1.270000e+02 : f32
  %below = arith.cmpf ult, %shifted, %qmin : f32
  %above = arith.cmpf ugt, %shifted, %qmax : f32
  %lo_clamped = arith.select %below, %qmin, %shifted : f32
  %clamped = arith.select %above, %qmax, %lo_clamped : f32
  %q = arith.fptosi %clamped : f32 to i8
  linalg.yield %q : i8
} -> tensor<1x128x28x28xi8>
%cast_1824 = tensor.cast %1876 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
%cast_1825 = tensor.cast %cast_1824 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
%1877 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%1878 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1879 = torch.aten.item %1877 : !torch.vtensor<[],f32> -> !torch.float
%1880 = torch_c.to_f64 %1879
%1881 = torch.aten.item %1878 : !torch.vtensor<[],si8> -> !torch.int
%1882 = torch_c.to_i64 %1881
%cast_1826 = tensor.cast %cast_1825 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
%c1_1827 = arith.constant 1 : index
%c1_1828 = arith.constant 1 : index
%c128_1829 = arith.constant 128 : index
%c2_1830 = arith.constant 2 : index
%c28_1831 = arith.constant 28 : index
%c3_1832 = arith.constant 3 : index
%c28_1833 = arith.constant 28 : index
%1883 = tensor.empty() : tensor<1x128x28x28xf32>
// Element-wise dequantization of %cast_1826 (i8) back to f32:
//   x = (q - zero_point) * scale
// zero_point is %1881 (item of the 0 literal %1878) and scale is %1879
// (item of the 1.5625e-02 literal %1877). The subtraction is done in i32.
%1884 = linalg.generic {
    indexing_maps = [#map, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_1826 : tensor<1x128x28x28xi8>)
    outs(%1883 : tensor<1x128x28x28xf32>) {
^bb0(%q: i8, %unused_init: f32):
  %zp_i64 = torch_c.to_i64 %1881
  %zp_i32 = arith.trunci %zp_i64 : i64 to i32
  %scale_f64 = torch_c.to_f64 %1879
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  %q_i32 = arith.extsi %q : i8 to i32
  %centered = arith.subi %q_i32, %zp_i32 : i32
  %centered_f32 = arith.sitofp %centered : i32 to f32
  %real = arith.mulf %centered_f32, %scale_f32 : f32
  linalg.yield %real : f32
} -> tensor<1x128x28x28xf32>
%cast_1834 = tensor.cast %1884 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
%1885 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1886 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_1835 = torch.constant.int 12
%1887 = torch.aten.item %1885 : !torch.vtensor<[],f32> -> !torch.float
%1888 = torch_c.to_f64 %1887
%1889 = torch.aten.item %1886 : !torch.vtensor<[],si8> -> !torch.int
%1890 = torch_c.to_i64 %1889
%c1_1836 = arith.constant 1 : index
%c0_1837 = arith.constant 0 : index
%c128_1838 = arith.constant 128 : index
%c1_1839 = arith.constant 1 : index
%c128_1840 = arith.constant 128 : index
%c2_1841 = arith.constant 2 : index
%c3_1842 = arith.constant 3 : index
%c3_1843 = arith.constant 3 : index
%c3_1844 = arith.constant 3 : index
%1891 = tensor.empty() : tensor<128x128x3x3xi8>
// Element-wise affine quantization of the 128x128x3x3 f32 tensor %66 to
// signed 8-bit (the dequantized form feeds the filter operand of conv %1961):
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is %1887 (item of the 3.90625e-03 literal %1885) and zero_point is
// %1889 (item of the 0 literal %1886).
// Rounding uses math.roundeven: ONNX QuantizeLinear / torch.quantize_per_tensor
// round ties to the nearest even integer, whereas math.round sends exact .5
// ties away from zero and yields an off-by-one code for those inputs.
%1892 = linalg.generic {
    indexing_maps = [#map1, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%66 : tensor<128x128x3x3xf32>)
    outs(%1891 : tensor<128x128x3x3xi8>) {
^bb0(%x: f32, %unused_init: i8):
  // Zero point (i64 -> f32) and scale (f64 -> f32) in the element type.
  %zp_i64 = torch_c.to_i64 %1889
  %zp_f32 = arith.sitofp %zp_i64 : i64 to f32
  %scale_f64 = torch_c.to_f64 %1887
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  %scaled = arith.divf %x, %scale_f32 : f32
  %rounded = math.roundeven %scaled : f32
  %shifted = arith.addf %rounded, %zp_f32 : f32
  // Saturate to [-128, 127] before the float -> int conversion.
  %qmin = arith.constant -1.280000e+02 : f32
  %qmax = arith.constant 1.270000e+02 : f32
  %below = arith.cmpf ult, %shifted, %qmin : f32
  %above = arith.cmpf ugt, %shifted, %qmax : f32
  %lo_clamped = arith.select %below, %qmin, %shifted : f32
  %clamped = arith.select %above, %qmax, %lo_clamped : f32
  %q = arith.fptosi %clamped : f32 to i8
  linalg.yield %q : i8
} -> tensor<128x128x3x3xi8>
%cast_1845 = tensor.cast %1892 : tensor<128x128x3x3xi8> to tensor<128x128x3x3xi8>
%cast_1846 = tensor.cast %cast_1845 : tensor<128x128x3x3xi8> to tensor<128x128x3x3xi8>
%1893 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1894 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1895 = torch.aten.item %1893 : !torch.vtensor<[],f32> -> !torch.float
%1896 = torch_c.to_f64 %1895
%1897 = torch.aten.item %1894 : !torch.vtensor<[],si8> -> !torch.int
%1898 = torch_c.to_i64 %1897
%cast_1847 = tensor.cast %cast_1846 : tensor<128x128x3x3xi8> to tensor<128x128x3x3xi8>
%c1_1848 = arith.constant 1 : index
%c0_1849 = arith.constant 0 : index
%c128_1850 = arith.constant 128 : index
%c1_1851 = arith.constant 1 : index
%c128_1852 = arith.constant 128 : index
%c2_1853 = arith.constant 2 : index
%c3_1854 = arith.constant 3 : index
%c3_1855 = arith.constant 3 : index
%c3_1856 = arith.constant 3 : index
%1899 = tensor.empty() : tensor<128x128x3x3xf32>
// Element-wise dequantization of the i8 tensor %cast_1847 (128x128x3x3):
//   x = (q - zero_point) * scale
// zero_point is %1897 (item of the 0 literal %1894) and scale is %1895
// (item of the 3.90625e-03 literal %1893). The subtraction is done in i32.
%1900 = linalg.generic {
    indexing_maps = [#map1, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_1847 : tensor<128x128x3x3xi8>)
    outs(%1899 : tensor<128x128x3x3xf32>) {
^bb0(%q: i8, %unused_init: f32):
  %zp_i64 = torch_c.to_i64 %1897
  %zp_i32 = arith.trunci %zp_i64 : i64 to i32
  %scale_f64 = torch_c.to_f64 %1895
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  %q_i32 = arith.extsi %q : i8 to i32
  %centered = arith.subi %q_i32, %zp_i32 : i32
  %centered_f32 = arith.sitofp %centered : i32 to f32
  %real = arith.mulf %centered_f32, %scale_f32 : f32
  linalg.yield %real : f32
} -> tensor<128x128x3x3xf32>
%cast_1857 = tensor.cast %1900 : tensor<128x128x3x3xf32> to tensor<128x128x3x3xf32>
%1901 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%1902 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_1858 = torch.constant.int 12
%1903 = torch.aten.item %1901 : !torch.vtensor<[],f32> -> !torch.float
%1904 = torch_c.to_f64 %1903
%1905 = torch.aten.item %1902 : !torch.vtensor<[],si8> -> !torch.int
%1906 = torch_c.to_i64 %1905
%c1_1859 = arith.constant 1 : index
%c0_1860 = arith.constant 0 : index
%c128_1861 = arith.constant 128 : index
%1907 = tensor.empty() : tensor<128xi8>
// Element-wise affine quantization of the 128-element f32 vector %68 to
// signed 8-bit:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is %1903 (item of the 1.5625e-02 literal %1901) and zero_point is
// %1905 (item of the 0 literal %1902).
// Rounding uses math.roundeven: ONNX QuantizeLinear / torch.quantize_per_tensor
// round ties to the nearest even integer, whereas math.round sends exact .5
// ties away from zero and yields an off-by-one code for those inputs.
%1908 = linalg.generic {
    indexing_maps = [#map2, #map2],
    iterator_types = ["parallel"]
  } ins(%68 : tensor<128xf32>)
    outs(%1907 : tensor<128xi8>) {
^bb0(%x: f32, %unused_init: i8):
  // Zero point (i64 -> f32) and scale (f64 -> f32) in the element type.
  %zp_i64 = torch_c.to_i64 %1905
  %zp_f32 = arith.sitofp %zp_i64 : i64 to f32
  %scale_f64 = torch_c.to_f64 %1903
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  %scaled = arith.divf %x, %scale_f32 : f32
  %rounded = math.roundeven %scaled : f32
  %shifted = arith.addf %rounded, %zp_f32 : f32
  // Saturate to [-128, 127] before the float -> int conversion.
  %qmin = arith.constant -1.280000e+02 : f32
  %qmax = arith.constant 1.270000e+02 : f32
  %below = arith.cmpf ult, %shifted, %qmin : f32
  %above = arith.cmpf ugt, %shifted, %qmax : f32
  %lo_clamped = arith.select %below, %qmin, %shifted : f32
  %clamped = arith.select %above, %qmax, %lo_clamped : f32
  %q = arith.fptosi %clamped : f32 to i8
  linalg.yield %q : i8
} -> tensor<128xi8>
%cast_1862 = tensor.cast %1908 : tensor<128xi8> to tensor<128xi8>
%cast_1863 = tensor.cast %cast_1862 : tensor<128xi8> to tensor<128xi8>
%1909 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%1910 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1911 = torch.aten.item %1909 : !torch.vtensor<[],f32> -> !torch.float
%1912 = torch_c.to_f64 %1911
%1913 = torch.aten.item %1910 : !torch.vtensor<[],si8> -> !torch.int
%1914 = torch_c.to_i64 %1913
%cast_1864 = tensor.cast %cast_1863 : tensor<128xi8> to tensor<128xi8>
%c1_1865 = arith.constant 1 : index
%c0_1866 = arith.constant 0 : index
%c128_1867 = arith.constant 128 : index
%1915 = tensor.empty() : tensor<128xf32>
// Element-wise dequantization of the 128-element i8 vector %cast_1864:
//   x = (q - zero_point) * scale
// zero_point is %1913 (item of the 0 literal %1910) and scale is %1911
// (item of the 1.5625e-02 literal %1909). The subtraction is done in i32.
%1916 = linalg.generic {
    indexing_maps = [#map2, #map2],
    iterator_types = ["parallel"]
  } ins(%cast_1864 : tensor<128xi8>)
    outs(%1915 : tensor<128xf32>) {
^bb0(%q: i8, %unused_init: f32):
  %zp_i64 = torch_c.to_i64 %1913
  %zp_i32 = arith.trunci %zp_i64 : i64 to i32
  %scale_f64 = torch_c.to_f64 %1911
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  %q_i32 = arith.extsi %q : i8 to i32
  %centered = arith.subi %q_i32, %zp_i32 : i32
  %centered_f32 = arith.sitofp %centered : i32 to f32
  %real = arith.mulf %centered_f32, %scale_f32 : f32
  linalg.yield %real : f32
} -> tensor<128xf32>
%cast_1868 = tensor.cast %1916 : tensor<128xf32> to tensor<128xf32>
%int1_1869 = torch.constant.int 1
%int1_1870 = torch.constant.int 1
%int1_1871 = torch.constant.int 1
%int1_1872 = torch.constant.int 1
%int1_1873 = torch.constant.int 1
%int1_1874 = torch.constant.int 1
%int0_1875 = torch.constant.int 0
%1917 = torch.prim.ListConstruct %int1_1869, %int1_1870 : (!torch.int, !torch.int) -> !torch.list<int>
%1918 = torch.prim.ListConstruct %int1_1871, %int1_1872 : (!torch.int, !torch.int) -> !torch.list<int>
%1919 = torch.prim.ListConstruct %int1_1873, %int1_1874 : (!torch.int, !torch.int) -> !torch.list<int>
%1920 = torch.prim.ListConstruct %int0_1875, %int0_1875 : (!torch.int, !torch.int) -> !torch.list<int>
%false_1876 = torch.constant.bool false
%int1_1877 = torch.constant.int 1
%1921 = torch_c.to_i64 %int1_1877
%1922 = torch_c.to_i64 %int1_1869
%1923 = torch_c.to_i64 %int1_1870
%1924 = torch_c.to_i64 %int0_1875
%1925 = torch_c.to_i64 %int0_1875
%c0_1878 = arith.constant 0 : index
%c1_1879 = arith.constant 1 : index
%c1_1880 = arith.constant 1 : index
%c128_1881 = arith.constant 128 : index
%c2_1882 = arith.constant 2 : index
%c28_1883 = arith.constant 28 : index
%c3_1884 = arith.constant 3 : index
%c28_1885 = arith.constant 28 : index
%c0_1886 = arith.constant 0 : index
%c128_1887 = arith.constant 128 : index
%c1_1888 = arith.constant 1 : index
%c128_1889 = arith.constant 128 : index
%c2_1890 = arith.constant 2 : index
%c3_1891 = arith.constant 3 : index
%c3_1892 = arith.constant 3 : index
%c3_1893 = arith.constant 3 : index
// Group-count validity checks for the convolution %1961.
// %1921 is torch_c.to_i64 of the integer constant 1, so the divisor %1926 is 1
// and both remainders are 0.
%1926 = arith.index_cast %1921 : i64 to index
%c0_1894 = arith.constant 0 : index
// 128 (input channels) must be divisible by the group count.
%1927 = arith.remsi %c128_1881, %1926 : index
%1928 = arith.cmpi eq, %c0_1894, %1927 : index
cf.assert %1928, "invalid: groups must divide input channel size evenly."
%c0_1895 = arith.constant 0 : index
// 128 (filter count) must be divisible by the group count.
%1929 = arith.remsi %c128_1887, %1926 : index
%1930 = arith.cmpi eq, %c0_1895, %1929 : index
cf.assert %1930, "invalid: groups must divide weight batch size evenly."
%c1_i64_1896 = arith.constant 1 : i64
%c1_i64_1897 = arith.constant 1 : i64
%c1_i64_1898 = arith.constant 1 : i64
%c1_i64_1899 = arith.constant 1 : i64
%cst_1900 = arith.constant 0.000000e+00 : f32
%c0_1901 = arith.constant 0 : index
%c1_1902 = arith.constant 1 : index
%c1_1903 = arith.constant 1 : index
%c128_1904 = arith.constant 128 : index
%c2_1905 = arith.constant 2 : index
%c28_1906 = arith.constant 28 : index
%c3_1907 = arith.constant 3 : index
%c28_1908 = arith.constant 28 : index
%c0_i64_1909 = arith.constant 0 : i64
%1931 = arith.index_cast %c0_i64_1909 : i64 to index
%1932 = arith.index_cast %c0_i64_1909 : i64 to index
%1933 = arith.index_cast %1922 : i64 to index
%1934 = arith.index_cast %1923 : i64 to index
// Pad %cast_1834 with %cst_1900 (0.0) ahead of the 3x3 convolution %1961.
// The low and high amounts are the same four values: %1931/%1932 are casts of
// the i64 constant 0 (dimensions 0 and 1 are not padded), and %1933/%1934 are
// casts of %1922/%1923, which come from integer constants 1. Dimensions 2 and
// 3 therefore grow by one element on each side (28 -> 30). The result type is
// fully dynamic.
%padded_1910 = tensor.pad %cast_1834
    low[%1931, %1932, %1933, %1934]
    high[%1931, %1932, %1933, %1934] {
^bb0(%i0: index, %i1: index, %i2: index, %i3: index):
  tensor.yield %cst_1900 : f32
} : tensor<1x128x28x28xf32> to tensor<?x?x?x?xf32>
// Output spatial sizes for the convolution %1961, computed in i64 as
//   out = floordiv(in + 2*pad - m*(k - 1) - 1, s) + 1
// NOTE(review): by position in this formula m (%c1_i64_1896 / %c1_i64_1897)
// is the dilation and s (%c1_i64_1898 / %c1_i64_1899) the stride -- confirm
// against the generator.
//
// Dimension 2: k = 3, in = 28, pad = %1922 (from the integer constant 1).
%1935 = arith.index_cast %c3_1891 : index to i64
%c1_i64_1911 = arith.constant 1 : i64
%c2_i64_1912 = arith.constant 2 : i64
// 2 * pad = 2
%1936 = arith.muli %1922, %c2_i64_1912 : i64
%1937 = arith.index_cast %c28_1883 : index to i64
// in + 2 * pad = 30
%1938 = arith.addi %1937, %1936 : i64
// k - 1 = 2
%1939 = arith.subi %1935, %c1_i64_1911 : i64
// m * (k - 1) = 2
%1940 = arith.muli %c1_i64_1896, %1939 : i64
%1941 = arith.subi %1938, %1940 : i64
%1942 = arith.subi %1941, %c1_i64_1911 : i64
// divide by s, then add 1; with the constants above this evaluates to 28
%1943 = arith.floordivsi %1942, %c1_i64_1898 : i64
%1944 = arith.addi %1943, %c1_i64_1911 : i64
%1945 = arith.index_cast %1944 : i64 to index
// Dimension 3: same computation with k = 3, in = 28, pad = %1923 (1).
%1946 = arith.index_cast %c3_1893 : index to i64
%c1_i64_1913 = arith.constant 1 : i64
%c2_i64_1914 = arith.constant 2 : i64
%1947 = arith.muli %1923, %c2_i64_1914 : i64
%1948 = arith.index_cast %c28_1885 : index to i64
%1949 = arith.addi %1948, %1947 : i64
%1950 = arith.subi %1946, %c1_i64_1913 : i64
%1951 = arith.muli %c1_i64_1897, %1950 : i64
%1952 = arith.subi %1949, %1951 : i64
%1953 = arith.subi %1952, %c1_i64_1913 : i64
%1954 = arith.floordivsi %1953, %c1_i64_1899 : i64
%1955 = arith.addi %1954, %c1_i64_1913 : i64
%1956 = arith.index_cast %1955 : i64 to index
// Uninitialized 1x128xHxW buffer sized by the two results above.
%1957 = tensor.empty(%1945, %1956) : tensor<1x128x?x?xf32>
%1958 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1868 : tensor<128xf32>) outs(%1957 : tensor<1x128x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x128x?x?xf32>
%1959 = arith.floordivsi %c128_1881, %1926 : index
%1960 = arith.floordivsi %c128_1887, %1926 : index
%c0_1915 = arith.constant 0 : index
%c1_1916 = arith.constant 1 : index
%1961 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_1910, %cast_1857 : tensor<?x?x?x?xf32>, tensor<128x128x3x3xf32>) outs(%1958 : tensor<1x128x?x?xf32>) -> tensor<1x128x?x?xf32>
%cast_1917 = tensor.cast %1961 : tensor<1x128x?x?xf32> to tensor<1x128x28x28xf32>
// ---- ReLU over %cast_1917 -> %cast_1925 ----
// Element-wise max(x, 0.0) written as compare + select.
// Index constants below are not referenced by any op in this group.
%c1_1918 = arith.constant 1 : index
%c1_1919 = arith.constant 1 : index
%c128_1920 = arith.constant 128 : index
%c2_1921 = arith.constant 2 : index
%c28_1922 = arith.constant 28 : index
%c3_1923 = arith.constant 3 : index
%c28_1924 = arith.constant 28 : index
%1962 = tensor.empty() : tensor<1x128x28x28xf32>
%1963 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1917 : tensor<1x128x28x28xf32>) outs(%1962 : tensor<1x128x28x28xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// `ugt` is an unordered predicate: it is also true when %in is NaN, so NaN is passed through.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x128x28x28xf32>
// Source and result types are identical, so this cast does not change the type.
%cast_1925 = tensor.cast %1963 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
// ---- Quantize %cast_1925 (f32) to i8 -> %cast_1935 ----
// q = clamp(round(x / scale) + zero_point, -128, 127), with scale = 3.125e-02 (%1964) and zero_point = 0 (%1965).
%1964 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%1965 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Not referenced in this group; presumably the torch dtype code of the quantized type — TODO confirm.
%int12_1926 = torch.constant.int 12
%1966 = torch.aten.item %1964 : !torch.vtensor<[],f32> -> !torch.float
// %1967 and %1969 are not used by this group; the region below repeats both conversions itself.
%1967 = torch_c.to_f64 %1966
%1968 = torch.aten.item %1965 : !torch.vtensor<[],si8> -> !torch.int
%1969 = torch_c.to_i64 %1968
%c1_1927 = arith.constant 1 : index
%c1_1928 = arith.constant 1 : index
%c128_1929 = arith.constant 128 : index
%c2_1930 = arith.constant 2 : index
%c28_1931 = arith.constant 28 : index
%c3_1932 = arith.constant 3 : index
%c28_1933 = arith.constant 28 : index
%1970 = tensor.empty() : tensor<1x128x28x28xi8>
%1971 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1925 : tensor<1x128x28x28xf32>) outs(%1970 : tensor<1x128x28x28xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %1968
%5775 = arith.sitofp %5774 : i64 to f32
// Scale narrowed from f64 to f32.
%5776 = torch_c.to_f64 %1966
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// math.round rounds halfway cases away from zero.
// NOTE(review): ONNX QuantizeLinear specifies round-half-to-even (math.roundeven) — confirm whether ties matter here.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x128x28x28xi8>
// Both casts keep the same static type.
%cast_1934 = tensor.cast %1971 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
%cast_1935 = tensor.cast %cast_1934 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
// ---- Dequantize %cast_1935 (i8) back to f32 -> %cast_1944 ----
// x = (q - zero_point) * scale, with scale = 3.125e-02 (%1972) and zero_point = 0 (%1973).
%1972 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%1973 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1974 = torch.aten.item %1972 : !torch.vtensor<[],f32> -> !torch.float
// %1975 and %1977 are not used by this group; the region below repeats both conversions itself.
%1975 = torch_c.to_f64 %1974
%1976 = torch.aten.item %1973 : !torch.vtensor<[],si8> -> !torch.int
%1977 = torch_c.to_i64 %1976
%cast_1936 = tensor.cast %cast_1935 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
%c1_1937 = arith.constant 1 : index
%c1_1938 = arith.constant 1 : index
%c128_1939 = arith.constant 128 : index
%c2_1940 = arith.constant 2 : index
%c28_1941 = arith.constant 28 : index
%c3_1942 = arith.constant 3 : index
%c28_1943 = arith.constant 28 : index
%1978 = tensor.empty() : tensor<1x128x28x28xf32>
%1979 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1936 : tensor<1x128x28x28xi8>) outs(%1978 : tensor<1x128x28x28xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 so the zero-point subtraction is done in 32 bits rather than in i8.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %1976
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale narrowed from f64 to f32, then applied.
%5779 = torch_c.to_f64 %1974
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x128x28x28xf32>
%cast_1944 = tensor.cast %1979 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
// ---- Quantize the 512x128x1x1 f32 tensor %70 (defined earlier in the function) to i8 -> %cast_1952 ----
// q = clamp(round(x / scale) + zero_point, -128, 127), with scale = 3.90625e-03 (%1980) and zero_point = 0 (%1981).
%1980 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1981 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Not referenced in this group; presumably the torch dtype code of the quantized type — TODO confirm.
%int12_1945 = torch.constant.int 12
%1982 = torch.aten.item %1980 : !torch.vtensor<[],f32> -> !torch.float
// %1983 and %1985 are not used by this group; the region below repeats both conversions itself.
%1983 = torch_c.to_f64 %1982
%1984 = torch.aten.item %1981 : !torch.vtensor<[],si8> -> !torch.int
%1985 = torch_c.to_i64 %1984
%c1_1946 = arith.constant 1 : index
%c0_1947 = arith.constant 0 : index
%c512_1948 = arith.constant 512 : index
%c1_1949 = arith.constant 1 : index
%c128_1950 = arith.constant 128 : index
%1986 = tensor.empty() : tensor<512x128x1x1xi8>
// #map4 indexes the input as (d0, d1, 0, 0): the two trailing unit dimensions are always read at index 0.
%1987 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%70 : tensor<512x128x1x1xf32>) outs(%1986 : tensor<512x128x1x1xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %1984
%5775 = arith.sitofp %5774 : i64 to f32
// Scale narrowed from f64 to f32.
%5776 = torch_c.to_f64 %1982
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// math.round rounds halfway cases away from zero.
// NOTE(review): ONNX QuantizeLinear specifies round-half-to-even (math.roundeven) — confirm whether ties matter here.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<512x128x1x1xi8>
// Both casts keep the same static type.
%cast_1951 = tensor.cast %1987 : tensor<512x128x1x1xi8> to tensor<512x128x1x1xi8>
%cast_1952 = tensor.cast %cast_1951 : tensor<512x128x1x1xi8> to tensor<512x128x1x1xi8>
// ---- Dequantize %cast_1952 (i8, 512x128x1x1) back to f32 -> %cast_1959 ----
// x = (q - zero_point) * scale, with scale = 3.90625e-03 (%1988) and zero_point = 0 (%1989).
// %cast_1959 is later used as the filter operand of the linalg.conv_2d_nchw_fchw that produces %2056.
%1988 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1989 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%1990 = torch.aten.item %1988 : !torch.vtensor<[],f32> -> !torch.float
// %1991 and %1993 are not used by this group; the region below repeats both conversions itself.
%1991 = torch_c.to_f64 %1990
%1992 = torch.aten.item %1989 : !torch.vtensor<[],si8> -> !torch.int
%1993 = torch_c.to_i64 %1992
%cast_1953 = tensor.cast %cast_1952 : tensor<512x128x1x1xi8> to tensor<512x128x1x1xi8>
%c1_1954 = arith.constant 1 : index
%c0_1955 = arith.constant 0 : index
%c512_1956 = arith.constant 512 : index
%c1_1957 = arith.constant 1 : index
%c128_1958 = arith.constant 128 : index
%1994 = tensor.empty() : tensor<512x128x1x1xf32>
%1995 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1953 : tensor<512x128x1x1xi8>) outs(%1994 : tensor<512x128x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 so the zero-point subtraction is done in 32 bits rather than in i8.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %1992
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale narrowed from f64 to f32, then applied.
%5779 = torch_c.to_f64 %1990
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<512x128x1x1xf32>
%cast_1959 = tensor.cast %1995 : tensor<512x128x1x1xf32> to tensor<512x128x1x1xf32>
// ---- Quantize the 512-element f32 vector %72 (defined earlier in the function) to i8 -> %cast_1965 ----
// q = clamp(round(x / scale) + zero_point, -128, 127), with scale = 7.8125e-03 (%1996) and zero_point = 0 (%1997).
%1996 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%1997 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Not referenced in this group; presumably the torch dtype code of the quantized type — TODO confirm.
%int12_1960 = torch.constant.int 12
%1998 = torch.aten.item %1996 : !torch.vtensor<[],f32> -> !torch.float
// %1999 and %2001 are not used by this group; the region below repeats both conversions itself.
%1999 = torch_c.to_f64 %1998
%2000 = torch.aten.item %1997 : !torch.vtensor<[],si8> -> !torch.int
%2001 = torch_c.to_i64 %2000
%c1_1961 = arith.constant 1 : index
%c0_1962 = arith.constant 0 : index
%c512_1963 = arith.constant 512 : index
%2002 = tensor.empty() : tensor<512xi8>
%2003 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%72 : tensor<512xf32>) outs(%2002 : tensor<512xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %2000
%5775 = arith.sitofp %5774 : i64 to f32
// Scale narrowed from f64 to f32.
%5776 = torch_c.to_f64 %1998
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// math.round rounds halfway cases away from zero.
// NOTE(review): ONNX QuantizeLinear specifies round-half-to-even (math.roundeven) — confirm whether ties matter here.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<512xi8>
// Both casts keep the same static type.
%cast_1964 = tensor.cast %2003 : tensor<512xi8> to tensor<512xi8>
%cast_1965 = tensor.cast %cast_1964 : tensor<512xi8> to tensor<512xi8>
// ---- Dequantize %cast_1965 (i8, 512 elements) back to f32 -> %cast_1970 ----
// x = (q - zero_point) * scale, with scale = 7.8125e-03 (%2004) and zero_point = 0 (%2005).
// %cast_1970 is later broadcast along the channel dimension to initialise the conv output (%2053).
%2004 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2005 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%2006 = torch.aten.item %2004 : !torch.vtensor<[],f32> -> !torch.float
// %2007 and %2009 are not used by this group; the region below repeats both conversions itself.
%2007 = torch_c.to_f64 %2006
%2008 = torch.aten.item %2005 : !torch.vtensor<[],si8> -> !torch.int
%2009 = torch_c.to_i64 %2008
%cast_1966 = tensor.cast %cast_1965 : tensor<512xi8> to tensor<512xi8>
%c1_1967 = arith.constant 1 : index
%c0_1968 = arith.constant 0 : index
%c512_1969 = arith.constant 512 : index
%2010 = tensor.empty() : tensor<512xf32>
%2011 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_1966 : tensor<512xi8>) outs(%2010 : tensor<512xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 so the zero-point subtraction is done in 32 bits rather than in i8.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2008
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale narrowed from f64 to f32, then applied.
%5779 = torch_c.to_f64 %2006
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<512xf32>
%cast_1970 = tensor.cast %2011 : tensor<512xf32> to tensor<512xf32>
// ---- 1x1 convolution, 128 -> 512 channels: %cast_1944 (input), %cast_1959 (filter), %cast_1970 (bias) -> %cast_2019 ----
// Steps: (1) check that groups divides both channel counts, (2) zero-pad the input, (3) compute the output
// spatial extents, (4) broadcast the bias into the output tensor, (5) run linalg.conv_2d_nchw_fchw on top of it.
%int0_1971 = torch.constant.int 0
%int0_1972 = torch.constant.int 0
%int1_1973 = torch.constant.int 1
%int1_1974 = torch.constant.int 1
%int1_1975 = torch.constant.int 1
%int1_1976 = torch.constant.int 1
%int0_1977 = torch.constant.int 0
// %2012 holds the two padding values (the same constants feed tensor.pad via %2017/%2018).
// %2013-%2015 are presumably the stride, dilation and output-padding lists — TODO confirm; none of the four
// lists is referenced again in this group.
%2012 = torch.prim.ListConstruct %int0_1971, %int0_1972 : (!torch.int, !torch.int) -> !torch.list<int>
%2013 = torch.prim.ListConstruct %int1_1973, %int1_1974 : (!torch.int, !torch.int) -> !torch.list<int>
%2014 = torch.prim.ListConstruct %int1_1975, %int1_1976 : (!torch.int, !torch.int) -> !torch.list<int>
%2015 = torch.prim.ListConstruct %int0_1977, %int0_1977 : (!torch.int, !torch.int) -> !torch.list<int>
%false_1978 = torch.constant.bool false
// Group count (= 1); checked against the channel sizes by the two assertions below.
%int1_1979 = torch.constant.int 1
%2016 = torch_c.to_i64 %int1_1979
// Padding amounts (= 0) for the two spatial dimensions.
%2017 = torch_c.to_i64 %int0_1971
%2018 = torch_c.to_i64 %int0_1972
%2019 = torch_c.to_i64 %int0_1977
%2020 = torch_c.to_i64 %int0_1977
%c0_1980 = arith.constant 0 : index
%c1_1981 = arith.constant 1 : index
%c1_1982 = arith.constant 1 : index
%c128_1983 = arith.constant 128 : index
%c2_1984 = arith.constant 2 : index
%c28_1985 = arith.constant 28 : index
%c3_1986 = arith.constant 3 : index
%c28_1987 = arith.constant 28 : index
%c0_1988 = arith.constant 0 : index
%c512_1989 = arith.constant 512 : index
%c1_1990 = arith.constant 1 : index
%c128_1991 = arith.constant 128 : index
%c2_1992 = arith.constant 2 : index
%c1_1993 = arith.constant 1 : index
%c3_1994 = arith.constant 3 : index
%c1_1995 = arith.constant 1 : index
// Runtime checks: 128 % groups == 0 and 512 % groups == 0.
%2021 = arith.index_cast %2016 : i64 to index
%c0_1996 = arith.constant 0 : index
%2022 = arith.remsi %c128_1983, %2021 : index
%2023 = arith.cmpi eq, %c0_1996, %2022 : index
cf.assert %2023, "invalid: groups must divide input channel size evenly."
%c0_1997 = arith.constant 0 : index
%2024 = arith.remsi %c512_1989, %2021 : index
%2025 = arith.cmpi eq, %c0_1997, %2024 : index
cf.assert %2025, "invalid: groups must divide weight batch size evenly."
%c1_i64_1998 = arith.constant 1 : i64
%c1_i64_1999 = arith.constant 1 : i64
%c1_i64_2000 = arith.constant 1 : i64
%c1_i64_2001 = arith.constant 1 : i64
%cst_2002 = arith.constant 0.000000e+00 : f32
%c0_2003 = arith.constant 0 : index
%c1_2004 = arith.constant 1 : index
%c1_2005 = arith.constant 1 : index
%c128_2006 = arith.constant 128 : index
%c2_2007 = arith.constant 2 : index
%c28_2008 = arith.constant 28 : index
%c3_2009 = arith.constant 3 : index
%c28_2010 = arith.constant 28 : index
%c0_i64_2011 = arith.constant 0 : i64
// Pad amounts per dimension: 0 for the two leading dimensions, %2017/%2018 for the two trailing (spatial) ones.
%2026 = arith.index_cast %c0_i64_2011 : i64 to index
%2027 = arith.index_cast %c0_i64_2011 : i64 to index
%2028 = arith.index_cast %2017 : i64 to index
%2029 = arith.index_cast %2018 : i64 to index
// Symmetric padding with 0.0; the result type is fully dynamic because the pad amounts are SSA values.
%padded_2012 = tensor.pad %cast_1944 low[%2026, %2027, %2028, %2029] high[%2026, %2027, %2028, %2029] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_2002 : f32
} : tensor<1x128x28x28xf32> to tensor<?x?x?x?xf32>
// Output extent of the first spatial dimension:
//   %2039 = floordiv(28 + 2 * pad - %c1_i64_1998 * (kernel - 1) - 1, %c1_i64_2000) + 1, with kernel = %c1_1993 = 1.
// The constant-1 multiplier and divisor sit where dilation and stride go in the usual formula (the conv op below
// uses dilations = strides = 1).
%2030 = arith.index_cast %c1_1993 : index to i64
%c1_i64_2013 = arith.constant 1 : i64
%c2_i64_2014 = arith.constant 2 : i64
%2031 = arith.muli %2017, %c2_i64_2014 : i64
%2032 = arith.index_cast %c28_1985 : index to i64
%2033 = arith.addi %2032, %2031 : i64
%2034 = arith.subi %2030, %c1_i64_2013 : i64
%2035 = arith.muli %c1_i64_1998, %2034 : i64
%2036 = arith.subi %2033, %2035 : i64
%2037 = arith.subi %2036, %c1_i64_2013 : i64
%2038 = arith.floordivsi %2037, %c1_i64_2000 : i64
%2039 = arith.addi %2038, %c1_i64_2013 : i64
%2040 = arith.index_cast %2039 : i64 to index
// Same computation for the second spatial dimension (kernel = %c1_1995, pad = %2018).
%2041 = arith.index_cast %c1_1995 : index to i64
%c1_i64_2015 = arith.constant 1 : i64
%c2_i64_2016 = arith.constant 2 : i64
%2042 = arith.muli %2018, %c2_i64_2016 : i64
%2043 = arith.index_cast %c28_1987 : index to i64
%2044 = arith.addi %2043, %2042 : i64
%2045 = arith.subi %2041, %c1_i64_2015 : i64
%2046 = arith.muli %c1_i64_1999, %2045 : i64
%2047 = arith.subi %2044, %2046 : i64
%2048 = arith.subi %2047, %c1_i64_2015 : i64
%2049 = arith.floordivsi %2048, %c1_i64_2001 : i64
%2050 = arith.addi %2049, %c1_i64_2015 : i64
%2051 = arith.index_cast %2050 : i64 to index
%2052 = tensor.empty(%2040, %2051) : tensor<1x512x?x?xf32>
// Broadcast the 512-element bias over the output: #map3 reads the input at (d1), i.e. one value per channel.
%2053 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1970 : tensor<512xf32>) outs(%2052 : tensor<1x512x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x512x?x?xf32>
// Per-group channel counts; not referenced again in this group.
%2054 = arith.floordivsi %c128_1983, %2021 : index
%2055 = arith.floordivsi %c512_1989, %2021 : index
%c0_2017 = arith.constant 0 : index
%c1_2018 = arith.constant 1 : index
// The bias-filled tensor %2053 is passed as the conv's `outs` operand.
%2056 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_2012, %cast_1959 : tensor<?x?x?x?xf32>, tensor<512x128x1x1xf32>) outs(%2053 : tensor<1x512x?x?xf32>) -> tensor<1x512x?x?xf32>
// Cast the dynamic spatial dimensions to the static 28x28 shape.
%cast_2019 = tensor.cast %2056 : tensor<1x512x?x?xf32> to tensor<1x512x28x28xf32>
// ---- Quantize the conv result %cast_2019 (f32) to i8 -> %cast_2029 ----
// q = clamp(round(x / scale) + zero_point, -128, 127), with scale = 1.5625e-02 (%2057) and zero_point = 0 (%2058).
%2057 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%2058 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Not referenced in this group; presumably the torch dtype code of the quantized type — TODO confirm.
%int12_2020 = torch.constant.int 12
%2059 = torch.aten.item %2057 : !torch.vtensor<[],f32> -> !torch.float
// %2060 and %2062 are not used by this group; the region below repeats both conversions itself.
%2060 = torch_c.to_f64 %2059
%2061 = torch.aten.item %2058 : !torch.vtensor<[],si8> -> !torch.int
%2062 = torch_c.to_i64 %2061
%c1_2021 = arith.constant 1 : index
%c1_2022 = arith.constant 1 : index
%c512_2023 = arith.constant 512 : index
%c2_2024 = arith.constant 2 : index
%c28_2025 = arith.constant 28 : index
%c3_2026 = arith.constant 3 : index
%c28_2027 = arith.constant 28 : index
%2063 = tensor.empty() : tensor<1x512x28x28xi8>
%2064 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2019 : tensor<1x512x28x28xf32>) outs(%2063 : tensor<1x512x28x28xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %2061
%5775 = arith.sitofp %5774 : i64 to f32
// Scale narrowed from f64 to f32.
%5776 = torch_c.to_f64 %2059
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// math.round rounds halfway cases away from zero.
// NOTE(review): ONNX QuantizeLinear specifies round-half-to-even (math.roundeven) — confirm whether ties matter here.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x512x28x28xi8>
// Both casts keep the same static type.
%cast_2028 = tensor.cast %2064 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
%cast_2029 = tensor.cast %cast_2028 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
// ---- Dequantize %cast_2029 (i8) back to f32 -> %cast_2038 ----
// x = (q - zero_point) * scale, with scale = 1.5625e-02 (%2065) and zero_point = 0 (%2066).
%2065 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%2066 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%2067 = torch.aten.item %2065 : !torch.vtensor<[],f32> -> !torch.float
// %2068 and %2070 are not used by this group; the region below repeats both conversions itself.
%2068 = torch_c.to_f64 %2067
%2069 = torch.aten.item %2066 : !torch.vtensor<[],si8> -> !torch.int
%2070 = torch_c.to_i64 %2069
%cast_2030 = tensor.cast %cast_2029 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
%c1_2031 = arith.constant 1 : index
%c1_2032 = arith.constant 1 : index
%c512_2033 = arith.constant 512 : index
%c2_2034 = arith.constant 2 : index
%c28_2035 = arith.constant 28 : index
%c3_2036 = arith.constant 3 : index
%c28_2037 = arith.constant 28 : index
%2071 = tensor.empty() : tensor<1x512x28x28xf32>
%2072 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2030 : tensor<1x512x28x28xi8>) outs(%2071 : tensor<1x512x28x28xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 so the zero-point subtraction is done in 32 bits rather than in i8.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2069
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale narrowed from f64 to f32, then applied.
%5779 = torch_c.to_f64 %2067
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x512x28x28xf32>
%cast_2038 = tensor.cast %2072 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
// ---- Element-wise add: %cast_2038 + alpha * %cast_1732 -> %cast_2053 ----
// alpha is the integer constant 1 (%int1_2039); %cast_1732 is defined earlier in the function.
%int1_2039 = torch.constant.int 1
%2073 = torch_c.to_i64 %int1_2039
%c1_2040 = arith.constant 1 : index
%c1_2041 = arith.constant 1 : index
%c512_2042 = arith.constant 512 : index
%c2_2043 = arith.constant 2 : index
%c28_2044 = arith.constant 28 : index
%c3_2045 = arith.constant 3 : index
%c28_2046 = arith.constant 28 : index
%c1_2047 = arith.constant 1 : index
%c512_2048 = arith.constant 512 : index
// Broadcast-compatibility checks for the three trailing dimensions. Both sides of every comparison are
// constants here (512 == 512, 28 == 28, 28 == 28).
%2074 = arith.cmpi eq, %c512_2042, %c512_2048 : index
cf.assert %2074, "mismatched size for broadcast"
%c2_2049 = arith.constant 2 : index
%c28_2050 = arith.constant 28 : index
%2075 = arith.cmpi eq, %c28_2044, %c28_2050 : index
cf.assert %2075, "mismatched size for broadcast"
%c3_2051 = arith.constant 3 : index
%c28_2052 = arith.constant 28 : index
%2076 = arith.cmpi eq, %c28_2046, %c28_2052 : index
cf.assert %2076, "mismatched size for broadcast"
%2077 = tensor.empty() : tensor<1x512x28x28xf32>
%2078 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2038, %cast_1732 : tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) outs(%2077 : tensor<1x512x28x28xf32>) {
^bb0(%in: f32, %in_6197: f32, %out: f32):
// alpha converted to f32, multiplied into the second operand, then added to the first.
%5774 = arith.sitofp %2073 : i64 to f32
%5775 = arith.mulf %in_6197, %5774 : f32
%5776 = arith.addf %in, %5775 : f32
linalg.yield %5776 : f32
} -> tensor<1x512x28x28xf32>
%cast_2053 = tensor.cast %2078 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
// ---- ReLU over %cast_2053 -> %cast_2061 ----
// Element-wise max(x, 0.0) written as compare + select.
// Index constants below are not referenced by any op in this group.
%c1_2054 = arith.constant 1 : index
%c1_2055 = arith.constant 1 : index
%c512_2056 = arith.constant 512 : index
%c2_2057 = arith.constant 2 : index
%c28_2058 = arith.constant 28 : index
%c3_2059 = arith.constant 3 : index
%c28_2060 = arith.constant 28 : index
%2079 = tensor.empty() : tensor<1x512x28x28xf32>
%2080 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2053 : tensor<1x512x28x28xf32>) outs(%2079 : tensor<1x512x28x28xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// `ugt` is an unordered predicate: it is also true when %in is NaN, so NaN is passed through.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x512x28x28xf32>
// Source and result types are identical, so this cast does not change the type.
%cast_2061 = tensor.cast %2080 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
// ---- Quantize %cast_2061 (f32) to i8 -> %cast_2071 ----
// q = clamp(round(x / scale) + zero_point, -128, 127), with scale = 1.5625e-02 (%2081) and zero_point = 0 (%2082).
%2081 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%2082 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Not referenced in this group; presumably the torch dtype code of the quantized type — TODO confirm.
%int12_2062 = torch.constant.int 12
%2083 = torch.aten.item %2081 : !torch.vtensor<[],f32> -> !torch.float
// %2084 and %2086 are not used by this group; the region below repeats both conversions itself.
%2084 = torch_c.to_f64 %2083
%2085 = torch.aten.item %2082 : !torch.vtensor<[],si8> -> !torch.int
%2086 = torch_c.to_i64 %2085
%c1_2063 = arith.constant 1 : index
%c1_2064 = arith.constant 1 : index
%c512_2065 = arith.constant 512 : index
%c2_2066 = arith.constant 2 : index
%c28_2067 = arith.constant 28 : index
%c3_2068 = arith.constant 3 : index
%c28_2069 = arith.constant 28 : index
%2087 = tensor.empty() : tensor<1x512x28x28xi8>
%2088 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2061 : tensor<1x512x28x28xf32>) outs(%2087 : tensor<1x512x28x28xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %2085
%5775 = arith.sitofp %5774 : i64 to f32
// Scale narrowed from f64 to f32.
%5776 = torch_c.to_f64 %2083
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// math.round rounds halfway cases away from zero.
// NOTE(review): ONNX QuantizeLinear specifies round-half-to-even (math.roundeven) — confirm whether ties matter here.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x512x28x28xi8>
// Both casts keep the same static type.
%cast_2070 = tensor.cast %2088 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
%cast_2071 = tensor.cast %cast_2070 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
// ---- Dequantize %cast_2071 (i8) back to f32 -> %cast_2080 ----
// x = (q - zero_point) * scale, with scale = 1.5625e-02 (%2089) and zero_point = 0 (%2090).
%2089 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%2090 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%2091 = torch.aten.item %2089 : !torch.vtensor<[],f32> -> !torch.float
// %2092 and %2094 are not used by this group; the region below repeats both conversions itself.
%2092 = torch_c.to_f64 %2091
%2093 = torch.aten.item %2090 : !torch.vtensor<[],si8> -> !torch.int
%2094 = torch_c.to_i64 %2093
%cast_2072 = tensor.cast %cast_2071 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
%c1_2073 = arith.constant 1 : index
%c1_2074 = arith.constant 1 : index
%c512_2075 = arith.constant 512 : index
%c2_2076 = arith.constant 2 : index
%c28_2077 = arith.constant 28 : index
%c3_2078 = arith.constant 3 : index
%c28_2079 = arith.constant 28 : index
%2095 = tensor.empty() : tensor<1x512x28x28xf32>
%2096 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2072 : tensor<1x512x28x28xi8>) outs(%2095 : tensor<1x512x28x28xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 so the zero-point subtraction is done in 32 bits rather than in i8.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2093
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale narrowed from f64 to f32, then applied.
%5779 = torch_c.to_f64 %2091
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x512x28x28xf32>
%cast_2080 = tensor.cast %2096 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
// ---- Quantize the 128x512x1x1 f32 tensor %74 (defined earlier in the function) to i8 -> %cast_2088 ----
// q = clamp(round(x / scale) + zero_point, -128, 127), with scale = 0.001953125 (%2097) and zero_point = 0 (%2098).
%2097 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
%2098 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Not referenced in this group; presumably the torch dtype code of the quantized type — TODO confirm.
%int12_2081 = torch.constant.int 12
%2099 = torch.aten.item %2097 : !torch.vtensor<[],f32> -> !torch.float
// %2100 and %2102 are not used by this group; the region below repeats both conversions itself.
%2100 = torch_c.to_f64 %2099
%2101 = torch.aten.item %2098 : !torch.vtensor<[],si8> -> !torch.int
%2102 = torch_c.to_i64 %2101
%c1_2082 = arith.constant 1 : index
%c0_2083 = arith.constant 0 : index
%c128_2084 = arith.constant 128 : index
%c1_2085 = arith.constant 1 : index
%c512_2086 = arith.constant 512 : index
%2103 = tensor.empty() : tensor<128x512x1x1xi8>
// #map4 indexes the input as (d0, d1, 0, 0): the two trailing unit dimensions are always read at index 0.
%2104 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%74 : tensor<128x512x1x1xf32>) outs(%2103 : tensor<128x512x1x1xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %2101
%5775 = arith.sitofp %5774 : i64 to f32
// Scale narrowed from f64 to f32.
%5776 = torch_c.to_f64 %2099
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// math.round rounds halfway cases away from zero.
// NOTE(review): ONNX QuantizeLinear specifies round-half-to-even (math.roundeven) — confirm whether ties matter here.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<128x512x1x1xi8>
// Both casts keep the same static type.
%cast_2087 = tensor.cast %2104 : tensor<128x512x1x1xi8> to tensor<128x512x1x1xi8>
%cast_2088 = tensor.cast %cast_2087 : tensor<128x512x1x1xi8> to tensor<128x512x1x1xi8>
// ---- Dequantize %cast_2088 (i8, 128x512x1x1) back to f32 -> %cast_2095 ----
// x = (q - zero_point) * scale, with scale = 0.001953125 (%2105) and zero_point = 0 (%2106).
%2105 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
%2106 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%2107 = torch.aten.item %2105 : !torch.vtensor<[],f32> -> !torch.float
// %2108 and %2110 are not used by this group; the region below repeats both conversions itself.
%2108 = torch_c.to_f64 %2107
%2109 = torch.aten.item %2106 : !torch.vtensor<[],si8> -> !torch.int
%2110 = torch_c.to_i64 %2109
%cast_2089 = tensor.cast %cast_2088 : tensor<128x512x1x1xi8> to tensor<128x512x1x1xi8>
%c1_2090 = arith.constant 1 : index
%c0_2091 = arith.constant 0 : index
%c128_2092 = arith.constant 128 : index
%c1_2093 = arith.constant 1 : index
%c512_2094 = arith.constant 512 : index
%2111 = tensor.empty() : tensor<128x512x1x1xf32>
%2112 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2089 : tensor<128x512x1x1xi8>) outs(%2111 : tensor<128x512x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 so the zero-point subtraction is done in 32 bits rather than in i8.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2109
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale narrowed from f64 to f32, then applied.
%5779 = torch_c.to_f64 %2107
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<128x512x1x1xf32>
%cast_2095 = tensor.cast %2112 : tensor<128x512x1x1xf32> to tensor<128x512x1x1xf32>
// ---- Quantize the 128-element f32 vector %76 (defined earlier in the function) to i8 -> %cast_2101 ----
// q = clamp(round(x / scale) + zero_point, -128, 127), with scale = 7.8125e-03 (%2113) and zero_point = 0 (%2114).
%2113 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2114 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Not referenced in this group; presumably the torch dtype code of the quantized type — TODO confirm.
%int12_2096 = torch.constant.int 12
%2115 = torch.aten.item %2113 : !torch.vtensor<[],f32> -> !torch.float
// %2116 and %2118 are not used by this group; the region below repeats both conversions itself.
%2116 = torch_c.to_f64 %2115
%2117 = torch.aten.item %2114 : !torch.vtensor<[],si8> -> !torch.int
%2118 = torch_c.to_i64 %2117
%c1_2097 = arith.constant 1 : index
%c0_2098 = arith.constant 0 : index
%c128_2099 = arith.constant 128 : index
%2119 = tensor.empty() : tensor<128xi8>
%2120 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%76 : tensor<128xf32>) outs(%2119 : tensor<128xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %2117
%5775 = arith.sitofp %5774 : i64 to f32
// Scale narrowed from f64 to f32.
%5776 = torch_c.to_f64 %2115
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// math.round rounds halfway cases away from zero.
// NOTE(review): ONNX QuantizeLinear specifies round-half-to-even (math.roundeven) — confirm whether ties matter here.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<128xi8>
// Both casts keep the same static type.
%cast_2100 = tensor.cast %2120 : tensor<128xi8> to tensor<128xi8>
%cast_2101 = tensor.cast %cast_2100 : tensor<128xi8> to tensor<128xi8>
// ---- Dequantize %cast_2101 (i8, 128 elements) back to f32 -> %cast_2106 ----
// x = (q - zero_point) * scale, with scale = 7.8125e-03 (%2121) and zero_point = 0 (%2122).
%2121 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2122 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%2123 = torch.aten.item %2121 : !torch.vtensor<[],f32> -> !torch.float
// %2124 and %2126 are not used by this group; the region below repeats both conversions itself.
%2124 = torch_c.to_f64 %2123
%2125 = torch.aten.item %2122 : !torch.vtensor<[],si8> -> !torch.int
%2126 = torch_c.to_i64 %2125
%cast_2102 = tensor.cast %cast_2101 : tensor<128xi8> to tensor<128xi8>
%c1_2103 = arith.constant 1 : index
%c0_2104 = arith.constant 0 : index
%c128_2105 = arith.constant 128 : index
%2127 = tensor.empty() : tensor<128xf32>
%2128 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_2102 : tensor<128xi8>) outs(%2127 : tensor<128xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 so the zero-point subtraction is done in 32 bits rather than in i8.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2125
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale narrowed from f64 to f32, then applied.
%5779 = torch_c.to_f64 %2123
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<128xf32>
%cast_2106 = tensor.cast %2128 : tensor<128xf32> to tensor<128xf32>
%int0_2107 = torch.constant.int 0
%int0_2108 = torch.constant.int 0
%int1_2109 = torch.constant.int 1
%int1_2110 = torch.constant.int 1
%int1_2111 = torch.constant.int 1
%int1_2112 = torch.constant.int 1
%int0_2113 = torch.constant.int 0
%2129 = torch.prim.ListConstruct %int0_2107, %int0_2108 : (!torch.int, !torch.int) -> !torch.list<int>
%2130 = torch.prim.ListConstruct %int1_2109, %int1_2110 : (!torch.int, !torch.int) -> !torch.list<int>
%2131 = torch.prim.ListConstruct %int1_2111, %int1_2112 : (!torch.int, !torch.int) -> !torch.list<int>
%2132 = torch.prim.ListConstruct %int0_2113, %int0_2113 : (!torch.int, !torch.int) -> !torch.list<int>
%false_2114 = torch.constant.bool false
%int1_2115 = torch.constant.int 1
%2133 = torch_c.to_i64 %int1_2115
%2134 = torch_c.to_i64 %int0_2107
%2135 = torch_c.to_i64 %int0_2108
%2136 = torch_c.to_i64 %int0_2113
%2137 = torch_c.to_i64 %int0_2113
%c0_2116 = arith.constant 0 : index
%c1_2117 = arith.constant 1 : index
%c1_2118 = arith.constant 1 : index
%c512_2119 = arith.constant 512 : index
%c2_2120 = arith.constant 2 : index
%c28_2121 = arith.constant 28 : index
%c3_2122 = arith.constant 3 : index
%c28_2123 = arith.constant 28 : index
%c0_2124 = arith.constant 0 : index
%c128_2125 = arith.constant 128 : index
%c1_2126 = arith.constant 1 : index
%c512_2127 = arith.constant 512 : index
%c2_2128 = arith.constant 2 : index
%c1_2129 = arith.constant 1 : index
%c3_2130 = arith.constant 3 : index
%c1_2131 = arith.constant 1 : index
%2138 = arith.index_cast %2133 : i64 to index
%c0_2132 = arith.constant 0 : index
%2139 = arith.remsi %c512_2119, %2138 : index
%2140 = arith.cmpi eq, %c0_2132, %2139 : index
cf.assert %2140, "invalid: groups must divide input channel size evenly."
%c0_2133 = arith.constant 0 : index
%2141 = arith.remsi %c128_2125, %2138 : index
%2142 = arith.cmpi eq, %c0_2133, %2141 : index
cf.assert %2142, "invalid: groups must divide weight batch size evenly."
// ---- Input padding and output spatial size for the 1x1 convolution ----
// i64 constants used in the size formula below: %c1_i64_2134/%c1_i64_2135 multiply (kernel - 1),
// %c1_i64_2136/%c1_i64_2137 are the divisors (they match the conv's dilations = 1 and strides = 1).
%c1_i64_2134 = arith.constant 1 : i64
%c1_i64_2135 = arith.constant 1 : i64
%c1_i64_2136 = arith.constant 1 : i64
%c1_i64_2137 = arith.constant 1 : i64
// Padding fill value.
%cst_2138 = arith.constant 0.000000e+00 : f32
// The index constants below are not referenced by any op visible in this chunk.
%c0_2139 = arith.constant 0 : index
%c1_2140 = arith.constant 1 : index
%c1_2141 = arith.constant 1 : index
%c512_2142 = arith.constant 512 : index
%c2_2143 = arith.constant 2 : index
%c28_2144 = arith.constant 28 : index
%c3_2145 = arith.constant 3 : index
%c28_2146 = arith.constant 28 : index
%c0_i64_2147 = arith.constant 0 : i64
// Pad amounts: 0 for the two leading dims, %2134/%2135 (both 0 here) for the two trailing dims.
%2143 = arith.index_cast %c0_i64_2147 : i64 to index
%2144 = arith.index_cast %c0_i64_2147 : i64 to index
%2145 = arith.index_cast %2134 : i64 to index
%2146 = arith.index_cast %2135 : i64 to index
// Symmetric zero padding (same low and high amounts); the result type is fully dynamic.
%padded_2148 = tensor.pad %cast_2080 low[%2143, %2144, %2145, %2146] high[%2143, %2144, %2145, %2146] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_2138 : f32
} : tensor<1x512x28x28xf32> to tensor<?x?x?x?xf32>
// Output size along dim 2: floordiv(in + 2*pad - c*(kernel - 1) - 1, s) + 1
// = floordiv(28 + 0 - 1*(1 - 1) - 1, 1) + 1 = 28.
%2147 = arith.index_cast %c1_2129 : index to i64
%c1_i64_2149 = arith.constant 1 : i64
%c2_i64_2150 = arith.constant 2 : i64
%2148 = arith.muli %2134, %c2_i64_2150 : i64
%2149 = arith.index_cast %c28_2121 : index to i64
%2150 = arith.addi %2149, %2148 : i64
%2151 = arith.subi %2147, %c1_i64_2149 : i64
%2152 = arith.muli %c1_i64_2134, %2151 : i64
%2153 = arith.subi %2150, %2152 : i64
%2154 = arith.subi %2153, %c1_i64_2149 : i64
%2155 = arith.floordivsi %2154, %c1_i64_2136 : i64
%2156 = arith.addi %2155, %c1_i64_2149 : i64
%2157 = arith.index_cast %2156 : i64 to index
// Same formula along dim 3; also evaluates to 28.
%2158 = arith.index_cast %c1_2131 : index to i64
%c1_i64_2151 = arith.constant 1 : i64
%c2_i64_2152 = arith.constant 2 : i64
%2159 = arith.muli %2135, %c2_i64_2152 : i64
%2160 = arith.index_cast %c28_2123 : index to i64
%2161 = arith.addi %2160, %2159 : i64
%2162 = arith.subi %2158, %c1_i64_2151 : i64
%2163 = arith.muli %c1_i64_2135, %2162 : i64
%2164 = arith.subi %2161, %2163 : i64
%2165 = arith.subi %2164, %c1_i64_2151 : i64
%2166 = arith.floordivsi %2165, %c1_i64_2137 : i64
%2167 = arith.addi %2166, %c1_i64_2151 : i64
%2168 = arith.index_cast %2167 : i64 to index
// ---- 1x1 convolution, 512 -> 128 channels ----
// Output buffer with the dynamically computed spatial sizes %2157 x %2168.
%2169 = tensor.empty(%2157, %2168) : tensor<1x128x?x?xf32>
// Broadcast the 128-element tensor %cast_2106 along dim 1 (#map3 selects d1) so the conv accumulates on top of it.
%2170 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2106 : tensor<128xf32>) outs(%2169 : tensor<1x128x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x128x?x?xf32>
// %2171, %2172, %c0_2153 and %c1_2154 are not referenced by any op visible in this chunk.
%2171 = arith.floordivsi %c512_2119, %2138 : index
%2172 = arith.floordivsi %c128_2125, %2138 : index
%c0_2153 = arith.constant 0 : index
%c1_2154 = arith.constant 1 : index
// NCHW input x FCHW filter convolution, stride 1, dilation 1, accumulating into the bias-initialized %2170.
%2173 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_2148, %cast_2095 : tensor<?x?x?x?xf32>, tensor<128x512x1x1xf32>) outs(%2170 : tensor<1x128x?x?xf32>) -> tensor<1x128x?x?xf32>
// Refine the dynamic result type to the static 1x128x28x28 shape.
%cast_2155 = tensor.cast %2173 : tensor<1x128x?x?xf32> to tensor<1x128x28x28xf32>
// ---- ReLU on the 1x128x28x28 conv result ----
// The index constants below are not referenced by any op visible in this chunk.
%c1_2156 = arith.constant 1 : index
%c1_2157 = arith.constant 1 : index
%c128_2158 = arith.constant 128 : index
%c2_2159 = arith.constant 2 : index
%c28_2160 = arith.constant 28 : index
%c3_2161 = arith.constant 3 : index
%c28_2162 = arith.constant 28 : index
%2174 = tensor.empty() : tensor<1x128x28x28xf32>
// Elementwise: out = (in > 0) ? in : 0. The predicate is `ugt` (unordered-or-greater), so a NaN input is passed through.
%2175 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2155 : tensor<1x128x28x28xf32>) outs(%2174 : tensor<1x128x28x28xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x128x28x28xf32>
// Same-type (no-op) cast.
%cast_2163 = tensor.cast %2175 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
// ---- Quantize the 1x128x28x28 activation to i8: scale = 7.8125e-03 (2^-7), zero point = 0 ----
%2176 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2177 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): constant 12 is not used by visible code; presumably the torch dtype code of the quantized type -- confirm.
%int12_2164 = torch.constant.int 12
// Scalar scale / zero point. %2179 and %2181 are not referenced by visible code; the payload re-derives both values.
%2178 = torch.aten.item %2176 : !torch.vtensor<[],f32> -> !torch.float
%2179 = torch_c.to_f64 %2178
%2180 = torch.aten.item %2177 : !torch.vtensor<[],si8> -> !torch.int
%2181 = torch_c.to_i64 %2180
// The index constants below are not referenced by any op visible in this chunk.
%c1_2165 = arith.constant 1 : index
%c1_2166 = arith.constant 1 : index
%c128_2167 = arith.constant 128 : index
%c2_2168 = arith.constant 2 : index
%c28_2169 = arith.constant 28 : index
%c3_2170 = arith.constant 3 : index
%c28_2171 = arith.constant 28 : index
%2182 = tensor.empty() : tensor<1x128x28x28xi8>
// Elementwise: q = fptosi(clamp(round_half_even(in / scale) + zero_point, -128, 127)).
%2183 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2163 : tensor<1x128x28x28xf32>) outs(%2182 : tensor<1x128x28x28xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %2180
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %2178
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round half to even (math.roundeven) as required by ONNX QuantizeLinear / torch.quantize_per_tensor;
// math.round would round halfway cases away from zero and can be off by one quantization step.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float -> int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x128x28x28xi8>
// Same-type (no-op) casts.
%cast_2172 = tensor.cast %2183 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
%cast_2173 = tensor.cast %cast_2172 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
// ---- Dequantize the i8 activation back to f32: scale = 7.8125e-03, zero point = 0 ----
%2184 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2185 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Scalar scale / zero point. %2187 and %2189 are not referenced by visible code; the payload re-derives both values.
%2186 = torch.aten.item %2184 : !torch.vtensor<[],f32> -> !torch.float
%2187 = torch_c.to_f64 %2186
%2188 = torch.aten.item %2185 : !torch.vtensor<[],si8> -> !torch.int
%2189 = torch_c.to_i64 %2188
// Same-type (no-op) cast.
%cast_2174 = tensor.cast %cast_2173 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
// The index constants below are not referenced by any op visible in this chunk.
%c1_2175 = arith.constant 1 : index
%c1_2176 = arith.constant 1 : index
%c128_2177 = arith.constant 128 : index
%c2_2178 = arith.constant 2 : index
%c28_2179 = arith.constant 28 : index
%c3_2180 = arith.constant 3 : index
%c28_2181 = arith.constant 28 : index
%2190 = tensor.empty() : tensor<1x128x28x28xf32>
// Elementwise: out = f32(sext_i32(in) - trunc_i32(zero_point)) * scale.
%2191 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2174 : tensor<1x128x28x28xi8>) outs(%2190 : tensor<1x128x28x28xf32>) {
^bb0(%in: i8, %out: f32):
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2188
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %2186
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x128x28x28xf32>
// Same-type (no-op) cast; %cast_2182 is the input of the following 3x3 convolution.
%cast_2182 = tensor.cast %2191 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
// ---- Quantize the 128x128x3x3 f32 tensor %78 to i8: scale = 3.90625e-03 (2^-8), zero point = 0 ----
%2192 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2193 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): constant 12 is not used by visible code; presumably the torch dtype code of the quantized type -- confirm.
%int12_2183 = torch.constant.int 12
// Scalar scale / zero point. %2195 and %2197 are not referenced by visible code; the payload re-derives both values.
%2194 = torch.aten.item %2192 : !torch.vtensor<[],f32> -> !torch.float
%2195 = torch_c.to_f64 %2194
%2196 = torch.aten.item %2193 : !torch.vtensor<[],si8> -> !torch.int
%2197 = torch_c.to_i64 %2196
// The index constants below are not referenced by any op visible in this chunk.
%c1_2184 = arith.constant 1 : index
%c0_2185 = arith.constant 0 : index
%c128_2186 = arith.constant 128 : index
%c1_2187 = arith.constant 1 : index
%c128_2188 = arith.constant 128 : index
%c2_2189 = arith.constant 2 : index
%c3_2190 = arith.constant 3 : index
%c3_2191 = arith.constant 3 : index
%c3_2192 = arith.constant 3 : index
%2198 = tensor.empty() : tensor<128x128x3x3xi8>
// Elementwise: q = fptosi(clamp(round_half_even(in / scale) + zero_point, -128, 127)).
%2199 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%78 : tensor<128x128x3x3xf32>) outs(%2198 : tensor<128x128x3x3xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %2196
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %2194
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round half to even (math.roundeven) as required by ONNX QuantizeLinear / torch.quantize_per_tensor;
// math.round would round halfway cases away from zero and can be off by one quantization step.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float -> int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<128x128x3x3xi8>
// Same-type (no-op) casts.
%cast_2193 = tensor.cast %2199 : tensor<128x128x3x3xi8> to tensor<128x128x3x3xi8>
%cast_2194 = tensor.cast %cast_2193 : tensor<128x128x3x3xi8> to tensor<128x128x3x3xi8>
// ---- Dequantize the 128x128x3x3 i8 tensor back to f32: scale = 3.90625e-03, zero point = 0 ----
%2200 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2201 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Scalar scale / zero point. %2203 and %2205 are not referenced by visible code; the payload re-derives both values.
%2202 = torch.aten.item %2200 : !torch.vtensor<[],f32> -> !torch.float
%2203 = torch_c.to_f64 %2202
%2204 = torch.aten.item %2201 : !torch.vtensor<[],si8> -> !torch.int
%2205 = torch_c.to_i64 %2204
// Same-type (no-op) cast.
%cast_2195 = tensor.cast %cast_2194 : tensor<128x128x3x3xi8> to tensor<128x128x3x3xi8>
// The index constants below are not referenced by any op visible in this chunk.
%c1_2196 = arith.constant 1 : index
%c0_2197 = arith.constant 0 : index
%c128_2198 = arith.constant 128 : index
%c1_2199 = arith.constant 1 : index
%c128_2200 = arith.constant 128 : index
%c2_2201 = arith.constant 2 : index
%c3_2202 = arith.constant 3 : index
%c3_2203 = arith.constant 3 : index
%c3_2204 = arith.constant 3 : index
%2206 = tensor.empty() : tensor<128x128x3x3xf32>
// Elementwise: out = f32(sext_i32(in) - trunc_i32(zero_point)) * scale.
%2207 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2195 : tensor<128x128x3x3xi8>) outs(%2206 : tensor<128x128x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2204
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %2202
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<128x128x3x3xf32>
// Same-type (no-op) cast; %cast_2205 is the filter operand of the following 3x3 convolution.
%cast_2205 = tensor.cast %2207 : tensor<128x128x3x3xf32> to tensor<128x128x3x3xf32>
// ---- Quantize the 128-element f32 tensor %80 to i8: scale = 7.8125e-03 (2^-7), zero point = 0 ----
%2208 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2209 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): constant 12 is not used by visible code; presumably the torch dtype code of the quantized type -- confirm.
%int12_2206 = torch.constant.int 12
// Scalar scale / zero point. %2211 and %2213 are not referenced by visible code; the payload re-derives both values.
%2210 = torch.aten.item %2208 : !torch.vtensor<[],f32> -> !torch.float
%2211 = torch_c.to_f64 %2210
%2212 = torch.aten.item %2209 : !torch.vtensor<[],si8> -> !torch.int
%2213 = torch_c.to_i64 %2212
// The index constants below are not referenced by any op visible in this chunk.
%c1_2207 = arith.constant 1 : index
%c0_2208 = arith.constant 0 : index
%c128_2209 = arith.constant 128 : index
%2214 = tensor.empty() : tensor<128xi8>
// Elementwise: q = fptosi(clamp(round_half_even(in / scale) + zero_point, -128, 127)).
%2215 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%80 : tensor<128xf32>) outs(%2214 : tensor<128xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %2212
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %2210
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round half to even (math.roundeven) as required by ONNX QuantizeLinear / torch.quantize_per_tensor;
// math.round would round halfway cases away from zero and can be off by one quantization step.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float -> int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<128xi8>
// Same-type (no-op) casts.
%cast_2210 = tensor.cast %2215 : tensor<128xi8> to tensor<128xi8>
%cast_2211 = tensor.cast %cast_2210 : tensor<128xi8> to tensor<128xi8>
// ---- Dequantize the 128-element i8 tensor back to f32: scale = 7.8125e-03, zero point = 0 ----
%2216 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2217 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Scalar scale / zero point. %2219 and %2221 are not referenced by visible code; the payload re-derives both values.
%2218 = torch.aten.item %2216 : !torch.vtensor<[],f32> -> !torch.float
%2219 = torch_c.to_f64 %2218
%2220 = torch.aten.item %2217 : !torch.vtensor<[],si8> -> !torch.int
%2221 = torch_c.to_i64 %2220
// Same-type (no-op) cast.
%cast_2212 = tensor.cast %cast_2211 : tensor<128xi8> to tensor<128xi8>
// The index constants below are not referenced by any op visible in this chunk.
%c1_2213 = arith.constant 1 : index
%c0_2214 = arith.constant 0 : index
%c128_2215 = arith.constant 128 : index
%2222 = tensor.empty() : tensor<128xf32>
// Elementwise: out = f32(sext_i32(in) - trunc_i32(zero_point)) * scale.
%2223 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_2212 : tensor<128xi8>) outs(%2222 : tensor<128xf32>) {
^bb0(%in: i8, %out: f32):
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2220
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %2218
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<128xf32>
// Same-type (no-op) cast; %cast_2216 seeds the output of the following 3x3 convolution.
%cast_2216 = tensor.cast %2223 : tensor<128xf32> to tensor<128xf32>
// ---- Convolution setup: 128x128x3x3 filter applied to a 1x128x28x28 input (shapes per the ops below) ----
// Torch-level integer attributes of the convolution.
%int1_2217 = torch.constant.int 1
%int1_2218 = torch.constant.int 1
%int1_2219 = torch.constant.int 1
%int1_2220 = torch.constant.int 1
%int1_2221 = torch.constant.int 1
%int1_2222 = torch.constant.int 1
%int0_2223 = torch.constant.int 0
// Attribute lists [1,1], [1,1], [1,1], [0,0]. The list values themselves are not consumed by any op visible in this chunk.
// NOTE(review): presumably padding / strides-or-dilations / output padding of the original torch conv op -- confirm.
%2224 = torch.prim.ListConstruct %int1_2217, %int1_2218 : (!torch.int, !torch.int) -> !torch.list<int>
%2225 = torch.prim.ListConstruct %int1_2219, %int1_2220 : (!torch.int, !torch.int) -> !torch.list<int>
%2226 = torch.prim.ListConstruct %int1_2221, %int1_2222 : (!torch.int, !torch.int) -> !torch.list<int>
%2227 = torch.prim.ListConstruct %int0_2223, %int0_2223 : (!torch.int, !torch.int) -> !torch.list<int>
%false_2224 = torch.constant.bool false
%int1_2225 = torch.constant.int 1
// %2228 (= 1) is the group count checked by the asserts below; %2229/%2230 (= 1) become the H/W padding of tensor.pad.
%2228 = torch_c.to_i64 %int1_2225
%2229 = torch_c.to_i64 %int1_2217
%2230 = torch_c.to_i64 %int1_2218
// %2231/%2232 are not referenced by any op visible in this chunk.
%2231 = torch_c.to_i64 %int0_2223
%2232 = torch_c.to_i64 %int0_2223
// Index constants. Of the first group, %c128_2229 (input channels) and %c28_2231/%c28_2233 (input H/W) are used below.
%c0_2226 = arith.constant 0 : index
%c1_2227 = arith.constant 1 : index
%c1_2228 = arith.constant 1 : index
%c128_2229 = arith.constant 128 : index
%c2_2230 = arith.constant 2 : index
%c28_2231 = arith.constant 28 : index
%c3_2232 = arith.constant 3 : index
%c28_2233 = arith.constant 28 : index
// Of the second group, %c128_2235 (filter count) and %c3_2239/%c3_2241 (kernel H/W) are used below.
%c0_2234 = arith.constant 0 : index
%c128_2235 = arith.constant 128 : index
%c1_2236 = arith.constant 1 : index
%c128_2237 = arith.constant 128 : index
%c2_2238 = arith.constant 2 : index
%c3_2239 = arith.constant 3 : index
%c3_2240 = arith.constant 3 : index
%c3_2241 = arith.constant 3 : index
// Runtime checks: 128 % groups == 0 for both the input channel count and the filter count.
%2233 = arith.index_cast %2228 : i64 to index
%c0_2242 = arith.constant 0 : index
%2234 = arith.remsi %c128_2229, %2233 : index
%2235 = arith.cmpi eq, %c0_2242, %2234 : index
cf.assert %2235, "invalid: groups must divide input channel size evenly."
%c0_2243 = arith.constant 0 : index
%2236 = arith.remsi %c128_2235, %2233 : index
%2237 = arith.cmpi eq, %c0_2243, %2236 : index
cf.assert %2237, "invalid: groups must divide weight batch size evenly."
// ---- Input padding and output spatial size for the 3x3 convolution ----
// i64 constants used in the size formula below: %c1_i64_2244/%c1_i64_2245 multiply (kernel - 1),
// %c1_i64_2246/%c1_i64_2247 are the divisors (they match the conv's dilations = 1 and strides = 1).
%c1_i64_2244 = arith.constant 1 : i64
%c1_i64_2245 = arith.constant 1 : i64
%c1_i64_2246 = arith.constant 1 : i64
%c1_i64_2247 = arith.constant 1 : i64
// Padding fill value.
%cst_2248 = arith.constant 0.000000e+00 : f32
// The index constants below are not referenced by any op visible in this chunk.
%c0_2249 = arith.constant 0 : index
%c1_2250 = arith.constant 1 : index
%c1_2251 = arith.constant 1 : index
%c128_2252 = arith.constant 128 : index
%c2_2253 = arith.constant 2 : index
%c28_2254 = arith.constant 28 : index
%c3_2255 = arith.constant 3 : index
%c28_2256 = arith.constant 28 : index
%c0_i64_2257 = arith.constant 0 : i64
// Pad amounts: 0 for the two leading dims, %2229/%2230 (both 1 here) for the two trailing dims.
%2238 = arith.index_cast %c0_i64_2257 : i64 to index
%2239 = arith.index_cast %c0_i64_2257 : i64 to index
%2240 = arith.index_cast %2229 : i64 to index
%2241 = arith.index_cast %2230 : i64 to index
// Symmetric zero padding (same low and high amounts); the result type is fully dynamic.
%padded_2258 = tensor.pad %cast_2182 low[%2238, %2239, %2240, %2241] high[%2238, %2239, %2240, %2241] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_2248 : f32
} : tensor<1x128x28x28xf32> to tensor<?x?x?x?xf32>
// Output size along dim 2: floordiv(in + 2*pad - c*(kernel - 1) - 1, s) + 1
// = floordiv(28 + 2 - 1*(3 - 1) - 1, 1) + 1 = 28.
%2242 = arith.index_cast %c3_2239 : index to i64
%c1_i64_2259 = arith.constant 1 : i64
%c2_i64_2260 = arith.constant 2 : i64
%2243 = arith.muli %2229, %c2_i64_2260 : i64
%2244 = arith.index_cast %c28_2231 : index to i64
%2245 = arith.addi %2244, %2243 : i64
%2246 = arith.subi %2242, %c1_i64_2259 : i64
%2247 = arith.muli %c1_i64_2244, %2246 : i64
%2248 = arith.subi %2245, %2247 : i64
%2249 = arith.subi %2248, %c1_i64_2259 : i64
%2250 = arith.floordivsi %2249, %c1_i64_2246 : i64
%2251 = arith.addi %2250, %c1_i64_2259 : i64
%2252 = arith.index_cast %2251 : i64 to index
// Same formula along dim 3; also evaluates to 28.
%2253 = arith.index_cast %c3_2241 : index to i64
%c1_i64_2261 = arith.constant 1 : i64
%c2_i64_2262 = arith.constant 2 : i64
%2254 = arith.muli %2230, %c2_i64_2262 : i64
%2255 = arith.index_cast %c28_2233 : index to i64
%2256 = arith.addi %2255, %2254 : i64
%2257 = arith.subi %2253, %c1_i64_2261 : i64
%2258 = arith.muli %c1_i64_2245, %2257 : i64
%2259 = arith.subi %2256, %2258 : i64
%2260 = arith.subi %2259, %c1_i64_2261 : i64
%2261 = arith.floordivsi %2260, %c1_i64_2247 : i64
%2262 = arith.addi %2261, %c1_i64_2261 : i64
%2263 = arith.index_cast %2262 : i64 to index
// ---- 3x3 convolution, 128 -> 128 channels ----
// Output buffer with the dynamically computed spatial sizes %2252 x %2263.
%2264 = tensor.empty(%2252, %2263) : tensor<1x128x?x?xf32>
// Broadcast the 128-element tensor %cast_2216 along dim 1 (#map3 selects d1) so the conv accumulates on top of it.
%2265 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2216 : tensor<128xf32>) outs(%2264 : tensor<1x128x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x128x?x?xf32>
// %2266, %2267, %c0_2263 and %c1_2264 are not referenced by any op visible in this chunk.
%2266 = arith.floordivsi %c128_2229, %2233 : index
%2267 = arith.floordivsi %c128_2235, %2233 : index
%c0_2263 = arith.constant 0 : index
%c1_2264 = arith.constant 1 : index
// NCHW input x FCHW filter convolution, stride 1, dilation 1, accumulating into the bias-initialized %2265.
%2268 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_2258, %cast_2205 : tensor<?x?x?x?xf32>, tensor<128x128x3x3xf32>) outs(%2265 : tensor<1x128x?x?xf32>) -> tensor<1x128x?x?xf32>
// Refine the dynamic result type to the static 1x128x28x28 shape.
%cast_2265 = tensor.cast %2268 : tensor<1x128x?x?xf32> to tensor<1x128x28x28xf32>
// ---- ReLU on the 1x128x28x28 conv result ----
// The index constants below are not referenced by any op visible in this chunk.
%c1_2266 = arith.constant 1 : index
%c1_2267 = arith.constant 1 : index
%c128_2268 = arith.constant 128 : index
%c2_2269 = arith.constant 2 : index
%c28_2270 = arith.constant 28 : index
%c3_2271 = arith.constant 3 : index
%c28_2272 = arith.constant 28 : index
%2269 = tensor.empty() : tensor<1x128x28x28xf32>
// Elementwise: out = (in > 0) ? in : 0. The predicate is `ugt` (unordered-or-greater), so a NaN input is passed through.
%2270 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2265 : tensor<1x128x28x28xf32>) outs(%2269 : tensor<1x128x28x28xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x128x28x28xf32>
// Same-type (no-op) cast.
%cast_2273 = tensor.cast %2270 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
// ---- Quantize the 1x128x28x28 activation to i8: scale = 1.5625e-02 (2^-6), zero point = 0 ----
%2271 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%2272 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): constant 12 is not used by visible code; presumably the torch dtype code of the quantized type -- confirm.
%int12_2274 = torch.constant.int 12
// Scalar scale / zero point. %2274 and %2276 are not referenced by visible code; the payload re-derives both values.
%2273 = torch.aten.item %2271 : !torch.vtensor<[],f32> -> !torch.float
%2274 = torch_c.to_f64 %2273
%2275 = torch.aten.item %2272 : !torch.vtensor<[],si8> -> !torch.int
%2276 = torch_c.to_i64 %2275
// The index constants below are not referenced by any op visible in this chunk.
%c1_2275 = arith.constant 1 : index
%c1_2276 = arith.constant 1 : index
%c128_2277 = arith.constant 128 : index
%c2_2278 = arith.constant 2 : index
%c28_2279 = arith.constant 28 : index
%c3_2280 = arith.constant 3 : index
%c28_2281 = arith.constant 28 : index
%2277 = tensor.empty() : tensor<1x128x28x28xi8>
// Elementwise: q = fptosi(clamp(round_half_even(in / scale) + zero_point, -128, 127)).
%2278 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2273 : tensor<1x128x28x28xf32>) outs(%2277 : tensor<1x128x28x28xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %2275
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %2273
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round half to even (math.roundeven) as required by ONNX QuantizeLinear / torch.quantize_per_tensor;
// math.round would round halfway cases away from zero and can be off by one quantization step.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float -> int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x128x28x28xi8>
// Same-type (no-op) casts.
%cast_2282 = tensor.cast %2278 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
%cast_2283 = tensor.cast %cast_2282 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
// ---- Dequantize the i8 activation back to f32: scale = 1.5625e-02, zero point = 0 ----
%2279 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%2280 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Scalar scale / zero point. %2282 and %2284 are not referenced by visible code; the payload re-derives both values.
%2281 = torch.aten.item %2279 : !torch.vtensor<[],f32> -> !torch.float
%2282 = torch_c.to_f64 %2281
%2283 = torch.aten.item %2280 : !torch.vtensor<[],si8> -> !torch.int
%2284 = torch_c.to_i64 %2283
// Same-type (no-op) cast.
%cast_2284 = tensor.cast %cast_2283 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
// The index constants below are not referenced by any op visible in this chunk.
%c1_2285 = arith.constant 1 : index
%c1_2286 = arith.constant 1 : index
%c128_2287 = arith.constant 128 : index
%c2_2288 = arith.constant 2 : index
%c28_2289 = arith.constant 28 : index
%c3_2290 = arith.constant 3 : index
%c28_2291 = arith.constant 28 : index
%2285 = tensor.empty() : tensor<1x128x28x28xf32>
// Elementwise: out = f32(sext_i32(in) - trunc_i32(zero_point)) * scale.
%2286 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2284 : tensor<1x128x28x28xi8>) outs(%2285 : tensor<1x128x28x28xf32>) {
^bb0(%in: i8, %out: f32):
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2283
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %2281
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x128x28x28xf32>
// Same-type (no-op) cast; %cast_2292 is the input of the following 1x1 convolution.
%cast_2292 = tensor.cast %2286 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
// ---- Quantize the 512x128x1x1 f32 tensor %82 to i8: scale = 3.90625e-03 (2^-8), zero point = 0 ----
%2287 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2288 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): constant 12 is not used by visible code; presumably the torch dtype code of the quantized type -- confirm.
%int12_2293 = torch.constant.int 12
// Scalar scale / zero point. %2290 and %2292 are not referenced by visible code; the payload re-derives both values.
%2289 = torch.aten.item %2287 : !torch.vtensor<[],f32> -> !torch.float
%2290 = torch_c.to_f64 %2289
%2291 = torch.aten.item %2288 : !torch.vtensor<[],si8> -> !torch.int
%2292 = torch_c.to_i64 %2291
// The index constants below are not referenced by any op visible in this chunk.
%c1_2294 = arith.constant 1 : index
%c0_2295 = arith.constant 0 : index
%c512_2296 = arith.constant 512 : index
%c1_2297 = arith.constant 1 : index
%c128_2298 = arith.constant 128 : index
%2293 = tensor.empty() : tensor<512x128x1x1xi8>
// Elementwise: q = fptosi(clamp(round_half_even(in / scale) + zero_point, -128, 127)).
// The input is read through #map4, which indexes the two unit trailing dims with constant 0.
%2294 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%82 : tensor<512x128x1x1xf32>) outs(%2293 : tensor<512x128x1x1xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %2291
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %2289
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round half to even (math.roundeven) as required by ONNX QuantizeLinear / torch.quantize_per_tensor;
// math.round would round halfway cases away from zero and can be off by one quantization step.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float -> int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<512x128x1x1xi8>
// Same-type (no-op) casts.
%cast_2299 = tensor.cast %2294 : tensor<512x128x1x1xi8> to tensor<512x128x1x1xi8>
%cast_2300 = tensor.cast %cast_2299 : tensor<512x128x1x1xi8> to tensor<512x128x1x1xi8>
// ---- Dequantize the 512x128x1x1 i8 tensor back to f32: scale = 3.90625e-03, zero point = 0 ----
%2295 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2296 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Scalar scale / zero point. %2298 and %2300 are not referenced by visible code; the payload re-derives both values.
%2297 = torch.aten.item %2295 : !torch.vtensor<[],f32> -> !torch.float
%2298 = torch_c.to_f64 %2297
%2299 = torch.aten.item %2296 : !torch.vtensor<[],si8> -> !torch.int
%2300 = torch_c.to_i64 %2299
// Same-type (no-op) cast.
%cast_2301 = tensor.cast %cast_2300 : tensor<512x128x1x1xi8> to tensor<512x128x1x1xi8>
// The index constants below are not referenced by any op visible in this chunk.
%c1_2302 = arith.constant 1 : index
%c0_2303 = arith.constant 0 : index
%c512_2304 = arith.constant 512 : index
%c1_2305 = arith.constant 1 : index
%c128_2306 = arith.constant 128 : index
%2301 = tensor.empty() : tensor<512x128x1x1xf32>
// Elementwise: out = f32(sext_i32(in) - trunc_i32(zero_point)) * scale.
// The input is read through #map4, which indexes the two unit trailing dims with constant 0.
%2302 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2301 : tensor<512x128x1x1xi8>) outs(%2301 : tensor<512x128x1x1xf32>) {
^bb0(%in: i8, %out: f32):
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2299
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %2297
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<512x128x1x1xf32>
// Same-type (no-op) cast; %cast_2307 is the filter operand of the following 1x1 convolution.
%cast_2307 = tensor.cast %2302 : tensor<512x128x1x1xf32> to tensor<512x128x1x1xf32>
// ---- Quantize the 512-element f32 tensor %84 to i8: scale = 7.8125e-03 (2^-7), zero point = 0 ----
%2303 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2304 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): constant 12 is not used by visible code; presumably the torch dtype code of the quantized type -- confirm.
%int12_2308 = torch.constant.int 12
// Scalar scale / zero point. %2306 and %2308 are not referenced by visible code; the payload re-derives both values.
%2305 = torch.aten.item %2303 : !torch.vtensor<[],f32> -> !torch.float
%2306 = torch_c.to_f64 %2305
%2307 = torch.aten.item %2304 : !torch.vtensor<[],si8> -> !torch.int
%2308 = torch_c.to_i64 %2307
// The index constants below are not referenced by any op visible in this chunk.
%c1_2309 = arith.constant 1 : index
%c0_2310 = arith.constant 0 : index
%c512_2311 = arith.constant 512 : index
%2309 = tensor.empty() : tensor<512xi8>
// Elementwise: q = fptosi(clamp(round_half_even(in / scale) + zero_point, -128, 127)).
%2310 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%84 : tensor<512xf32>) outs(%2309 : tensor<512xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %2307
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %2305
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round half to even (math.roundeven) as required by ONNX QuantizeLinear / torch.quantize_per_tensor;
// math.round would round halfway cases away from zero and can be off by one quantization step.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float -> int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<512xi8>
// Same-type (no-op) casts.
%cast_2312 = tensor.cast %2310 : tensor<512xi8> to tensor<512xi8>
%cast_2313 = tensor.cast %cast_2312 : tensor<512xi8> to tensor<512xi8>
// ---- Dequantize the 512-element i8 tensor back to f32: scale = 7.8125e-03, zero point = 0 ----
%2311 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2312 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Scalar scale / zero point. %2314 and %2316 are not referenced by visible code; the payload re-derives both values.
%2313 = torch.aten.item %2311 : !torch.vtensor<[],f32> -> !torch.float
%2314 = torch_c.to_f64 %2313
%2315 = torch.aten.item %2312 : !torch.vtensor<[],si8> -> !torch.int
%2316 = torch_c.to_i64 %2315
// Same-type (no-op) cast.
%cast_2314 = tensor.cast %cast_2313 : tensor<512xi8> to tensor<512xi8>
// The index constants below are not referenced by any op visible in this chunk.
%c1_2315 = arith.constant 1 : index
%c0_2316 = arith.constant 0 : index
%c512_2317 = arith.constant 512 : index
%2317 = tensor.empty() : tensor<512xf32>
// Elementwise: out = f32(sext_i32(in) - trunc_i32(zero_point)) * scale.
%2318 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_2314 : tensor<512xi8>) outs(%2317 : tensor<512xf32>) {
^bb0(%in: i8, %out: f32):
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2315
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %2313
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<512xf32>
// Same-type (no-op) cast; %cast_2318 seeds the output of the following 1x1 convolution.
%cast_2318 = tensor.cast %2318 : tensor<512xf32> to tensor<512xf32>
// ---- Convolution setup: 512x128x1x1 filter applied to a 1x128x28x28 input (shapes per the ops below) ----
// Torch-level integer attributes of the convolution.
%int0_2319 = torch.constant.int 0
%int0_2320 = torch.constant.int 0
%int1_2321 = torch.constant.int 1
%int1_2322 = torch.constant.int 1
%int1_2323 = torch.constant.int 1
%int1_2324 = torch.constant.int 1
%int0_2325 = torch.constant.int 0
// Attribute lists [0,0], [1,1], [1,1], [0,0]. The list values themselves are not consumed by any op visible in this chunk.
// NOTE(review): presumably padding / strides-or-dilations / output padding of the original torch conv op -- confirm.
%2319 = torch.prim.ListConstruct %int0_2319, %int0_2320 : (!torch.int, !torch.int) -> !torch.list<int>
%2320 = torch.prim.ListConstruct %int1_2321, %int1_2322 : (!torch.int, !torch.int) -> !torch.list<int>
%2321 = torch.prim.ListConstruct %int1_2323, %int1_2324 : (!torch.int, !torch.int) -> !torch.list<int>
%2322 = torch.prim.ListConstruct %int0_2325, %int0_2325 : (!torch.int, !torch.int) -> !torch.list<int>
%false_2326 = torch.constant.bool false
%int1_2327 = torch.constant.int 1
// %2323 (= 1) is the group count checked by the asserts below; %2324/%2325 (= 0) become the H/W padding of tensor.pad.
%2323 = torch_c.to_i64 %int1_2327
%2324 = torch_c.to_i64 %int0_2319
%2325 = torch_c.to_i64 %int0_2320
// %2326/%2327 are not referenced by any op visible in this chunk.
%2326 = torch_c.to_i64 %int0_2325
%2327 = torch_c.to_i64 %int0_2325
// Index constants. Of the first group, %c128_2331 (input channels) and %c28_2333/%c28_2335 (input H/W) are used below.
%c0_2328 = arith.constant 0 : index
%c1_2329 = arith.constant 1 : index
%c1_2330 = arith.constant 1 : index
%c128_2331 = arith.constant 128 : index
%c2_2332 = arith.constant 2 : index
%c28_2333 = arith.constant 28 : index
%c3_2334 = arith.constant 3 : index
%c28_2335 = arith.constant 28 : index
// Of the second group, %c512_2337 (filter count) and %c1_2341/%c1_2343 (kernel H/W) are used below.
%c0_2336 = arith.constant 0 : index
%c512_2337 = arith.constant 512 : index
%c1_2338 = arith.constant 1 : index
%c128_2339 = arith.constant 128 : index
%c2_2340 = arith.constant 2 : index
%c1_2341 = arith.constant 1 : index
%c3_2342 = arith.constant 3 : index
%c1_2343 = arith.constant 1 : index
// Runtime checks: 128 % groups == 0 and 512 % groups == 0.
%2328 = arith.index_cast %2323 : i64 to index
%c0_2344 = arith.constant 0 : index
%2329 = arith.remsi %c128_2331, %2328 : index
%2330 = arith.cmpi eq, %c0_2344, %2329 : index
cf.assert %2330, "invalid: groups must divide input channel size evenly."
%c0_2345 = arith.constant 0 : index
%2331 = arith.remsi %c512_2337, %2328 : index
%2332 = arith.cmpi eq, %c0_2345, %2331 : index
cf.assert %2332, "invalid: groups must divide weight batch size evenly."
// ---- Input padding and output spatial size for the 1x1 convolution ----
// i64 constants used in the size formula below: %c1_i64_2346/%c1_i64_2347 multiply (kernel - 1),
// %c1_i64_2348/%c1_i64_2349 are the divisors (they match the conv's dilations = 1 and strides = 1).
%c1_i64_2346 = arith.constant 1 : i64
%c1_i64_2347 = arith.constant 1 : i64
%c1_i64_2348 = arith.constant 1 : i64
%c1_i64_2349 = arith.constant 1 : i64
// Padding fill value.
%cst_2350 = arith.constant 0.000000e+00 : f32
// The index constants below are not referenced by any op visible in this chunk.
%c0_2351 = arith.constant 0 : index
%c1_2352 = arith.constant 1 : index
%c1_2353 = arith.constant 1 : index
%c128_2354 = arith.constant 128 : index
%c2_2355 = arith.constant 2 : index
%c28_2356 = arith.constant 28 : index
%c3_2357 = arith.constant 3 : index
%c28_2358 = arith.constant 28 : index
%c0_i64_2359 = arith.constant 0 : i64
// Pad amounts: 0 for the two leading dims, %2324/%2325 (both 0 here) for the two trailing dims.
%2333 = arith.index_cast %c0_i64_2359 : i64 to index
%2334 = arith.index_cast %c0_i64_2359 : i64 to index
%2335 = arith.index_cast %2324 : i64 to index
%2336 = arith.index_cast %2325 : i64 to index
// Symmetric zero padding (same low and high amounts); the result type is fully dynamic.
%padded_2360 = tensor.pad %cast_2292 low[%2333, %2334, %2335, %2336] high[%2333, %2334, %2335, %2336] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_2350 : f32
} : tensor<1x128x28x28xf32> to tensor<?x?x?x?xf32>
// Output size along dim 2: floordiv(in + 2*pad - c*(kernel - 1) - 1, s) + 1
// = floordiv(28 + 0 - 1*(1 - 1) - 1, 1) + 1 = 28.
%2337 = arith.index_cast %c1_2341 : index to i64
%c1_i64_2361 = arith.constant 1 : i64
%c2_i64_2362 = arith.constant 2 : i64
%2338 = arith.muli %2324, %c2_i64_2362 : i64
%2339 = arith.index_cast %c28_2333 : index to i64
%2340 = arith.addi %2339, %2338 : i64
%2341 = arith.subi %2337, %c1_i64_2361 : i64
%2342 = arith.muli %c1_i64_2346, %2341 : i64
%2343 = arith.subi %2340, %2342 : i64
%2344 = arith.subi %2343, %c1_i64_2361 : i64
%2345 = arith.floordivsi %2344, %c1_i64_2348 : i64
%2346 = arith.addi %2345, %c1_i64_2361 : i64
%2347 = arith.index_cast %2346 : i64 to index
// Same formula along dim 3; also evaluates to 28.
%2348 = arith.index_cast %c1_2343 : index to i64
%c1_i64_2363 = arith.constant 1 : i64
%c2_i64_2364 = arith.constant 2 : i64
%2349 = arith.muli %2325, %c2_i64_2364 : i64
%2350 = arith.index_cast %c28_2335 : index to i64
%2351 = arith.addi %2350, %2349 : i64
%2352 = arith.subi %2348, %c1_i64_2363 : i64
%2353 = arith.muli %c1_i64_2347, %2352 : i64
%2354 = arith.subi %2351, %2353 : i64
%2355 = arith.subi %2354, %c1_i64_2363 : i64
%2356 = arith.floordivsi %2355, %c1_i64_2349 : i64
%2357 = arith.addi %2356, %c1_i64_2363 : i64
%2358 = arith.index_cast %2357 : i64 to index
// ---- 1x1 convolution, 128 -> 512 channels ----
// Output buffer with the dynamically computed spatial sizes %2347 x %2358.
%2359 = tensor.empty(%2347, %2358) : tensor<1x512x?x?xf32>
// Broadcast the 512-element tensor %cast_2318 along dim 1 (#map3 selects d1) so the conv accumulates on top of it.
%2360 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2318 : tensor<512xf32>) outs(%2359 : tensor<1x512x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x512x?x?xf32>
// %2361, %2362, %c0_2365 and %c1_2366 are not referenced by any op visible in this chunk.
%2361 = arith.floordivsi %c128_2331, %2328 : index
%2362 = arith.floordivsi %c512_2337, %2328 : index
%c0_2365 = arith.constant 0 : index
%c1_2366 = arith.constant 1 : index
// NCHW input x FCHW filter convolution, stride 1, dilation 1, accumulating into the bias-initialized %2360.
%2363 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_2360, %cast_2307 : tensor<?x?x?x?xf32>, tensor<512x128x1x1xf32>) outs(%2360 : tensor<1x512x?x?xf32>) -> tensor<1x512x?x?xf32>
// Refine the dynamic result type to the static 1x512x28x28 shape.
%cast_2367 = tensor.cast %2363 : tensor<1x512x?x?xf32> to tensor<1x512x28x28xf32>
// Quantize the f32 tensor %cast_2367 to i8 using scale 7.8125e-03 and zero point 0.
%2364 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2365 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): %int12_2368 is not referenced in the visible part of the file; presumably a dtype code carried over from the torch-level op -- confirm.
%int12_2368 = torch.constant.int 12
// Scalar scale (%2366) and zero point (%2368); the generic body below converts them again itself.
%2366 = torch.aten.item %2364 : !torch.vtensor<[],f32> -> !torch.float
%2367 = torch_c.to_f64 %2366
%2368 = torch.aten.item %2365 : !torch.vtensor<[],si8> -> !torch.int
%2369 = torch_c.to_i64 %2368
// The index constants below are not referenced in the visible part of the file.
%c1_2369 = arith.constant 1 : index
%c1_2370 = arith.constant 1 : index
%c512_2371 = arith.constant 512 : index
%c2_2372 = arith.constant 2 : index
%c28_2373 = arith.constant 28 : index
%c3_2374 = arith.constant 3 : index
%c28_2375 = arith.constant 28 : index
%2370 = tensor.empty() : tensor<1x512x28x28xi8>
%2371 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2367 : tensor<1x512x28x28xf32>) outs(%2370 : tensor<1x512x28x28xi8>) {
^bb0(%in: f32, %out: i8):
// Per element: round(%in / scale) + zero_point, clamped to [-128, 127], then converted to signed i8.
%5774 = torch_c.to_i64 %2368
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %2366
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round rounds ties away from zero; ONNX QuantizeLinear specifies ties-to-even -- confirm whether math.roundeven was intended.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
// ult/ugt are the unordered comparison predicates of arith.cmpf.
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x512x28x28xi8>
// Both casts keep the type unchanged (source and result types are identical).
%cast_2376 = tensor.cast %2371 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
%cast_2377 = tensor.cast %cast_2376 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
// Dequantize the i8 tensor %cast_2377 back to f32 using scale 7.8125e-03 and zero point 0.
%2372 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2373 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Scalar scale (%2374) and zero point (%2376); the generic body below converts them again itself.
%2374 = torch.aten.item %2372 : !torch.vtensor<[],f32> -> !torch.float
%2375 = torch_c.to_f64 %2374
%2376 = torch.aten.item %2373 : !torch.vtensor<[],si8> -> !torch.int
%2377 = torch_c.to_i64 %2376
%cast_2378 = tensor.cast %cast_2377 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
// The index constants below are not referenced in the visible part of the file.
%c1_2379 = arith.constant 1 : index
%c1_2380 = arith.constant 1 : index
%c512_2381 = arith.constant 512 : index
%c2_2382 = arith.constant 2 : index
%c28_2383 = arith.constant 28 : index
%c3_2384 = arith.constant 3 : index
%c28_2385 = arith.constant 28 : index
%2378 = tensor.empty() : tensor<1x512x28x28xf32>
%2379 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2378 : tensor<1x512x28x28xi8>) outs(%2378 : tensor<1x512x28x28xf32>) {
^bb0(%in: i8, %out: f32):
// Per element: (sign-extended %in - zero_point) converted to f32, then multiplied by the scale.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2376
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %2374
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x512x28x28xf32>
%cast_2386 = tensor.cast %2379 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
// Elementwise add: %cast_2386 + %cast_2080 * 1 (the integer multiplier %2380 comes from %int1_2387).
%int1_2387 = torch.constant.int 1
%2380 = torch_c.to_i64 %int1_2387
%c1_2388 = arith.constant 1 : index
%c1_2389 = arith.constant 1 : index
%c512_2390 = arith.constant 512 : index
%c2_2391 = arith.constant 2 : index
%c28_2392 = arith.constant 28 : index
%c3_2393 = arith.constant 3 : index
%c28_2394 = arith.constant 28 : index
%c1_2395 = arith.constant 1 : index
%c512_2396 = arith.constant 512 : index
// Broadcast checks: each assert compares two equal constants (512 vs 512, 28 vs 28, 28 vs 28).
%2381 = arith.cmpi eq, %c512_2390, %c512_2396 : index
cf.assert %2381, "mismatched size for broadcast"
%c2_2397 = arith.constant 2 : index
%c28_2398 = arith.constant 28 : index
%2382 = arith.cmpi eq, %c28_2392, %c28_2398 : index
cf.assert %2382, "mismatched size for broadcast"
%c3_2399 = arith.constant 3 : index
%c28_2400 = arith.constant 28 : index
%2383 = arith.cmpi eq, %c28_2394, %c28_2400 : index
cf.assert %2383, "mismatched size for broadcast"
%2384 = tensor.empty() : tensor<1x512x28x28xf32>
%2385 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2386, %cast_2080 : tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) outs(%2384 : tensor<1x512x28x28xf32>) {
^bb0(%in: f32, %in_6197: f32, %out: f32):
// Per element: %in + %in_6197 * f32(%2380).
%5774 = arith.sitofp %2380 : i64 to f32
%5775 = arith.mulf %in_6197, %5774 : f32
%5776 = arith.addf %in, %5775 : f32
linalg.yield %5776 : f32
} -> tensor<1x512x28x28xf32>
%cast_2401 = tensor.cast %2385 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
// ReLU-style clamp of %cast_2401: keep elements that compare greater than 0.0, otherwise yield 0.0.
// The index constants below are not referenced in the visible part of the file.
%c1_2402 = arith.constant 1 : index
%c1_2403 = arith.constant 1 : index
%c512_2404 = arith.constant 512 : index
%c2_2405 = arith.constant 2 : index
%c28_2406 = arith.constant 28 : index
%c3_2407 = arith.constant 3 : index
%c28_2408 = arith.constant 28 : index
%2386 = tensor.empty() : tensor<1x512x28x28xf32>
%2387 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2401 : tensor<1x512x28x28xf32>) outs(%2386 : tensor<1x512x28x28xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// ugt is the unordered greater-than predicate of arith.cmpf.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x512x28x28xf32>
%cast_2409 = tensor.cast %2387 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
// Quantize the f32 tensor %cast_2409 to i8 using scale 1.5625e-02 and zero point 0.
%2388 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%2389 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): %int12_2410 is not referenced in the visible part of the file; presumably a dtype code carried over from the torch-level op -- confirm.
%int12_2410 = torch.constant.int 12
// Scalar scale (%2390) and zero point (%2392); the generic body below converts them again itself.
%2390 = torch.aten.item %2388 : !torch.vtensor<[],f32> -> !torch.float
%2391 = torch_c.to_f64 %2390
%2392 = torch.aten.item %2389 : !torch.vtensor<[],si8> -> !torch.int
%2393 = torch_c.to_i64 %2392
// The index constants below are not referenced in the visible part of the file.
%c1_2411 = arith.constant 1 : index
%c1_2412 = arith.constant 1 : index
%c512_2413 = arith.constant 512 : index
%c2_2414 = arith.constant 2 : index
%c28_2415 = arith.constant 28 : index
%c3_2416 = arith.constant 3 : index
%c28_2417 = arith.constant 28 : index
%2394 = tensor.empty() : tensor<1x512x28x28xi8>
%2395 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2409 : tensor<1x512x28x28xf32>) outs(%2394 : tensor<1x512x28x28xi8>) {
^bb0(%in: f32, %out: i8):
// Per element: round(%in / scale) + zero_point, clamped to [-128, 127], then converted to signed i8.
%5774 = torch_c.to_i64 %2392
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %2390
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round rounds ties away from zero; ONNX QuantizeLinear specifies ties-to-even -- confirm whether math.roundeven was intended.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
// ult/ugt are the unordered comparison predicates of arith.cmpf.
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x512x28x28xi8>
// Both casts keep the type unchanged (source and result types are identical).
%cast_2418 = tensor.cast %2395 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
%cast_2419 = tensor.cast %cast_2418 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
// Dequantize the i8 tensor %cast_2419 back to f32 using scale 1.5625e-02 and zero point 0.
%2396 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%2397 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Scalar scale (%2398) and zero point (%2400); the generic body below converts them again itself.
%2398 = torch.aten.item %2396 : !torch.vtensor<[],f32> -> !torch.float
%2399 = torch_c.to_f64 %2398
%2400 = torch.aten.item %2397 : !torch.vtensor<[],si8> -> !torch.int
%2401 = torch_c.to_i64 %2400
%cast_2420 = tensor.cast %cast_2419 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
// The index constants below are not referenced in the visible part of the file.
%c1_2421 = arith.constant 1 : index
%c1_2422 = arith.constant 1 : index
%c512_2423 = arith.constant 512 : index
%c2_2424 = arith.constant 2 : index
%c28_2425 = arith.constant 28 : index
%c3_2426 = arith.constant 3 : index
%c28_2427 = arith.constant 28 : index
%2402 = tensor.empty() : tensor<1x512x28x28xf32>
%2403 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2420 : tensor<1x512x28x28xi8>) outs(%2402 : tensor<1x512x28x28xf32>) {
^bb0(%in: i8, %out: f32):
// Per element: (sign-extended %in - zero_point) converted to f32, then multiplied by the scale.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2400
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %2398
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x512x28x28xf32>
%cast_2428 = tensor.cast %2403 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
// Quantize the 128x512x1x1 f32 tensor %86 to i8 using scale 0.001953125 and zero point 0.
// %86 is later (after dequantization) the filter operand of the convolution %2480.
%2404 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
%2405 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): %int12_2429 is not referenced in the visible part of the file; presumably a dtype code carried over from the torch-level op -- confirm.
%int12_2429 = torch.constant.int 12
// Scalar scale (%2406) and zero point (%2408); the generic body below converts them again itself.
%2406 = torch.aten.item %2404 : !torch.vtensor<[],f32> -> !torch.float
%2407 = torch_c.to_f64 %2406
%2408 = torch.aten.item %2405 : !torch.vtensor<[],si8> -> !torch.int
%2409 = torch_c.to_i64 %2408
// The index constants below are not referenced in the visible part of the file.
%c1_2430 = arith.constant 1 : index
%c0_2431 = arith.constant 0 : index
%c128_2432 = arith.constant 128 : index
%c1_2433 = arith.constant 1 : index
%c512_2434 = arith.constant 512 : index
%2410 = tensor.empty() : tensor<128x512x1x1xi8>
%2411 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%86 : tensor<128x512x1x1xf32>) outs(%2410 : tensor<128x512x1x1xi8>) {
^bb0(%in: f32, %out: i8):
// Per element: round(%in / scale) + zero_point, clamped to [-128, 127], then converted to signed i8.
%5774 = torch_c.to_i64 %2408
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %2406
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round rounds ties away from zero; ONNX QuantizeLinear specifies ties-to-even -- confirm whether math.roundeven was intended.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
// ult/ugt are the unordered comparison predicates of arith.cmpf.
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<128x512x1x1xi8>
// Both casts keep the type unchanged (source and result types are identical).
%cast_2435 = tensor.cast %2411 : tensor<128x512x1x1xi8> to tensor<128x512x1x1xi8>
%cast_2436 = tensor.cast %cast_2435 : tensor<128x512x1x1xi8> to tensor<128x512x1x1xi8>
// Dequantize the 128x512x1x1 i8 tensor %cast_2436 back to f32 using scale 0.001953125 and zero point 0.
// The result %cast_2443 is the filter operand of the convolution %2480.
%2412 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
%2413 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Scalar scale (%2414) and zero point (%2416); the generic body below converts them again itself.
%2414 = torch.aten.item %2412 : !torch.vtensor<[],f32> -> !torch.float
%2415 = torch_c.to_f64 %2414
%2416 = torch.aten.item %2413 : !torch.vtensor<[],si8> -> !torch.int
%2417 = torch_c.to_i64 %2416
%cast_2437 = tensor.cast %cast_2436 : tensor<128x512x1x1xi8> to tensor<128x512x1x1xi8>
// The index constants below are not referenced in the visible part of the file.
%c1_2438 = arith.constant 1 : index
%c0_2439 = arith.constant 0 : index
%c128_2440 = arith.constant 128 : index
%c1_2441 = arith.constant 1 : index
%c512_2442 = arith.constant 512 : index
%2418 = tensor.empty() : tensor<128x512x1x1xf32>
%2419 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2437 : tensor<128x512x1x1xi8>) outs(%2418 : tensor<128x512x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// Per element: (sign-extended %in - zero_point) converted to f32, then multiplied by the scale.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2416
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %2414
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<128x512x1x1xf32>
%cast_2443 = tensor.cast %2419 : tensor<128x512x1x1xf32> to tensor<128x512x1x1xf32>
// Quantize the 128-element f32 tensor %88 to i8 using scale 3.90625e-03 and zero point 0.
// %88 is later (after dequantization) broadcast into the init tensor of the convolution %2480.
%2420 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2421 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): %int12_2444 is not referenced in the visible part of the file; presumably a dtype code carried over from the torch-level op -- confirm.
%int12_2444 = torch.constant.int 12
// Scalar scale (%2422) and zero point (%2424); the generic body below converts them again itself.
%2422 = torch.aten.item %2420 : !torch.vtensor<[],f32> -> !torch.float
%2423 = torch_c.to_f64 %2422
%2424 = torch.aten.item %2421 : !torch.vtensor<[],si8> -> !torch.int
%2425 = torch_c.to_i64 %2424
// The index constants below are not referenced in the visible part of the file.
%c1_2445 = arith.constant 1 : index
%c0_2446 = arith.constant 0 : index
%c128_2447 = arith.constant 128 : index
%2426 = tensor.empty() : tensor<128xi8>
%2427 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%88 : tensor<128xf32>) outs(%2426 : tensor<128xi8>) {
^bb0(%in: f32, %out: i8):
// Per element: round(%in / scale) + zero_point, clamped to [-128, 127], then converted to signed i8.
%5774 = torch_c.to_i64 %2424
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %2422
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round rounds ties away from zero; ONNX QuantizeLinear specifies ties-to-even -- confirm whether math.roundeven was intended.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
// ult/ugt are the unordered comparison predicates of arith.cmpf.
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<128xi8>
// Both casts keep the type unchanged (source and result types are identical).
%cast_2448 = tensor.cast %2427 : tensor<128xi8> to tensor<128xi8>
%cast_2449 = tensor.cast %cast_2448 : tensor<128xi8> to tensor<128xi8>
// Dequantize the 128-element i8 tensor %cast_2449 back to f32 using scale 3.90625e-03 and zero point 0.
// The result %cast_2454 is broadcast into the init tensor of the convolution %2480.
%2428 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2429 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Scalar scale (%2430) and zero point (%2432); the generic body below converts them again itself.
%2430 = torch.aten.item %2428 : !torch.vtensor<[],f32> -> !torch.float
%2431 = torch_c.to_f64 %2430
%2432 = torch.aten.item %2429 : !torch.vtensor<[],si8> -> !torch.int
%2433 = torch_c.to_i64 %2432
%cast_2450 = tensor.cast %cast_2449 : tensor<128xi8> to tensor<128xi8>
// The index constants below are not referenced in the visible part of the file.
%c1_2451 = arith.constant 1 : index
%c0_2452 = arith.constant 0 : index
%c128_2453 = arith.constant 128 : index
%2434 = tensor.empty() : tensor<128xf32>
%2435 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_2450 : tensor<128xi8>) outs(%2434 : tensor<128xf32>) {
^bb0(%in: i8, %out: f32):
// Per element: (sign-extended %in - zero_point) converted to f32, then multiplied by the scale.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2432
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %2430
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<128xf32>
%cast_2454 = tensor.cast %2435 : tensor<128xf32> to tensor<128xf32>
// 2-D convolution (NCHW input, FCHW filter): input %cast_2428 (1x512x28x28), filter %cast_2443 (128x512x1x1),
// accumulator initialised from %cast_2454 (128 values broadcast along the channel dimension); result cast to 1x128x28x28.
%int0_2455 = torch.constant.int 0
%int0_2456 = torch.constant.int 0
%int1_2457 = torch.constant.int 1
%int1_2458 = torch.constant.int 1
%int1_2459 = torch.constant.int 1
%int1_2460 = torch.constant.int 1
%int0_2461 = torch.constant.int 0
// %2436 = [0, 0]: its elements are the spatial padding amounts used by tensor.pad below.
// NOTE(review): %2437/%2438/%2439 are not referenced in the visible part of the file; presumably the torch-level stride/dilation/output-padding lists -- confirm.
%2436 = torch.prim.ListConstruct %int0_2455, %int0_2456 : (!torch.int, !torch.int) -> !torch.list<int>
%2437 = torch.prim.ListConstruct %int1_2457, %int1_2458 : (!torch.int, !torch.int) -> !torch.list<int>
%2438 = torch.prim.ListConstruct %int1_2459, %int1_2460 : (!torch.int, !torch.int) -> !torch.list<int>
%2439 = torch.prim.ListConstruct %int0_2461, %int0_2461 : (!torch.int, !torch.int) -> !torch.list<int>
%false_2462 = torch.constant.bool false
%int1_2463 = torch.constant.int 1
// %2440 = 1 is the group count checked by the two asserts below; %2441/%2442 = 0 are the padding amounts.
%2440 = torch_c.to_i64 %int1_2463
%2441 = torch_c.to_i64 %int0_2455
%2442 = torch_c.to_i64 %int0_2456
%2443 = torch_c.to_i64 %int0_2461
%2444 = torch_c.to_i64 %int0_2461
%c0_2464 = arith.constant 0 : index
%c1_2465 = arith.constant 1 : index
%c1_2466 = arith.constant 1 : index
%c512_2467 = arith.constant 512 : index
%c2_2468 = arith.constant 2 : index
%c28_2469 = arith.constant 28 : index
%c3_2470 = arith.constant 3 : index
%c28_2471 = arith.constant 28 : index
%c0_2472 = arith.constant 0 : index
%c128_2473 = arith.constant 128 : index
%c1_2474 = arith.constant 1 : index
%c512_2475 = arith.constant 512 : index
%c2_2476 = arith.constant 2 : index
%c1_2477 = arith.constant 1 : index
%c3_2478 = arith.constant 3 : index
%c1_2479 = arith.constant 1 : index
// Group checks: 512 rem groups == 0 and 128 rem groups == 0.
%2445 = arith.index_cast %2440 : i64 to index
%c0_2480 = arith.constant 0 : index
%2446 = arith.remsi %c512_2467, %2445 : index
%2447 = arith.cmpi eq, %c0_2480, %2446 : index
cf.assert %2447, "invalid: groups must divide input channel size evenly."
%c0_2481 = arith.constant 0 : index
%2448 = arith.remsi %c128_2473, %2445 : index
%2449 = arith.cmpi eq, %c0_2481, %2448 : index
cf.assert %2449, "invalid: groups must divide weight batch size evenly."
// NOTE(review): judging by their position in the output-size formula below, %c1_i64_2482/%c1_i64_2483 act as the
// per-axis dilation and %c1_i64_2484/%c1_i64_2485 as the per-axis stride -- confirm.
%c1_i64_2482 = arith.constant 1 : i64
%c1_i64_2483 = arith.constant 1 : i64
%c1_i64_2484 = arith.constant 1 : i64
%c1_i64_2485 = arith.constant 1 : i64
%cst_2486 = arith.constant 0.000000e+00 : f32
%c0_2487 = arith.constant 0 : index
%c1_2488 = arith.constant 1 : index
%c1_2489 = arith.constant 1 : index
%c512_2490 = arith.constant 512 : index
%c2_2491 = arith.constant 2 : index
%c28_2492 = arith.constant 28 : index
%c3_2493 = arith.constant 3 : index
%c28_2494 = arith.constant 28 : index
%c0_i64_2495 = arith.constant 0 : i64
// Pad the input with 0.0: no padding on the first two dimensions, %2441 / %2442 on the last two (low and high).
%2450 = arith.index_cast %c0_i64_2495 : i64 to index
%2451 = arith.index_cast %c0_i64_2495 : i64 to index
%2452 = arith.index_cast %2441 : i64 to index
%2453 = arith.index_cast %2442 : i64 to index
%padded_2496 = tensor.pad %cast_2428 low[%2450, %2451, %2452, %2453] high[%2450, %2451, %2452, %2453] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_2486 : f32
} : tensor<1x512x28x28xf32> to tensor<?x?x?x?xf32>
// Output size for dimension 2: floordiv(28 + 2*%2441 - %c1_i64_2482*(1 - 1) - 1, %c1_i64_2484) + 1.
%2454 = arith.index_cast %c1_2477 : index to i64
%c1_i64_2497 = arith.constant 1 : i64
%c2_i64_2498 = arith.constant 2 : i64
%2455 = arith.muli %2441, %c2_i64_2498 : i64
%2456 = arith.index_cast %c28_2469 : index to i64
%2457 = arith.addi %2456, %2455 : i64
%2458 = arith.subi %2454, %c1_i64_2497 : i64
%2459 = arith.muli %c1_i64_2482, %2458 : i64
%2460 = arith.subi %2457, %2459 : i64
%2461 = arith.subi %2460, %c1_i64_2497 : i64
%2462 = arith.floordivsi %2461, %c1_i64_2484 : i64
%2463 = arith.addi %2462, %c1_i64_2497 : i64
%2464 = arith.index_cast %2463 : i64 to index
// Output size for dimension 3: same formula with %2442, %c1_i64_2483 and %c1_i64_2485.
%2465 = arith.index_cast %c1_2479 : index to i64
%c1_i64_2499 = arith.constant 1 : i64
%c2_i64_2500 = arith.constant 2 : i64
%2466 = arith.muli %2442, %c2_i64_2500 : i64
%2467 = arith.index_cast %c28_2471 : index to i64
%2468 = arith.addi %2467, %2466 : i64
%2469 = arith.subi %2465, %c1_i64_2499 : i64
%2470 = arith.muli %c1_i64_2483, %2469 : i64
%2471 = arith.subi %2468, %2470 : i64
%2472 = arith.subi %2471, %c1_i64_2499 : i64
%2473 = arith.floordivsi %2472, %c1_i64_2485 : i64
%2474 = arith.addi %2473, %c1_i64_2499 : i64
%2475 = arith.index_cast %2474 : i64 to index
// Fill the accumulator by broadcasting %cast_2454 along dimension 1 (#map3 selects d1).
%2476 = tensor.empty(%2464, %2475) : tensor<1x128x?x?xf32>
%2477 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2454 : tensor<128xf32>) outs(%2476 : tensor<1x128x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x128x?x?xf32>
// %2478/%2479 and the two index constants after them are not referenced in the visible part of the file.
%2478 = arith.floordivsi %c512_2467, %2445 : index
%2479 = arith.floordivsi %c128_2473, %2445 : index
%c0_2501 = arith.constant 0 : index
%c1_2502 = arith.constant 1 : index
%2480 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_2496, %cast_2443 : tensor<?x?x?x?xf32>, tensor<128x512x1x1xf32>) outs(%2477 : tensor<1x128x?x?xf32>) -> tensor<1x128x?x?xf32>
// Cast the dynamically shaped result to the static shape 1x128x28x28.
%cast_2503 = tensor.cast %2480 : tensor<1x128x?x?xf32> to tensor<1x128x28x28xf32>
// ReLU-style clamp of %cast_2503: keep elements that compare greater than 0.0, otherwise yield 0.0.
// The index constants below are not referenced in the visible part of the file.
%c1_2504 = arith.constant 1 : index
%c1_2505 = arith.constant 1 : index
%c128_2506 = arith.constant 128 : index
%c2_2507 = arith.constant 2 : index
%c28_2508 = arith.constant 28 : index
%c3_2509 = arith.constant 3 : index
%c28_2510 = arith.constant 28 : index
%2481 = tensor.empty() : tensor<1x128x28x28xf32>
%2482 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2503 : tensor<1x128x28x28xf32>) outs(%2481 : tensor<1x128x28x28xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// ugt is the unordered greater-than predicate of arith.cmpf.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x128x28x28xf32>
%cast_2511 = tensor.cast %2482 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
// Quantize the f32 tensor %cast_2511 to i8 using scale 3.90625e-03 and zero point 0.
%2483 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2484 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): %int12_2512 is not referenced in the visible part of the file; presumably a dtype code carried over from the torch-level op -- confirm.
%int12_2512 = torch.constant.int 12
// Scalar scale (%2485) and zero point (%2487); the generic body below converts them again itself.
%2485 = torch.aten.item %2483 : !torch.vtensor<[],f32> -> !torch.float
%2486 = torch_c.to_f64 %2485
%2487 = torch.aten.item %2484 : !torch.vtensor<[],si8> -> !torch.int
%2488 = torch_c.to_i64 %2487
// The index constants below are not referenced in the visible part of the file.
%c1_2513 = arith.constant 1 : index
%c1_2514 = arith.constant 1 : index
%c128_2515 = arith.constant 128 : index
%c2_2516 = arith.constant 2 : index
%c28_2517 = arith.constant 28 : index
%c3_2518 = arith.constant 3 : index
%c28_2519 = arith.constant 28 : index
%2489 = tensor.empty() : tensor<1x128x28x28xi8>
%2490 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2511 : tensor<1x128x28x28xf32>) outs(%2489 : tensor<1x128x28x28xi8>) {
^bb0(%in: f32, %out: i8):
// Per element: round(%in / scale) + zero_point, clamped to [-128, 127], then converted to signed i8.
%5774 = torch_c.to_i64 %2487
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %2485
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round rounds ties away from zero; ONNX QuantizeLinear specifies ties-to-even -- confirm whether math.roundeven was intended.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
// ult/ugt are the unordered comparison predicates of arith.cmpf.
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x128x28x28xi8>
// Both casts keep the type unchanged (source and result types are identical).
%cast_2520 = tensor.cast %2490 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
%cast_2521 = tensor.cast %cast_2520 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
// Dequantize the i8 tensor %cast_2521 back to f32 using scale 3.90625e-03 and zero point 0.
%2491 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2492 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Scalar scale (%2493) and zero point (%2495); the generic body below converts them again itself.
%2493 = torch.aten.item %2491 : !torch.vtensor<[],f32> -> !torch.float
%2494 = torch_c.to_f64 %2493
%2495 = torch.aten.item %2492 : !torch.vtensor<[],si8> -> !torch.int
%2496 = torch_c.to_i64 %2495
%cast_2522 = tensor.cast %cast_2521 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
// The index constants below are not referenced in the visible part of the file.
%c1_2523 = arith.constant 1 : index
%c1_2524 = arith.constant 1 : index
%c128_2525 = arith.constant 128 : index
%c2_2526 = arith.constant 2 : index
%c28_2527 = arith.constant 28 : index
%c3_2528 = arith.constant 3 : index
%c28_2529 = arith.constant 28 : index
%2497 = tensor.empty() : tensor<1x128x28x28xf32>
%2498 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2522 : tensor<1x128x28x28xi8>) outs(%2497 : tensor<1x128x28x28xf32>) {
^bb0(%in: i8, %out: f32):
// Per element: (sign-extended %in - zero_point) converted to f32, then multiplied by the scale.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2495
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %2493
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x128x28x28xf32>
%cast_2530 = tensor.cast %2498 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
// Quantize the 128x128x3x3 f32 tensor %90 to i8 using scale 3.90625e-03 and zero point 0.
%2499 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2500 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): %int12_2531 is not referenced in the visible part of the file; presumably a dtype code carried over from the torch-level op -- confirm.
%int12_2531 = torch.constant.int 12
// Scalar scale (%2501) and zero point (%2503); the generic body below converts them again itself.
%2501 = torch.aten.item %2499 : !torch.vtensor<[],f32> -> !torch.float
%2502 = torch_c.to_f64 %2501
%2503 = torch.aten.item %2500 : !torch.vtensor<[],si8> -> !torch.int
%2504 = torch_c.to_i64 %2503
// The index constants below are not referenced in the visible part of the file.
%c1_2532 = arith.constant 1 : index
%c0_2533 = arith.constant 0 : index
%c128_2534 = arith.constant 128 : index
%c1_2535 = arith.constant 1 : index
%c128_2536 = arith.constant 128 : index
%c2_2537 = arith.constant 2 : index
%c3_2538 = arith.constant 3 : index
%c3_2539 = arith.constant 3 : index
%c3_2540 = arith.constant 3 : index
%2505 = tensor.empty() : tensor<128x128x3x3xi8>
%2506 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%90 : tensor<128x128x3x3xf32>) outs(%2505 : tensor<128x128x3x3xi8>) {
^bb0(%in: f32, %out: i8):
// Per element: round(%in / scale) + zero_point, clamped to [-128, 127], then converted to signed i8.
%5774 = torch_c.to_i64 %2503
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %2501
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round rounds ties away from zero; ONNX QuantizeLinear specifies ties-to-even -- confirm whether math.roundeven was intended.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
// ult/ugt are the unordered comparison predicates of arith.cmpf.
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<128x128x3x3xi8>
// Both casts keep the type unchanged (source and result types are identical).
%cast_2541 = tensor.cast %2506 : tensor<128x128x3x3xi8> to tensor<128x128x3x3xi8>
%cast_2542 = tensor.cast %cast_2541 : tensor<128x128x3x3xi8> to tensor<128x128x3x3xi8>
// Dequantize the 128x128x3x3 i8 tensor %cast_2542 back to f32 using scale 3.90625e-03 and zero point 0.
%2507 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2508 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Scalar scale (%2509) and zero point (%2511); the generic body below converts them again itself.
%2509 = torch.aten.item %2507 : !torch.vtensor<[],f32> -> !torch.float
%2510 = torch_c.to_f64 %2509
%2511 = torch.aten.item %2508 : !torch.vtensor<[],si8> -> !torch.int
%2512 = torch_c.to_i64 %2511
%cast_2543 = tensor.cast %cast_2542 : tensor<128x128x3x3xi8> to tensor<128x128x3x3xi8>
// The index constants below are not referenced in the visible part of the file.
%c1_2544 = arith.constant 1 : index
%c0_2545 = arith.constant 0 : index
%c128_2546 = arith.constant 128 : index
%c1_2547 = arith.constant 1 : index
%c128_2548 = arith.constant 128 : index
%c2_2549 = arith.constant 2 : index
%c3_2550 = arith.constant 3 : index
%c3_2551 = arith.constant 3 : index
%c3_2552 = arith.constant 3 : index
%2513 = tensor.empty() : tensor<128x128x3x3xf32>
%2514 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2543 : tensor<128x128x3x3xi8>) outs(%2513 : tensor<128x128x3x3xf32>) {
^bb0(%in: i8, %out: f32):
// Per element: (sign-extended %in - zero_point) converted to f32, then multiplied by the scale.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2511
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %2509
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<128x128x3x3xf32>
%cast_2553 = tensor.cast %2514 : tensor<128x128x3x3xf32> to tensor<128x128x3x3xf32>
// Quantize the 128-element f32 tensor %92 to i8 using scale 7.8125e-03 and zero point 0.
%2515 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2516 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): %int12_2554 is not referenced in the visible part of the file; presumably a dtype code carried over from the torch-level op -- confirm.
%int12_2554 = torch.constant.int 12
// Scalar scale (%2517) and zero point (%2519); the generic body below converts them again itself.
%2517 = torch.aten.item %2515 : !torch.vtensor<[],f32> -> !torch.float
%2518 = torch_c.to_f64 %2517
%2519 = torch.aten.item %2516 : !torch.vtensor<[],si8> -> !torch.int
%2520 = torch_c.to_i64 %2519
// The index constants below are not referenced in the visible part of the file.
%c1_2555 = arith.constant 1 : index
%c0_2556 = arith.constant 0 : index
%c128_2557 = arith.constant 128 : index
%2521 = tensor.empty() : tensor<128xi8>
%2522 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%92 : tensor<128xf32>) outs(%2521 : tensor<128xi8>) {
^bb0(%in: f32, %out: i8):
// Per element: round(%in / scale) + zero_point, clamped to [-128, 127], then converted to signed i8.
%5774 = torch_c.to_i64 %2519
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %2517
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round rounds ties away from zero; ONNX QuantizeLinear specifies ties-to-even -- confirm whether math.roundeven was intended.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
// ult/ugt are the unordered comparison predicates of arith.cmpf.
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<128xi8>
// Both casts keep the type unchanged (source and result types are identical).
%cast_2558 = tensor.cast %2522 : tensor<128xi8> to tensor<128xi8>
%cast_2559 = tensor.cast %cast_2558 : tensor<128xi8> to tensor<128xi8>
// Dequantize the 128-element i8 tensor %cast_2559 back to f32 using scale 7.8125e-03 and zero point 0.
%2523 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2524 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Scalar scale (%2525) and zero point (%2527); the generic body below converts them again itself.
%2525 = torch.aten.item %2523 : !torch.vtensor<[],f32> -> !torch.float
%2526 = torch_c.to_f64 %2525
%2527 = torch.aten.item %2524 : !torch.vtensor<[],si8> -> !torch.int
%2528 = torch_c.to_i64 %2527
%cast_2560 = tensor.cast %cast_2559 : tensor<128xi8> to tensor<128xi8>
// The index constants below are not referenced in the visible part of the file.
%c1_2561 = arith.constant 1 : index
%c0_2562 = arith.constant 0 : index
%c128_2563 = arith.constant 128 : index
%2529 = tensor.empty() : tensor<128xf32>
%2530 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_2560 : tensor<128xi8>) outs(%2529 : tensor<128xf32>) {
^bb0(%in: i8, %out: f32):
// Per element: (sign-extended %in - zero_point) converted to f32, then multiplied by the scale.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2527
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %2525
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<128xf32>
%cast_2564 = tensor.cast %2530 : tensor<128xf32> to tensor<128xf32>
%int1_2565 = torch.constant.int 1
%int1_2566 = torch.constant.int 1
%int1_2567 = torch.constant.int 1
%int1_2568 = torch.constant.int 1
%int1_2569 = torch.constant.int 1
%int1_2570 = torch.constant.int 1
%int0_2571 = torch.constant.int 0
%2531 = torch.prim.ListConstruct %int1_2565, %int1_2566 : (!torch.int, !torch.int) -> !torch.list<int>
%2532 = torch.prim.ListConstruct %int1_2567, %int1_2568 : (!torch.int, !torch.int) -> !torch.list<int>
%2533 = torch.prim.ListConstruct %int1_2569, %int1_2570 : (!torch.int, !torch.int) -> !torch.list<int>
%2534 = torch.prim.ListConstruct %int0_2571, %int0_2571 : (!torch.int, !torch.int) -> !torch.list<int>
%false_2572 = torch.constant.bool false
%int1_2573 = torch.constant.int 1
%2535 = torch_c.to_i64 %int1_2573
%2536 = torch_c.to_i64 %int1_2565
%2537 = torch_c.to_i64 %int1_2566
%2538 = torch_c.to_i64 %int0_2571
%2539 = torch_c.to_i64 %int0_2571
%c0_2574 = arith.constant 0 : index
%c1_2575 = arith.constant 1 : index
%c1_2576 = arith.constant 1 : index
%c128_2577 = arith.constant 128 : index
%c2_2578 = arith.constant 2 : index
%c28_2579 = arith.constant 28 : index
%c3_2580 = arith.constant 3 : index
%c28_2581 = arith.constant 28 : index
%c0_2582 = arith.constant 0 : index
%c128_2583 = arith.constant 128 : index
%c1_2584 = arith.constant 1 : index
%c128_2585 = arith.constant 128 : index
%c2_2586 = arith.constant 2 : index
%c3_2587 = arith.constant 3 : index
%c3_2588 = arith.constant 3 : index
%c3_2589 = arith.constant 3 : index
%2540 = arith.index_cast %2535 : i64 to index
%c0_2590 = arith.constant 0 : index
%2541 = arith.remsi %c128_2577, %2540 : index
%2542 = arith.cmpi eq, %c0_2590, %2541 : index
cf.assert %2542, "invalid: groups must divide input channel size evenly."
%c0_2591 = arith.constant 0 : index
%2543 = arith.remsi %c128_2583, %2540 : index
%2544 = arith.cmpi eq, %c0_2591, %2543 : index
cf.assert %2544, "invalid: groups must divide weight batch size evenly."
%c1_i64_2592 = arith.constant 1 : i64
%c1_i64_2593 = arith.constant 1 : i64
%c1_i64_2594 = arith.constant 1 : i64
%c1_i64_2595 = arith.constant 1 : i64
%cst_2596 = arith.constant 0.000000e+00 : f32
%c0_2597 = arith.constant 0 : index
%c1_2598 = arith.constant 1 : index
%c1_2599 = arith.constant 1 : index
%c128_2600 = arith.constant 128 : index
%c2_2601 = arith.constant 2 : index
%c28_2602 = arith.constant 28 : index
%c3_2603 = arith.constant 3 : index
%c28_2604 = arith.constant 28 : index
%c0_i64_2605 = arith.constant 0 : i64
%2545 = arith.index_cast %c0_i64_2605 : i64 to index
%2546 = arith.index_cast %c0_i64_2605 : i64 to index
%2547 = arith.index_cast %2536 : i64 to index
%2548 = arith.index_cast %2537 : i64 to index
%padded_2606 = tensor.pad %cast_2530 low[%2545, %2546, %2547, %2548] high[%2545, %2546, %2547, %2548] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_2596 : f32
} : tensor<1x128x28x28xf32> to tensor<?x?x?x?xf32>
%2549 = arith.index_cast %c3_2587 : index to i64
%c1_i64_2607 = arith.constant 1 : i64
%c2_i64_2608 = arith.constant 2 : i64
%2550 = arith.muli %2536, %c2_i64_2608 : i64
%2551 = arith.index_cast %c28_2579 : index to i64
%2552 = arith.addi %2551, %2550 : i64
%2553 = arith.subi %2549, %c1_i64_2607 : i64
%2554 = arith.muli %c1_i64_2592, %2553 : i64
%2555 = arith.subi %2552, %2554 : i64
%2556 = arith.subi %2555, %c1_i64_2607 : i64
%2557 = arith.floordivsi %2556, %c1_i64_2594 : i64
%2558 = arith.addi %2557, %c1_i64_2607 : i64
%2559 = arith.index_cast %2558 : i64 to index
%2560 = arith.index_cast %c3_2589 : index to i64
%c1_i64_2609 = arith.constant 1 : i64
%c2_i64_2610 = arith.constant 2 : i64
%2561 = arith.muli %2537, %c2_i64_2610 : i64
%2562 = arith.index_cast %c28_2581 : index to i64
%2563 = arith.addi %2562, %2561 : i64
%2564 = arith.subi %2560, %c1_i64_2609 : i64
%2565 = arith.muli %c1_i64_2593, %2564 : i64
%2566 = arith.subi %2563, %2565 : i64
%2567 = arith.subi %2566, %c1_i64_2609 : i64
%2568 = arith.floordivsi %2567, %c1_i64_2595 : i64
%2569 = arith.addi %2568, %c1_i64_2609 : i64
%2570 = arith.index_cast %2569 : i64 to index
%2571 = tensor.empty(%2559, %2570) : tensor<1x128x?x?xf32>
%2572 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2564 : tensor<128xf32>) outs(%2571 : tensor<1x128x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x128x?x?xf32>
%2573 = arith.floordivsi %c128_2577, %2540 : index
%2574 = arith.floordivsi %c128_2583, %2540 : index
%c0_2611 = arith.constant 0 : index
%c1_2612 = arith.constant 1 : index
%2575 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_2606, %cast_2553 : tensor<?x?x?x?xf32>, tensor<128x128x3x3xf32>) outs(%2572 : tensor<1x128x?x?xf32>) -> tensor<1x128x?x?xf32>
%cast_2613 = tensor.cast %2575 : tensor<1x128x?x?xf32> to tensor<1x128x28x28xf32>
// ReLU over the 1x128x28x28 f32 tensor %cast_2613; the result is named %cast_2621.
// The index constants below are not referenced anywhere inside this span.
%c1_2614 = arith.constant 1 : index
%c1_2615 = arith.constant 1 : index
%c128_2616 = arith.constant 128 : index
%c2_2617 = arith.constant 2 : index
%c28_2618 = arith.constant 28 : index
%c3_2619 = arith.constant 3 : index
%c28_2620 = arith.constant 28 : index
%2576 = tensor.empty() : tensor<1x128x28x28xf32>
// Elementwise select: yield %in when `%in ugt 0.0`, otherwise yield 0.0.
// `ugt` is an unordered compare, so it is also true for NaN and a NaN input is passed through.
%2577 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2613 : tensor<1x128x28x28xf32>) outs(%2576 : tensor<1x128x28x28xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x128x28x28xf32>
// Same-type tensor.cast: changes nothing, only introduces the name %cast_2621.
%cast_2621 = tensor.cast %2577 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
// Per-tensor quantization of %cast_2621 (1x128x28x28 f32) to i8; the result is named %cast_2631.
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 1.5625e-02 (%2578) and zero_point = 0 (%2579).
%2578 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%2579 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_2622 is not referenced in this span (presumably the torch dtype code of the quantized type; unconfirmed).
%int12_2622 = torch.constant.int 12
%2580 = torch.aten.item %2578 : !torch.vtensor<[],f32> -> !torch.float
%2581 = torch_c.to_f64 %2580
%2582 = torch.aten.item %2579 : !torch.vtensor<[],si8> -> !torch.int
%2583 = torch_c.to_i64 %2582
%c1_2623 = arith.constant 1 : index
%c1_2624 = arith.constant 1 : index
%c128_2625 = arith.constant 128 : index
%c2_2626 = arith.constant 2 : index
%c28_2627 = arith.constant 28 : index
%c3_2628 = arith.constant 3 : index
%c28_2629 = arith.constant 28 : index
%2584 = tensor.empty() : tensor<1x128x28x28xi8>
%2585 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2621 : tensor<1x128x28x28xf32>) outs(%2584 : tensor<1x128x28x28xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %2582
%5775 = arith.sitofp %5774 : i64 to f32
// Scale as f32.
%5776 = torch_c.to_f64 %2580
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round ties to even: the module is ONNX-imported (torch.onnx_meta.* on the function) and
// ONNX QuantizeLinear specifies round-half-to-even; math.round would break ties away from zero.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x128x28x28xi8>
// Same-type casts: no-ops that only introduce the names %cast_2630 and %cast_2631.
%cast_2630 = tensor.cast %2585 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
%cast_2631 = tensor.cast %cast_2630 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
// Dequantization of the i8 tensor %cast_2631 back to f32; the result is named %cast_2640.
//   x = (sext(q) - zero_point) * scale, with scale = 1.5625e-02 (%2586) and zero_point = 0 (%2587).
%2586 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%2587 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%2588 = torch.aten.item %2586 : !torch.vtensor<[],f32> -> !torch.float
%2589 = torch_c.to_f64 %2588
%2590 = torch.aten.item %2587 : !torch.vtensor<[],si8> -> !torch.int
%2591 = torch_c.to_i64 %2590
// Same-type cast: a no-op that only introduces the name %cast_2632.
%cast_2632 = tensor.cast %cast_2631 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
%c1_2633 = arith.constant 1 : index
%c1_2634 = arith.constant 1 : index
%c128_2635 = arith.constant 128 : index
%c2_2636 = arith.constant 2 : index
%c28_2637 = arith.constant 28 : index
%c3_2638 = arith.constant 3 : index
%c28_2639 = arith.constant 28 : index
%2592 = tensor.empty() : tensor<1x128x28x28xf32>
%2593 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2632 : tensor<1x128x28x28xi8>) outs(%2592 : tensor<1x128x28x28xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2590
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale as f32.
%5779 = torch_c.to_f64 %2588
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x128x28x28xf32>
%cast_2640 = tensor.cast %2593 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
// Per-tensor quantization of %94 (512x128x1x1 f32, defined outside this span) to i8;
// the result is named %cast_2648.
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 3.90625e-03 (%2594) and zero_point = 0 (%2595).
%2594 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2595 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_2641 is not referenced in this span (presumably the torch dtype code of the quantized type; unconfirmed).
%int12_2641 = torch.constant.int 12
%2596 = torch.aten.item %2594 : !torch.vtensor<[],f32> -> !torch.float
%2597 = torch_c.to_f64 %2596
%2598 = torch.aten.item %2595 : !torch.vtensor<[],si8> -> !torch.int
%2599 = torch_c.to_i64 %2598
%c1_2642 = arith.constant 1 : index
%c0_2643 = arith.constant 0 : index
%c512_2644 = arith.constant 512 : index
%c1_2645 = arith.constant 1 : index
%c128_2646 = arith.constant 128 : index
%2600 = tensor.empty() : tensor<512x128x1x1xi8>
// #map4 reads the input at (d0, d1, 0, 0): the two trailing dimensions have extent 1.
%2601 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%94 : tensor<512x128x1x1xf32>) outs(%2600 : tensor<512x128x1x1xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %2598
%5775 = arith.sitofp %5774 : i64 to f32
// Scale as f32.
%5776 = torch_c.to_f64 %2596
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round ties to even: the module is ONNX-imported (torch.onnx_meta.* on the function) and
// ONNX QuantizeLinear specifies round-half-to-even; math.round would break ties away from zero.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<512x128x1x1xi8>
// Same-type casts: no-ops that only introduce the names %cast_2647 and %cast_2648.
%cast_2647 = tensor.cast %2601 : tensor<512x128x1x1xi8> to tensor<512x128x1x1xi8>
%cast_2648 = tensor.cast %cast_2647 : tensor<512x128x1x1xi8> to tensor<512x128x1x1xi8>
// Dequantization of the i8 tensor %cast_2648 (512x128x1x1) back to f32; the result is named %cast_2655.
//   x = (sext(q) - zero_point) * scale, with scale = 3.90625e-03 (%2602) and zero_point = 0 (%2603).
%2602 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2603 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%2604 = torch.aten.item %2602 : !torch.vtensor<[],f32> -> !torch.float
%2605 = torch_c.to_f64 %2604
%2606 = torch.aten.item %2603 : !torch.vtensor<[],si8> -> !torch.int
%2607 = torch_c.to_i64 %2606
// Same-type cast: a no-op that only introduces the name %cast_2649.
%cast_2649 = tensor.cast %cast_2648 : tensor<512x128x1x1xi8> to tensor<512x128x1x1xi8>
%c1_2650 = arith.constant 1 : index
%c0_2651 = arith.constant 0 : index
%c512_2652 = arith.constant 512 : index
%c1_2653 = arith.constant 1 : index
%c128_2654 = arith.constant 128 : index
%2608 = tensor.empty() : tensor<512x128x1x1xf32>
// #map4 reads the input at (d0, d1, 0, 0): the two trailing dimensions have extent 1.
%2609 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2649 : tensor<512x128x1x1xi8>) outs(%2608 : tensor<512x128x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2606
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale as f32.
%5779 = torch_c.to_f64 %2604
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<512x128x1x1xf32>
%cast_2655 = tensor.cast %2609 : tensor<512x128x1x1xf32> to tensor<512x128x1x1xf32>
// Per-tensor quantization of %96 (512 f32 values, defined outside this span) to i8;
// the result is named %cast_2661.
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 7.8125e-03 (%2610) and zero_point = 0 (%2611).
%2610 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2611 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_2656 is not referenced in this span (presumably the torch dtype code of the quantized type; unconfirmed).
%int12_2656 = torch.constant.int 12
%2612 = torch.aten.item %2610 : !torch.vtensor<[],f32> -> !torch.float
%2613 = torch_c.to_f64 %2612
%2614 = torch.aten.item %2611 : !torch.vtensor<[],si8> -> !torch.int
%2615 = torch_c.to_i64 %2614
%c1_2657 = arith.constant 1 : index
%c0_2658 = arith.constant 0 : index
%c512_2659 = arith.constant 512 : index
%2616 = tensor.empty() : tensor<512xi8>
%2617 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%96 : tensor<512xf32>) outs(%2616 : tensor<512xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %2614
%5775 = arith.sitofp %5774 : i64 to f32
// Scale as f32.
%5776 = torch_c.to_f64 %2612
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round ties to even: the module is ONNX-imported (torch.onnx_meta.* on the function) and
// ONNX QuantizeLinear specifies round-half-to-even; math.round would break ties away from zero.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<512xi8>
// Same-type casts: no-ops that only introduce the names %cast_2660 and %cast_2661.
%cast_2660 = tensor.cast %2617 : tensor<512xi8> to tensor<512xi8>
%cast_2661 = tensor.cast %cast_2660 : tensor<512xi8> to tensor<512xi8>
// Dequantization of the i8 vector %cast_2661 (512 elements) back to f32; the result is named %cast_2666.
//   x = (sext(q) - zero_point) * scale, with scale = 7.8125e-03 (%2618) and zero_point = 0 (%2619).
%2618 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2619 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%2620 = torch.aten.item %2618 : !torch.vtensor<[],f32> -> !torch.float
%2621 = torch_c.to_f64 %2620
%2622 = torch.aten.item %2619 : !torch.vtensor<[],si8> -> !torch.int
%2623 = torch_c.to_i64 %2622
// Same-type cast: a no-op that only introduces the name %cast_2662.
%cast_2662 = tensor.cast %cast_2661 : tensor<512xi8> to tensor<512xi8>
%c1_2663 = arith.constant 1 : index
%c0_2664 = arith.constant 0 : index
%c512_2665 = arith.constant 512 : index
%2624 = tensor.empty() : tensor<512xf32>
%2625 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_2662 : tensor<512xi8>) outs(%2624 : tensor<512xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2622
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale as f32.
%5779 = torch_c.to_f64 %2620
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<512xf32>
%cast_2666 = tensor.cast %2625 : tensor<512xf32> to tensor<512xf32>
// 2-D convolution (linalg.conv_2d_nchw_fchw) of the activation %cast_2640 (1x128x28x28)
// with the filter %cast_2655 (512x128x1x1), accumulated on top of the broadcast bias
// %cast_2666 (512). The result is named %cast_2715 (1x512x28x28).
%int0_2667 = torch.constant.int 0
%int0_2668 = torch.constant.int 0
%int1_2669 = torch.constant.int 1
%int1_2670 = torch.constant.int 1
%int1_2671 = torch.constant.int 1
%int1_2672 = torch.constant.int 1
%int0_2673 = torch.constant.int 0
// The four lists and %false_2674 are not referenced again inside this span; only the
// scalar ints they were built from are converted and used below.
%2626 = torch.prim.ListConstruct %int0_2667, %int0_2668 : (!torch.int, !torch.int) -> !torch.list<int>
%2627 = torch.prim.ListConstruct %int1_2669, %int1_2670 : (!torch.int, !torch.int) -> !torch.list<int>
%2628 = torch.prim.ListConstruct %int1_2671, %int1_2672 : (!torch.int, !torch.int) -> !torch.list<int>
%2629 = torch.prim.ListConstruct %int0_2673, %int0_2673 : (!torch.int, !torch.int) -> !torch.list<int>
%false_2674 = torch.constant.bool false
%int1_2675 = torch.constant.int 1
// %2630 becomes the "groups" divisor checked by the asserts below; %2631/%2632 become the
// spatial padding amounts used by tensor.pad and the output-size arithmetic.
%2630 = torch_c.to_i64 %int1_2675
%2631 = torch_c.to_i64 %int0_2667
%2632 = torch_c.to_i64 %int0_2668
%2633 = torch_c.to_i64 %int0_2673
%2634 = torch_c.to_i64 %int0_2673
%c0_2676 = arith.constant 0 : index
%c1_2677 = arith.constant 1 : index
%c1_2678 = arith.constant 1 : index
%c128_2679 = arith.constant 128 : index
%c2_2680 = arith.constant 2 : index
%c28_2681 = arith.constant 28 : index
%c3_2682 = arith.constant 3 : index
%c28_2683 = arith.constant 28 : index
%c0_2684 = arith.constant 0 : index
%c512_2685 = arith.constant 512 : index
%c1_2686 = arith.constant 1 : index
%c128_2687 = arith.constant 128 : index
%c2_2688 = arith.constant 2 : index
%c1_2689 = arith.constant 1 : index
%c3_2690 = arith.constant 3 : index
%c1_2691 = arith.constant 1 : index
// Runtime checks that 128 and 512 are both divisible by the groups value (1 here).
%2635 = arith.index_cast %2630 : i64 to index
%c0_2692 = arith.constant 0 : index
%2636 = arith.remsi %c128_2679, %2635 : index
%2637 = arith.cmpi eq, %c0_2692, %2636 : index
cf.assert %2637, "invalid: groups must divide input channel size evenly."
%c0_2693 = arith.constant 0 : index
%2638 = arith.remsi %c512_2685, %2635 : index
%2639 = arith.cmpi eq, %c0_2693, %2638 : index
cf.assert %2639, "invalid: groups must divide weight batch size evenly."
%c1_i64_2694 = arith.constant 1 : i64
%c1_i64_2695 = arith.constant 1 : i64
%c1_i64_2696 = arith.constant 1 : i64
%c1_i64_2697 = arith.constant 1 : i64
%cst_2698 = arith.constant 0.000000e+00 : f32
%c0_2699 = arith.constant 0 : index
%c1_2700 = arith.constant 1 : index
%c1_2701 = arith.constant 1 : index
%c128_2702 = arith.constant 128 : index
%c2_2703 = arith.constant 2 : index
%c28_2704 = arith.constant 28 : index
%c3_2705 = arith.constant 3 : index
%c28_2706 = arith.constant 28 : index
%c0_i64_2707 = arith.constant 0 : i64
%2640 = arith.index_cast %c0_i64_2707 : i64 to index
%2641 = arith.index_cast %c0_i64_2707 : i64 to index
%2642 = arith.index_cast %2631 : i64 to index
%2643 = arith.index_cast %2632 : i64 to index
// Pads the input with 0.0. All pad widths derive from constant 0 here, so no elements are
// actually added, but the result type is fully dynamic (?x?x?x?).
%padded_2708 = tensor.pad %cast_2640 low[%2640, %2641, %2642, %2643] high[%2640, %2641, %2642, %2643] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_2698 : f32
} : tensor<1x128x28x28xf32> to tensor<?x?x?x?xf32>
// First spatial output extent:
//   floordiv(28 + 2*%2631 - %c1_i64_2694*(1 - 1) - 1, %c1_i64_2696) + 1
// which evaluates to 28 with the constants above.
%2644 = arith.index_cast %c1_2689 : index to i64
%c1_i64_2709 = arith.constant 1 : i64
%c2_i64_2710 = arith.constant 2 : i64
%2645 = arith.muli %2631, %c2_i64_2710 : i64
%2646 = arith.index_cast %c28_2681 : index to i64
%2647 = arith.addi %2646, %2645 : i64
%2648 = arith.subi %2644, %c1_i64_2709 : i64
%2649 = arith.muli %c1_i64_2694, %2648 : i64
%2650 = arith.subi %2647, %2649 : i64
%2651 = arith.subi %2650, %c1_i64_2709 : i64
%2652 = arith.floordivsi %2651, %c1_i64_2696 : i64
%2653 = arith.addi %2652, %c1_i64_2709 : i64
%2654 = arith.index_cast %2653 : i64 to index
// Second spatial output extent: same formula using %2632, %c1_i64_2695 and %c1_i64_2697.
%2655 = arith.index_cast %c1_2691 : index to i64
%c1_i64_2711 = arith.constant 1 : i64
%c2_i64_2712 = arith.constant 2 : i64
%2656 = arith.muli %2632, %c2_i64_2712 : i64
%2657 = arith.index_cast %c28_2683 : index to i64
%2658 = arith.addi %2657, %2656 : i64
%2659 = arith.subi %2655, %c1_i64_2711 : i64
%2660 = arith.muli %c1_i64_2695, %2659 : i64
%2661 = arith.subi %2658, %2660 : i64
%2662 = arith.subi %2661, %c1_i64_2711 : i64
%2663 = arith.floordivsi %2662, %c1_i64_2697 : i64
%2664 = arith.addi %2663, %c1_i64_2711 : i64
%2665 = arith.index_cast %2664 : i64 to index
// Initialise the accumulator by broadcasting the 512-element bias along d1 (#map3 selects d1).
%2666 = tensor.empty(%2654, %2665) : tensor<1x512x?x?xf32>
%2667 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2666 : tensor<512xf32>) outs(%2666 : tensor<1x512x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x512x?x?xf32>
// %2668, %2669 and the two index constants below are not referenced again inside this span.
%2668 = arith.floordivsi %c128_2679, %2635 : index
%2669 = arith.floordivsi %c512_2685, %2635 : index
%c0_2713 = arith.constant 0 : index
%c1_2714 = arith.constant 1 : index
// Stride-1, dilation-1 convolution accumulating into the bias-initialised tensor %2667.
%2670 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_2708, %cast_2655 : tensor<?x?x?x?xf32>, tensor<512x128x1x1xf32>) outs(%2667 : tensor<1x512x?x?xf32>) -> tensor<1x512x?x?xf32>
// Cast the dynamically shaped result to the static 1x512x28x28 type.
%cast_2715 = tensor.cast %2670 : tensor<1x512x?x?xf32> to tensor<1x512x28x28xf32>
// Per-tensor quantization of %cast_2715 (1x512x28x28 f32) to i8; the result is named %cast_2725.
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 7.8125e-03 (%2671) and zero_point = 0 (%2672).
%2671 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2672 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_2716 is not referenced in this span (presumably the torch dtype code of the quantized type; unconfirmed).
%int12_2716 = torch.constant.int 12
%2673 = torch.aten.item %2671 : !torch.vtensor<[],f32> -> !torch.float
%2674 = torch_c.to_f64 %2673
%2675 = torch.aten.item %2672 : !torch.vtensor<[],si8> -> !torch.int
%2676 = torch_c.to_i64 %2675
%c1_2717 = arith.constant 1 : index
%c1_2718 = arith.constant 1 : index
%c512_2719 = arith.constant 512 : index
%c2_2720 = arith.constant 2 : index
%c28_2721 = arith.constant 28 : index
%c3_2722 = arith.constant 3 : index
%c28_2723 = arith.constant 28 : index
%2677 = tensor.empty() : tensor<1x512x28x28xi8>
%2678 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2715 : tensor<1x512x28x28xf32>) outs(%2677 : tensor<1x512x28x28xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %2675
%5775 = arith.sitofp %5774 : i64 to f32
// Scale as f32.
%5776 = torch_c.to_f64 %2673
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round ties to even: the module is ONNX-imported (torch.onnx_meta.* on the function) and
// ONNX QuantizeLinear specifies round-half-to-even; math.round would break ties away from zero.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x512x28x28xi8>
// Same-type casts: no-ops that only introduce the names %cast_2724 and %cast_2725.
%cast_2724 = tensor.cast %2678 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
%cast_2725 = tensor.cast %cast_2724 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
// Dequantization of the i8 tensor %cast_2725 (1x512x28x28) back to f32; the result is named %cast_2734.
//   x = (sext(q) - zero_point) * scale, with scale = 7.8125e-03 (%2679) and zero_point = 0 (%2680).
%2679 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2680 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%2681 = torch.aten.item %2679 : !torch.vtensor<[],f32> -> !torch.float
%2682 = torch_c.to_f64 %2681
%2683 = torch.aten.item %2680 : !torch.vtensor<[],si8> -> !torch.int
%2684 = torch_c.to_i64 %2683
// Same-type cast: a no-op that only introduces the name %cast_2726.
%cast_2726 = tensor.cast %cast_2725 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
%c1_2727 = arith.constant 1 : index
%c1_2728 = arith.constant 1 : index
%c512_2729 = arith.constant 512 : index
%c2_2730 = arith.constant 2 : index
%c28_2731 = arith.constant 28 : index
%c3_2732 = arith.constant 3 : index
%c28_2733 = arith.constant 28 : index
%2685 = tensor.empty() : tensor<1x512x28x28xf32>
%2686 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2726 : tensor<1x512x28x28xi8>) outs(%2685 : tensor<1x512x28x28xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2683
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale as f32.
%5779 = torch_c.to_f64 %2681
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x512x28x28xf32>
%cast_2734 = tensor.cast %2686 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
// Elementwise add of %cast_2734 and %cast_2428 (both 1x512x28x28 f32; %cast_2428 is defined
// outside this span). Computes  lhs + rhs * alpha  with alpha = 1 (%int1_2735); the result is
// named %cast_2749.
%int1_2735 = torch.constant.int 1
%2687 = torch_c.to_i64 %int1_2735
%c1_2736 = arith.constant 1 : index
%c1_2737 = arith.constant 1 : index
%c512_2738 = arith.constant 512 : index
%c2_2739 = arith.constant 2 : index
%c28_2740 = arith.constant 28 : index
%c3_2741 = arith.constant 3 : index
%c28_2742 = arith.constant 28 : index
%c1_2743 = arith.constant 1 : index
%c512_2744 = arith.constant 512 : index
// Broadcast-compatibility asserts. Each one compares two equal constants (512 vs 512,
// 28 vs 28, 28 vs 28), so the conditions are always true for these static shapes.
%2688 = arith.cmpi eq, %c512_2738, %c512_2744 : index
cf.assert %2688, "mismatched size for broadcast"
%c2_2745 = arith.constant 2 : index
%c28_2746 = arith.constant 28 : index
%2689 = arith.cmpi eq, %c28_2740, %c28_2746 : index
cf.assert %2689, "mismatched size for broadcast"
%c3_2747 = arith.constant 3 : index
%c28_2748 = arith.constant 28 : index
%2690 = arith.cmpi eq, %c28_2742, %c28_2748 : index
cf.assert %2690, "mismatched size for broadcast"
%2691 = tensor.empty() : tensor<1x512x28x28xf32>
%2692 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2734, %cast_2428 : tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) outs(%2691 : tensor<1x512x28x28xf32>) {
^bb0(%in: f32, %in_6197: f32, %out: f32):
// alpha (the i64 value 1) converted to f32, scales the second operand before the add.
%5774 = arith.sitofp %2687 : i64 to f32
%5775 = arith.mulf %in_6197, %5774 : f32
%5776 = arith.addf %in, %5775 : f32
linalg.yield %5776 : f32
} -> tensor<1x512x28x28xf32>
%cast_2749 = tensor.cast %2692 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
// ReLU over the 1x512x28x28 f32 tensor %cast_2749; the result is named %cast_2757.
// The index constants below are not referenced anywhere inside this span.
%c1_2750 = arith.constant 1 : index
%c1_2751 = arith.constant 1 : index
%c512_2752 = arith.constant 512 : index
%c2_2753 = arith.constant 2 : index
%c28_2754 = arith.constant 28 : index
%c3_2755 = arith.constant 3 : index
%c28_2756 = arith.constant 28 : index
%2693 = tensor.empty() : tensor<1x512x28x28xf32>
// Elementwise select: yield %in when `%in ugt 0.0`, otherwise yield 0.0.
// `ugt` is an unordered compare, so it is also true for NaN and a NaN input is passed through.
%2694 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2749 : tensor<1x512x28x28xf32>) outs(%2693 : tensor<1x512x28x28xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x512x28x28xf32>
// Same-type tensor.cast: changes nothing, only introduces the name %cast_2757.
%cast_2757 = tensor.cast %2694 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
// Per-tensor quantization of %cast_2757 (1x512x28x28 f32) to i8; the result is named %cast_2767.
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 1.5625e-02 (%2695) and zero_point = 0 (%2696).
%2695 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%2696 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_2758 is not referenced in this span (presumably the torch dtype code of the quantized type; unconfirmed).
%int12_2758 = torch.constant.int 12
%2697 = torch.aten.item %2695 : !torch.vtensor<[],f32> -> !torch.float
%2698 = torch_c.to_f64 %2697
%2699 = torch.aten.item %2696 : !torch.vtensor<[],si8> -> !torch.int
%2700 = torch_c.to_i64 %2699
%c1_2759 = arith.constant 1 : index
%c1_2760 = arith.constant 1 : index
%c512_2761 = arith.constant 512 : index
%c2_2762 = arith.constant 2 : index
%c28_2763 = arith.constant 28 : index
%c3_2764 = arith.constant 3 : index
%c28_2765 = arith.constant 28 : index
%2701 = tensor.empty() : tensor<1x512x28x28xi8>
%2702 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2757 : tensor<1x512x28x28xf32>) outs(%2701 : tensor<1x512x28x28xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %2699
%5775 = arith.sitofp %5774 : i64 to f32
// Scale as f32.
%5776 = torch_c.to_f64 %2697
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round ties to even: the module is ONNX-imported (torch.onnx_meta.* on the function) and
// ONNX QuantizeLinear specifies round-half-to-even; math.round would break ties away from zero.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x512x28x28xi8>
// Same-type casts: no-ops that only introduce the names %cast_2766 and %cast_2767.
%cast_2766 = tensor.cast %2702 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
%cast_2767 = tensor.cast %cast_2766 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
// Dequantization of the i8 tensor %cast_2767 (1x512x28x28) back to f32; the result is named %cast_2776.
//   x = (sext(q) - zero_point) * scale, with scale = 1.5625e-02 (%2703) and zero_point = 0 (%2704).
%2703 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%2704 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%2705 = torch.aten.item %2703 : !torch.vtensor<[],f32> -> !torch.float
%2706 = torch_c.to_f64 %2705
%2707 = torch.aten.item %2704 : !torch.vtensor<[],si8> -> !torch.int
%2708 = torch_c.to_i64 %2707
// Same-type cast: a no-op that only introduces the name %cast_2768.
%cast_2768 = tensor.cast %cast_2767 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
%c1_2769 = arith.constant 1 : index
%c1_2770 = arith.constant 1 : index
%c512_2771 = arith.constant 512 : index
%c2_2772 = arith.constant 2 : index
%c28_2773 = arith.constant 28 : index
%c3_2774 = arith.constant 3 : index
%c28_2775 = arith.constant 28 : index
%2709 = tensor.empty() : tensor<1x512x28x28xf32>
%2710 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2768 : tensor<1x512x28x28xi8>) outs(%2709 : tensor<1x512x28x28xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2707
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale as f32.
%5779 = torch_c.to_f64 %2705
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x512x28x28xf32>
%cast_2776 = tensor.cast %2710 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
// Per-tensor quantization of %98 (256x512x1x1 f32, defined outside this span) to i8;
// the result is named %cast_2784.
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 3.90625e-03 (%2711) and zero_point = 0 (%2712).
%2711 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2712 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_2777 is not referenced in this span (presumably the torch dtype code of the quantized type; unconfirmed).
%int12_2777 = torch.constant.int 12
%2713 = torch.aten.item %2711 : !torch.vtensor<[],f32> -> !torch.float
%2714 = torch_c.to_f64 %2713
%2715 = torch.aten.item %2712 : !torch.vtensor<[],si8> -> !torch.int
%2716 = torch_c.to_i64 %2715
%c1_2778 = arith.constant 1 : index
%c0_2779 = arith.constant 0 : index
%c256_2780 = arith.constant 256 : index
%c1_2781 = arith.constant 1 : index
%c512_2782 = arith.constant 512 : index
%2717 = tensor.empty() : tensor<256x512x1x1xi8>
// #map4 reads the input at (d0, d1, 0, 0): the two trailing dimensions have extent 1.
%2718 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%98 : tensor<256x512x1x1xf32>) outs(%2717 : tensor<256x512x1x1xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %2715
%5775 = arith.sitofp %5774 : i64 to f32
// Scale as f32.
%5776 = torch_c.to_f64 %2713
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round ties to even: the module is ONNX-imported (torch.onnx_meta.* on the function) and
// ONNX QuantizeLinear specifies round-half-to-even; math.round would break ties away from zero.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256x512x1x1xi8>
// Same-type casts: no-ops that only introduce the names %cast_2783 and %cast_2784.
%cast_2783 = tensor.cast %2718 : tensor<256x512x1x1xi8> to tensor<256x512x1x1xi8>
%cast_2784 = tensor.cast %cast_2783 : tensor<256x512x1x1xi8> to tensor<256x512x1x1xi8>
// Dequantization of the i8 tensor %cast_2784 (256x512x1x1) back to f32; the result is named %cast_2791.
//   x = (sext(q) - zero_point) * scale, with scale = 3.90625e-03 (%2719) and zero_point = 0 (%2720).
%2719 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2720 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%2721 = torch.aten.item %2719 : !torch.vtensor<[],f32> -> !torch.float
%2722 = torch_c.to_f64 %2721
%2723 = torch.aten.item %2720 : !torch.vtensor<[],si8> -> !torch.int
%2724 = torch_c.to_i64 %2723
// Same-type cast: a no-op that only introduces the name %cast_2785.
%cast_2785 = tensor.cast %cast_2784 : tensor<256x512x1x1xi8> to tensor<256x512x1x1xi8>
%c1_2786 = arith.constant 1 : index
%c0_2787 = arith.constant 0 : index
%c256_2788 = arith.constant 256 : index
%c1_2789 = arith.constant 1 : index
%c512_2790 = arith.constant 512 : index
%2725 = tensor.empty() : tensor<256x512x1x1xf32>
// #map4 reads the input at (d0, d1, 0, 0): the two trailing dimensions have extent 1.
%2726 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2785 : tensor<256x512x1x1xi8>) outs(%2725 : tensor<256x512x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2723
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale as f32.
%5779 = torch_c.to_f64 %2721
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<256x512x1x1xf32>
%cast_2791 = tensor.cast %2726 : tensor<256x512x1x1xf32> to tensor<256x512x1x1xf32>
// Per-tensor quantization of %100 (256 f32 values, defined outside this span) to i8;
// the result is named %cast_2797.
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 3.90625e-03 (%2727) and zero_point = 0 (%2728).
%2727 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2728 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_2792 is not referenced in this span (presumably the torch dtype code of the quantized type; unconfirmed).
%int12_2792 = torch.constant.int 12
%2729 = torch.aten.item %2727 : !torch.vtensor<[],f32> -> !torch.float
%2730 = torch_c.to_f64 %2729
%2731 = torch.aten.item %2728 : !torch.vtensor<[],si8> -> !torch.int
%2732 = torch_c.to_i64 %2731
%c1_2793 = arith.constant 1 : index
%c0_2794 = arith.constant 0 : index
%c256_2795 = arith.constant 256 : index
%2733 = tensor.empty() : tensor<256xi8>
%2734 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%100 : tensor<256xf32>) outs(%2733 : tensor<256xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %2731
%5775 = arith.sitofp %5774 : i64 to f32
// Scale as f32.
%5776 = torch_c.to_f64 %2729
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round ties to even: the module is ONNX-imported (torch.onnx_meta.* on the function) and
// ONNX QuantizeLinear specifies round-half-to-even; math.round would break ties away from zero.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256xi8>
// Same-type casts: no-ops that only introduce the names %cast_2796 and %cast_2797.
%cast_2796 = tensor.cast %2734 : tensor<256xi8> to tensor<256xi8>
%cast_2797 = tensor.cast %cast_2796 : tensor<256xi8> to tensor<256xi8>
// Dequantization of the i8 vector %cast_2797 (256 elements) back to f32; the result is named %cast_2802.
//   x = (sext(q) - zero_point) * scale, with scale = 3.90625e-03 (%2735) and zero_point = 0 (%2736).
%2735 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2736 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%2737 = torch.aten.item %2735 : !torch.vtensor<[],f32> -> !torch.float
%2738 = torch_c.to_f64 %2737
%2739 = torch.aten.item %2736 : !torch.vtensor<[],si8> -> !torch.int
%2740 = torch_c.to_i64 %2739
// Same-type cast: a no-op that only introduces the name %cast_2798.
%cast_2798 = tensor.cast %cast_2797 : tensor<256xi8> to tensor<256xi8>
%c1_2799 = arith.constant 1 : index
%c0_2800 = arith.constant 0 : index
%c256_2801 = arith.constant 256 : index
%2741 = tensor.empty() : tensor<256xf32>
%2742 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_2798 : tensor<256xi8>) outs(%2741 : tensor<256xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2739
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale as f32.
%5779 = torch_c.to_f64 %2737
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<256xf32>
%cast_2802 = tensor.cast %2742 : tensor<256xf32> to tensor<256xf32>
// 2-D convolution (NCHW input, FCHW filter) of %cast_2776 (1x512x28x28) with the
// 256x512x1x1 filter %cast_2791; both operands are defined above the visible lines.
// The accumulator is pre-filled with the dequantized bias %cast_2802. Spatial padding
// is 0, strides and dilations are 1, and the result is cast to 1x256x28x28.
%int0_2803 = torch.constant.int 0
%int0_2804 = torch.constant.int 0
%int1_2805 = torch.constant.int 1
%int1_2806 = torch.constant.int 1
%int1_2807 = torch.constant.int 1
%int1_2808 = torch.constant.int 1
%int0_2809 = torch.constant.int 0
// Torch-level integer lists ([0,0], [1,1], [1,1], [0,0]). None of %2743..%2746 is
// referenced again in the visible lines; the lowered arithmetic uses scalars directly.
%2743 = torch.prim.ListConstruct %int0_2803, %int0_2804 : (!torch.int, !torch.int) -> !torch.list<int>
%2744 = torch.prim.ListConstruct %int1_2805, %int1_2806 : (!torch.int, !torch.int) -> !torch.list<int>
%2745 = torch.prim.ListConstruct %int1_2807, %int1_2808 : (!torch.int, !torch.int) -> !torch.list<int>
%2746 = torch.prim.ListConstruct %int0_2809, %int0_2809 : (!torch.int, !torch.int) -> !torch.list<int>
%false_2810 = torch.constant.bool false
%int1_2811 = torch.constant.int 1
// %2747 (= 1) is the divisor in the "groups" assertions below.
%2747 = torch_c.to_i64 %int1_2811
// %2748 / %2749 (= 0) become the low/high padding of the last two dims in tensor.pad.
%2748 = torch_c.to_i64 %int0_2803
%2749 = torch_c.to_i64 %int0_2804
%2750 = torch_c.to_i64 %int0_2809
%2751 = torch_c.to_i64 %int0_2809
// Index constants. Of this group only %c512_2815, %c28_2817, %c28_2819, %c256_2821,
// %c1_2825 and %c1_2827 are read in the visible lines.
%c0_2812 = arith.constant 0 : index
%c1_2813 = arith.constant 1 : index
%c1_2814 = arith.constant 1 : index
%c512_2815 = arith.constant 512 : index
%c2_2816 = arith.constant 2 : index
%c28_2817 = arith.constant 28 : index
%c3_2818 = arith.constant 3 : index
%c28_2819 = arith.constant 28 : index
%c0_2820 = arith.constant 0 : index
%c256_2821 = arith.constant 256 : index
%c1_2822 = arith.constant 1 : index
%c512_2823 = arith.constant 512 : index
%c2_2824 = arith.constant 2 : index
%c1_2825 = arith.constant 1 : index
%c3_2826 = arith.constant 3 : index
%c1_2827 = arith.constant 1 : index
// Runtime checks: 512 % groups == 0 and 256 % groups == 0.
%2752 = arith.index_cast %2747 : i64 to index
%c0_2828 = arith.constant 0 : index
%2753 = arith.remsi %c512_2815, %2752 : index
%2754 = arith.cmpi eq, %c0_2828, %2753 : index
cf.assert %2754, "invalid: groups must divide input channel size evenly."
%c0_2829 = arith.constant 0 : index
%2755 = arith.remsi %c256_2821, %2752 : index
%2756 = arith.cmpi eq, %c0_2829, %2755 : index
cf.assert %2756, "invalid: groups must divide weight batch size evenly."
// i64 factors used in the output-size arithmetic below: %c1_i64_2830 / %c1_i64_2831
// multiply (kernel - 1); %c1_i64_2832 / %c1_i64_2833 are the floor-division divisors.
%c1_i64_2830 = arith.constant 1 : i64
%c1_i64_2831 = arith.constant 1 : i64
%c1_i64_2832 = arith.constant 1 : i64
%c1_i64_2833 = arith.constant 1 : i64
// Padding fill value.
%cst_2834 = arith.constant 0.000000e+00 : f32
// The index constants %c0_2835..%c28_2842 are not referenced in the visible lines.
%c0_2835 = arith.constant 0 : index
%c1_2836 = arith.constant 1 : index
%c1_2837 = arith.constant 1 : index
%c512_2838 = arith.constant 512 : index
%c2_2839 = arith.constant 2 : index
%c28_2840 = arith.constant 28 : index
%c3_2841 = arith.constant 3 : index
%c28_2842 = arith.constant 28 : index
// Pad amounts: 0 for dims 0 and 1, %2748 / %2749 (also 0 here) for dims 2 and 3.
%c0_i64_2843 = arith.constant 0 : i64
%2757 = arith.index_cast %c0_i64_2843 : i64 to index
%2758 = arith.index_cast %c0_i64_2843 : i64 to index
%2759 = arith.index_cast %2748 : i64 to index
%2760 = arith.index_cast %2749 : i64 to index
// Pad with 0.0; the result type is fully dynamic even though every pad amount is 0.
%padded_2844 = tensor.pad %cast_2776 low[%2757, %2758, %2759, %2760] high[%2757, %2758, %2759, %2760] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_2834 : f32
} : tensor<1x512x28x28xf32> to tensor<?x?x?x?xf32>
// First spatial output extent:
//   floordiv(28 + 2*pad - %c1_i64_2830*(k - 1) - 1, %c1_i64_2832) + 1, with k = 1, pad = 0.
// This has the shape of the usual convolution output-size formula; with the constants
// above it evaluates to 28.
%2761 = arith.index_cast %c1_2825 : index to i64
%c1_i64_2845 = arith.constant 1 : i64
%c2_i64_2846 = arith.constant 2 : i64
%2762 = arith.muli %2748, %c2_i64_2846 : i64
%2763 = arith.index_cast %c28_2817 : index to i64
%2764 = arith.addi %2763, %2762 : i64
%2765 = arith.subi %2761, %c1_i64_2845 : i64
%2766 = arith.muli %c1_i64_2830, %2765 : i64
%2767 = arith.subi %2764, %2766 : i64
%2768 = arith.subi %2767, %c1_i64_2845 : i64
%2769 = arith.floordivsi %2768, %c1_i64_2832 : i64
%2770 = arith.addi %2769, %c1_i64_2845 : i64
%2771 = arith.index_cast %2770 : i64 to index
// Second spatial output extent, same computation on the other spatial dim (also 28).
%2772 = arith.index_cast %c1_2827 : index to i64
%c1_i64_2847 = arith.constant 1 : i64
%c2_i64_2848 = arith.constant 2 : i64
%2773 = arith.muli %2749, %c2_i64_2848 : i64
%2774 = arith.index_cast %c28_2819 : index to i64
%2775 = arith.addi %2774, %2773 : i64
%2776 = arith.subi %2772, %c1_i64_2847 : i64
%2777 = arith.muli %c1_i64_2831, %2776 : i64
%2778 = arith.subi %2775, %2777 : i64
%2779 = arith.subi %2778, %c1_i64_2847 : i64
%2780 = arith.floordivsi %2779, %c1_i64_2833 : i64
%2781 = arith.addi %2780, %c1_i64_2847 : i64
%2782 = arith.index_cast %2781 : i64 to index
// Output buffer with the two computed spatial extents.
%2783 = tensor.empty(%2771, %2782) : tensor<1x256x?x?xf32>
// Broadcast the 256-element bias along dim 1 (#map3 selects d1) to initialise the output.
%2784 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2802 : tensor<256xf32>) outs(%2783 : tensor<1x256x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x256x?x?xf32>
// %2785 / %2786 (channels divided by groups) are not referenced in the visible lines.
%2785 = arith.floordivsi %c512_2815, %2752 : index
%2786 = arith.floordivsi %c256_2821, %2752 : index
%c0_2849 = arith.constant 0 : index
%c1_2850 = arith.constant 1 : index
// The convolution accumulates into the bias-initialised tensor %2784.
%2787 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_2844, %cast_2791 : tensor<?x?x?x?xf32>, tensor<256x512x1x1xf32>) outs(%2784 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
// Refine the dynamic spatial dims to the static 28x28 shape.
%cast_2851 = tensor.cast %2787 : tensor<1x256x?x?xf32> to tensor<1x256x28x28xf32>
// Element-wise ReLU over %cast_2851 (1x256x28x28): out = (in > 0) ? in : 0.
// The index constants below are not referenced in the visible lines.
%c1_2852 = arith.constant 1 : index
%c1_2853 = arith.constant 1 : index
%c256_2854 = arith.constant 256 : index
%c2_2855 = arith.constant 2 : index
%c28_2856 = arith.constant 28 : index
%c3_2857 = arith.constant 3 : index
%c28_2858 = arith.constant 28 : index
%2788 = tensor.empty() : tensor<1x256x28x28xf32>
// The input map (#map) pins the size-1 leading dim to index 0; the output map is identity.
%2789 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2851 : tensor<1x256x28x28xf32>) outs(%2788 : tensor<1x256x28x28xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// NOTE(review): `ugt` is the unordered-or-greater predicate, so a NaN input would
// presumably select %in (i.e. propagate the NaN) - confirm against the arith dialect docs.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x256x28x28xf32>
// Same-type cast.
%cast_2859 = tensor.cast %2789 : tensor<1x256x28x28xf32> to tensor<1x256x28x28xf32>
// Quantize the f32 activation %cast_2859 (1x256x28x28) to i8.
// Per element: out = fptosi(clamp(round(in / scale) + zero_point, -128, 127)),
// with scale = 1.562500e-02 and zero_point = 0 from the literals below.
%2790 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%2791 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_2860, %2793 and %2795 are not referenced in the visible lines.
%int12_2860 = torch.constant.int 12
%2792 = torch.aten.item %2790 : !torch.vtensor<[],f32> -> !torch.float
%2793 = torch_c.to_f64 %2792
%2794 = torch.aten.item %2791 : !torch.vtensor<[],si8> -> !torch.int
%2795 = torch_c.to_i64 %2794
// The index constants below are not referenced in the visible lines.
%c1_2861 = arith.constant 1 : index
%c1_2862 = arith.constant 1 : index
%c256_2863 = arith.constant 256 : index
%c2_2864 = arith.constant 2 : index
%c28_2865 = arith.constant 28 : index
%c3_2866 = arith.constant 3 : index
%c28_2867 = arith.constant 28 : index
%2796 = tensor.empty() : tensor<1x256x28x28xi8>
%2797 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2859 : tensor<1x256x28x28xf32>) outs(%2796 : tensor<1x256x28x28xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32, scale as f32 (truncated from f64).
%5774 = torch_c.to_i64 %2794
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %2792
%5777 = arith.truncf %5776 : f64 to f32
// Scale, round, then add the zero point.
// NOTE(review): math.round is documented as rounding halfway cases away from zero.
// The module is tagged as ONNX-derived (torch.onnx_meta.* attributes), and ONNX
// QuantizeLinear specifies round-half-to-even, so math.roundeven may be the intended
// op here - confirm against the producing lowering.
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to [-128, 127] before the float-to-i8 conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x256x28x28xi8>
// Two consecutive same-type casts.
%cast_2868 = tensor.cast %2797 : tensor<1x256x28x28xi8> to tensor<1x256x28x28xi8>
%cast_2869 = tensor.cast %cast_2868 : tensor<1x256x28x28xi8> to tensor<1x256x28x28xi8>
// Dequantize the i8 activation %cast_2869 (1x256x28x28) back to f32.
// Per element: out = f32(sext_i32(in) - zero_point) * scale, with scale = 1.562500e-02
// and zero_point = 0 from the literals below.
%2798 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%2799 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %2801 and %2803 are not referenced in the visible lines; the region re-derives them.
%2800 = torch.aten.item %2798 : !torch.vtensor<[],f32> -> !torch.float
%2801 = torch_c.to_f64 %2800
%2802 = torch.aten.item %2799 : !torch.vtensor<[],si8> -> !torch.int
%2803 = torch_c.to_i64 %2802
// Same-type cast.
%cast_2870 = tensor.cast %cast_2869 : tensor<1x256x28x28xi8> to tensor<1x256x28x28xi8>
// The index constants below are not referenced in the visible lines.
%c1_2871 = arith.constant 1 : index
%c1_2872 = arith.constant 1 : index
%c256_2873 = arith.constant 256 : index
%c2_2874 = arith.constant 2 : index
%c28_2875 = arith.constant 28 : index
%c3_2876 = arith.constant 3 : index
%c28_2877 = arith.constant 28 : index
%2804 = tensor.empty() : tensor<1x256x28x28xf32>
%2805 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2870 : tensor<1x256x28x28xi8>) outs(%2804 : tensor<1x256x28x28xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend the i8 input and subtract the zero point (i64 truncated to i32).
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2802
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
// Convert to f32 and multiply by the scale (f64 truncated to f32).
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %2800
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x256x28x28xf32>
// Same-type cast; %cast_2878 is the input that a later tensor.pad / convolution consumes.
%cast_2878 = tensor.cast %2805 : tensor<1x256x28x28xf32> to tensor<1x256x28x28xf32>
// Quantize the f32 filter %102 (256x256x3x3, defined outside the visible lines) to i8.
// Per element: out = fptosi(clamp(round(in / scale) + zero_point, -128, 127)),
// with scale = 3.906250e-03 and zero_point = 0 from the literals below.
%2806 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2807 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_2879, %2809 and %2811 are not referenced in the visible lines.
%int12_2879 = torch.constant.int 12
%2808 = torch.aten.item %2806 : !torch.vtensor<[],f32> -> !torch.float
%2809 = torch_c.to_f64 %2808
%2810 = torch.aten.item %2807 : !torch.vtensor<[],si8> -> !torch.int
%2811 = torch_c.to_i64 %2810
// The index constants below are not referenced in the visible lines.
%c1_2880 = arith.constant 1 : index
%c0_2881 = arith.constant 0 : index
%c256_2882 = arith.constant 256 : index
%c1_2883 = arith.constant 1 : index
%c256_2884 = arith.constant 256 : index
%c2_2885 = arith.constant 2 : index
%c3_2886 = arith.constant 3 : index
%c3_2887 = arith.constant 3 : index
%c3_2888 = arith.constant 3 : index
%2812 = tensor.empty() : tensor<256x256x3x3xi8>
// Identity indexing maps on both operands: a plain element-wise op.
%2813 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%102 : tensor<256x256x3x3xf32>) outs(%2812 : tensor<256x256x3x3xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32, scale as f32 (truncated from f64).
%5774 = torch_c.to_i64 %2810
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %2808
%5777 = arith.truncf %5776 : f64 to f32
// Scale, round, then add the zero point.
// NOTE(review): math.round is documented as rounding halfway cases away from zero;
// ONNX QuantizeLinear specifies round-half-to-even, so math.roundeven may be the
// intended op - confirm against the producing lowering.
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to [-128, 127] before the float-to-i8 conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256x256x3x3xi8>
// Two consecutive same-type casts.
%cast_2889 = tensor.cast %2813 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
%cast_2890 = tensor.cast %cast_2889 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
// Dequantize the i8 filter %cast_2890 (256x256x3x3) back to f32.
// Per element: out = f32(sext_i32(in) - zero_point) * scale, with scale = 3.906250e-03
// and zero_point = 0 from the literals below.
%2814 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2815 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %2817 and %2819 are not referenced in the visible lines; the region re-derives them.
%2816 = torch.aten.item %2814 : !torch.vtensor<[],f32> -> !torch.float
%2817 = torch_c.to_f64 %2816
%2818 = torch.aten.item %2815 : !torch.vtensor<[],si8> -> !torch.int
%2819 = torch_c.to_i64 %2818
// Same-type cast.
%cast_2891 = tensor.cast %cast_2890 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
// The index constants below are not referenced in the visible lines.
%c1_2892 = arith.constant 1 : index
%c0_2893 = arith.constant 0 : index
%c256_2894 = arith.constant 256 : index
%c1_2895 = arith.constant 1 : index
%c256_2896 = arith.constant 256 : index
%c2_2897 = arith.constant 2 : index
%c3_2898 = arith.constant 3 : index
%c3_2899 = arith.constant 3 : index
%c3_2900 = arith.constant 3 : index
%2820 = tensor.empty() : tensor<256x256x3x3xf32>
%2821 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2891 : tensor<256x256x3x3xi8>) outs(%2820 : tensor<256x256x3x3xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend the i8 input and subtract the zero point (i64 truncated to i32).
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2818
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
// Convert to f32 and multiply by the scale (f64 truncated to f32).
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %2816
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<256x256x3x3xf32>
// Same-type cast; %cast_2901 is the filter operand of a later convolution.
%cast_2901 = tensor.cast %2821 : tensor<256x256x3x3xf32> to tensor<256x256x3x3xf32>
// Quantize the f32 vector %104 (256 elements, defined outside the visible lines) to i8.
// Per element: out = fptosi(clamp(round(in / scale) + zero_point, -128, 127)),
// with scale = 7.812500e-03 and zero_point = 0 from the literals below.
%2822 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2823 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_2902, %2825 and %2827 are not referenced in the visible lines.
%int12_2902 = torch.constant.int 12
%2824 = torch.aten.item %2822 : !torch.vtensor<[],f32> -> !torch.float
%2825 = torch_c.to_f64 %2824
%2826 = torch.aten.item %2823 : !torch.vtensor<[],si8> -> !torch.int
%2827 = torch_c.to_i64 %2826
// The index constants below are not referenced in the visible lines.
%c1_2903 = arith.constant 1 : index
%c0_2904 = arith.constant 0 : index
%c256_2905 = arith.constant 256 : index
%2828 = tensor.empty() : tensor<256xi8>
%2829 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%104 : tensor<256xf32>) outs(%2828 : tensor<256xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32, scale as f32 (truncated from f64).
%5774 = torch_c.to_i64 %2826
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %2824
%5777 = arith.truncf %5776 : f64 to f32
// Scale, round, then add the zero point.
// NOTE(review): math.round is documented as rounding halfway cases away from zero;
// ONNX QuantizeLinear specifies round-half-to-even, so math.roundeven may be the
// intended op - confirm against the producing lowering.
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to [-128, 127] before the float-to-i8 conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256xi8>
// Two consecutive same-type casts.
%cast_2906 = tensor.cast %2829 : tensor<256xi8> to tensor<256xi8>
%cast_2907 = tensor.cast %cast_2906 : tensor<256xi8> to tensor<256xi8>
// Dequantize the i8 vector %cast_2907 (256 elements) back to f32.
// Per element: out = f32(sext_i32(in) - zero_point) * scale, with scale = 7.812500e-03
// and zero_point = 0 from the literals below.
%2830 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2831 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %2833 and %2835 are not referenced in the visible lines; the region re-derives them.
%2832 = torch.aten.item %2830 : !torch.vtensor<[],f32> -> !torch.float
%2833 = torch_c.to_f64 %2832
%2834 = torch.aten.item %2831 : !torch.vtensor<[],si8> -> !torch.int
%2835 = torch_c.to_i64 %2834
// Same-type cast.
%cast_2908 = tensor.cast %cast_2907 : tensor<256xi8> to tensor<256xi8>
// The index constants below are not referenced in the visible lines.
%c1_2909 = arith.constant 1 : index
%c0_2910 = arith.constant 0 : index
%c256_2911 = arith.constant 256 : index
%2836 = tensor.empty() : tensor<256xf32>
%2837 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_2908 : tensor<256xi8>) outs(%2836 : tensor<256xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend the i8 input and subtract the zero point (i64 truncated to i32).
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2834
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
// Convert to f32 and multiply by the scale (f64 truncated to f32).
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %2832
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<256xf32>
// Same-type cast; %cast_2912 is consumed later as the convolution bias.
%cast_2912 = tensor.cast %2837 : tensor<256xf32> to tensor<256xf32>
// 2-D convolution (NCHW input, FCHW filter) of the dequantized activation %cast_2878
// (1x256x28x28) with the dequantized 256x256x3x3 filter %cast_2901. The accumulator is
// pre-filled with the dequantized bias %cast_2912. Spatial padding is 1 on each side,
// strides are 2, dilations are 1, and the result is cast to 1x256x14x14.
%int1_2913 = torch.constant.int 1
%int1_2914 = torch.constant.int 1
%int1_2915 = torch.constant.int 1
%int1_2916 = torch.constant.int 1
%int2_2917 = torch.constant.int 2
%int2_2918 = torch.constant.int 2
%int0_2919 = torch.constant.int 0
// Torch-level integer lists ([1,1], [1,1], [2,2], [0,0]). None of %2838..%2841 is
// referenced again in the visible lines; the lowered arithmetic uses scalars directly.
%2838 = torch.prim.ListConstruct %int1_2913, %int1_2914 : (!torch.int, !torch.int) -> !torch.list<int>
%2839 = torch.prim.ListConstruct %int1_2915, %int1_2916 : (!torch.int, !torch.int) -> !torch.list<int>
%2840 = torch.prim.ListConstruct %int2_2917, %int2_2918 : (!torch.int, !torch.int) -> !torch.list<int>
%2841 = torch.prim.ListConstruct %int0_2919, %int0_2919 : (!torch.int, !torch.int) -> !torch.list<int>
%false_2920 = torch.constant.bool false
%int1_2921 = torch.constant.int 1
// %2842 (= 1) is the divisor in the "groups" assertions below.
%2842 = torch_c.to_i64 %int1_2921
// %2843 / %2844 (= 1) become the low/high padding of the last two dims in tensor.pad.
%2843 = torch_c.to_i64 %int1_2913
%2844 = torch_c.to_i64 %int1_2914
%2845 = torch_c.to_i64 %int0_2919
%2846 = torch_c.to_i64 %int0_2919
// Index constants. Of this group only %c256_2925, %c28_2927, %c28_2929, %c256_2931,
// %c3_2935 and %c3_2937 are read in the visible lines.
%c0_2922 = arith.constant 0 : index
%c1_2923 = arith.constant 1 : index
%c1_2924 = arith.constant 1 : index
%c256_2925 = arith.constant 256 : index
%c2_2926 = arith.constant 2 : index
%c28_2927 = arith.constant 28 : index
%c3_2928 = arith.constant 3 : index
%c28_2929 = arith.constant 28 : index
%c0_2930 = arith.constant 0 : index
%c256_2931 = arith.constant 256 : index
%c1_2932 = arith.constant 1 : index
%c256_2933 = arith.constant 256 : index
%c2_2934 = arith.constant 2 : index
%c3_2935 = arith.constant 3 : index
%c3_2936 = arith.constant 3 : index
%c3_2937 = arith.constant 3 : index
// Runtime checks: 256 % groups == 0 for both the input channels and the filter count.
%2847 = arith.index_cast %2842 : i64 to index
%c0_2938 = arith.constant 0 : index
%2848 = arith.remsi %c256_2925, %2847 : index
%2849 = arith.cmpi eq, %c0_2938, %2848 : index
cf.assert %2849, "invalid: groups must divide input channel size evenly."
%c0_2939 = arith.constant 0 : index
%2850 = arith.remsi %c256_2931, %2847 : index
%2851 = arith.cmpi eq, %c0_2939, %2850 : index
cf.assert %2851, "invalid: groups must divide weight batch size evenly."
// i64 factors used in the output-size arithmetic below: %c1_i64_2940 / %c1_i64_2941
// multiply (kernel - 1); %c2_i64_2942 / %c2_i64_2943 are the floor-division divisors
// (2, matching the strides attribute of the conv op).
%c1_i64_2940 = arith.constant 1 : i64
%c1_i64_2941 = arith.constant 1 : i64
%c2_i64_2942 = arith.constant 2 : i64
%c2_i64_2943 = arith.constant 2 : i64
// Padding fill value.
%cst_2944 = arith.constant 0.000000e+00 : f32
// The index constants %c0_2945..%c28_2952 are not referenced in the visible lines.
%c0_2945 = arith.constant 0 : index
%c1_2946 = arith.constant 1 : index
%c1_2947 = arith.constant 1 : index
%c256_2948 = arith.constant 256 : index
%c2_2949 = arith.constant 2 : index
%c28_2950 = arith.constant 28 : index
%c3_2951 = arith.constant 3 : index
%c28_2952 = arith.constant 28 : index
// Pad amounts: 0 for dims 0 and 1, %2843 / %2844 (= 1) for dims 2 and 3.
%c0_i64_2953 = arith.constant 0 : i64
%2852 = arith.index_cast %c0_i64_2953 : i64 to index
%2853 = arith.index_cast %c0_i64_2953 : i64 to index
%2854 = arith.index_cast %2843 : i64 to index
%2855 = arith.index_cast %2844 : i64 to index
// Zero-pad the two spatial dims by 1 on each side; the result type is fully dynamic.
%padded_2954 = tensor.pad %cast_2878 low[%2852, %2853, %2854, %2855] high[%2852, %2853, %2854, %2855] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_2944 : f32
} : tensor<1x256x28x28xf32> to tensor<?x?x?x?xf32>
// First spatial output extent:
//   floordiv(28 + 2*pad - %c1_i64_2940*(k - 1) - 1, %c2_i64_2942) + 1, with k = 3, pad = 1.
// This has the shape of the usual convolution output-size formula; with the constants
// above it evaluates to floordiv(27, 2) + 1 = 14.
%2856 = arith.index_cast %c3_2935 : index to i64
%c1_i64_2955 = arith.constant 1 : i64
%c2_i64_2956 = arith.constant 2 : i64
%2857 = arith.muli %2843, %c2_i64_2956 : i64
%2858 = arith.index_cast %c28_2927 : index to i64
%2859 = arith.addi %2858, %2857 : i64
%2860 = arith.subi %2856, %c1_i64_2955 : i64
%2861 = arith.muli %c1_i64_2940, %2860 : i64
%2862 = arith.subi %2859, %2861 : i64
%2863 = arith.subi %2862, %c1_i64_2955 : i64
%2864 = arith.floordivsi %2863, %c2_i64_2942 : i64
%2865 = arith.addi %2864, %c1_i64_2955 : i64
%2866 = arith.index_cast %2865 : i64 to index
// Second spatial output extent, same computation on the other spatial dim (also 14).
%2867 = arith.index_cast %c3_2937 : index to i64
%c1_i64_2957 = arith.constant 1 : i64
%c2_i64_2958 = arith.constant 2 : i64
%2868 = arith.muli %2844, %c2_i64_2958 : i64
%2869 = arith.index_cast %c28_2929 : index to i64
%2870 = arith.addi %2869, %2868 : i64
%2871 = arith.subi %2867, %c1_i64_2957 : i64
%2872 = arith.muli %c1_i64_2941, %2871 : i64
%2873 = arith.subi %2870, %2872 : i64
%2874 = arith.subi %2873, %c1_i64_2957 : i64
%2875 = arith.floordivsi %2874, %c2_i64_2943 : i64
%2876 = arith.addi %2875, %c1_i64_2957 : i64
%2877 = arith.index_cast %2876 : i64 to index
// Output buffer with the two computed spatial extents.
%2878 = tensor.empty(%2866, %2877) : tensor<1x256x?x?xf32>
// Broadcast the 256-element bias along dim 1 (#map3 selects d1) to initialise the output.
%2879 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2912 : tensor<256xf32>) outs(%2878 : tensor<1x256x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x256x?x?xf32>
// %2880 / %2881 (channels divided by groups) are not referenced in the visible lines.
%2880 = arith.floordivsi %c256_2925, %2847 : index
%2881 = arith.floordivsi %c256_2931, %2847 : index
%c0_2959 = arith.constant 0 : index
%c1_2960 = arith.constant 1 : index
// The stride-2 convolution accumulates into the bias-initialised tensor %2879.
%2882 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%padded_2954, %cast_2901 : tensor<?x?x?x?xf32>, tensor<256x256x3x3xf32>) outs(%2879 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
// Refine the dynamic spatial dims to the static 14x14 shape.
%cast_2961 = tensor.cast %2882 : tensor<1x256x?x?xf32> to tensor<1x256x14x14xf32>
// Element-wise ReLU over %cast_2961 (1x256x14x14): out = (in > 0) ? in : 0.
// The index constants below are not referenced in the visible lines.
%c1_2962 = arith.constant 1 : index
%c1_2963 = arith.constant 1 : index
%c256_2964 = arith.constant 256 : index
%c2_2965 = arith.constant 2 : index
%c14 = arith.constant 14 : index
%c3_2966 = arith.constant 3 : index
%c14_2967 = arith.constant 14 : index
%2883 = tensor.empty() : tensor<1x256x14x14xf32>
// The input map (#map) pins the size-1 leading dim to index 0; the output map is identity.
%2884 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2961 : tensor<1x256x14x14xf32>) outs(%2883 : tensor<1x256x14x14xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// NOTE(review): `ugt` is the unordered-or-greater predicate, so a NaN input would
// presumably select %in (i.e. propagate the NaN) - confirm against the arith dialect docs.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x256x14x14xf32>
// Same-type cast.
%cast_2968 = tensor.cast %2884 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
// Quantize the f32 activation %cast_2968 (1x256x14x14) to i8.
// Per element: out = fptosi(clamp(round(in / scale) + zero_point, -128, 127)),
// with scale = 1.562500e-02 and zero_point = 0 from the literals below.
%2885 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%2886 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_2969, %2888 and %2890 are not referenced in the visible lines.
%int12_2969 = torch.constant.int 12
%2887 = torch.aten.item %2885 : !torch.vtensor<[],f32> -> !torch.float
%2888 = torch_c.to_f64 %2887
%2889 = torch.aten.item %2886 : !torch.vtensor<[],si8> -> !torch.int
%2890 = torch_c.to_i64 %2889
// The index constants below are not referenced in the visible lines.
%c1_2970 = arith.constant 1 : index
%c1_2971 = arith.constant 1 : index
%c256_2972 = arith.constant 256 : index
%c2_2973 = arith.constant 2 : index
%c14_2974 = arith.constant 14 : index
%c3_2975 = arith.constant 3 : index
%c14_2976 = arith.constant 14 : index
%2891 = tensor.empty() : tensor<1x256x14x14xi8>
%2892 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2968 : tensor<1x256x14x14xf32>) outs(%2891 : tensor<1x256x14x14xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32, scale as f32 (truncated from f64).
%5774 = torch_c.to_i64 %2889
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %2887
%5777 = arith.truncf %5776 : f64 to f32
// Scale, round, then add the zero point.
// NOTE(review): math.round is documented as rounding halfway cases away from zero.
// The module is tagged as ONNX-derived (torch.onnx_meta.* attributes), and ONNX
// QuantizeLinear specifies round-half-to-even, so math.roundeven may be the intended
// op here - confirm against the producing lowering.
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to [-128, 127] before the float-to-i8 conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x256x14x14xi8>
// Two consecutive same-type casts.
%cast_2977 = tensor.cast %2892 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
%cast_2978 = tensor.cast %cast_2977 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
// Dequantize the i8 activation %cast_2978 (1x256x14x14) back to f32.
// Per element: out = f32(sext_i32(in) - zero_point) * scale, with scale = 1.562500e-02
// and zero_point = 0 from the literals below.
%2893 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%2894 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %2896 and %2898 are not referenced in the visible lines; the region re-derives them.
%2895 = torch.aten.item %2893 : !torch.vtensor<[],f32> -> !torch.float
%2896 = torch_c.to_f64 %2895
%2897 = torch.aten.item %2894 : !torch.vtensor<[],si8> -> !torch.int
%2898 = torch_c.to_i64 %2897
// Same-type cast.
%cast_2979 = tensor.cast %cast_2978 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
// The index constants below are not referenced in the visible lines.
%c1_2980 = arith.constant 1 : index
%c1_2981 = arith.constant 1 : index
%c256_2982 = arith.constant 256 : index
%c2_2983 = arith.constant 2 : index
%c14_2984 = arith.constant 14 : index
%c3_2985 = arith.constant 3 : index
%c14_2986 = arith.constant 14 : index
%2899 = tensor.empty() : tensor<1x256x14x14xf32>
%2900 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2979 : tensor<1x256x14x14xi8>) outs(%2899 : tensor<1x256x14x14xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend the i8 input and subtract the zero point (i64 truncated to i32).
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2897
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
// Convert to f32 and multiply by the scale (f64 truncated to f32).
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %2895
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x256x14x14xf32>
// Same-type cast; %cast_2987 is the input that a later tensor.pad / convolution consumes.
%cast_2987 = tensor.cast %2900 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
// Quantize the f32 filter %106 (1024x256x1x1, defined outside the visible lines) to i8.
// Per element: out = fptosi(clamp(round(in / scale) + zero_point, -128, 127)),
// with scale = 3.906250e-03 and zero_point = 0 from the literals below.
%2901 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2902 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_2988, %2904 and %2906 are not referenced in the visible lines.
%int12_2988 = torch.constant.int 12
%2903 = torch.aten.item %2901 : !torch.vtensor<[],f32> -> !torch.float
%2904 = torch_c.to_f64 %2903
%2905 = torch.aten.item %2902 : !torch.vtensor<[],si8> -> !torch.int
%2906 = torch_c.to_i64 %2905
// The index constants below are not referenced in the visible lines.
%c1_2989 = arith.constant 1 : index
%c0_2990 = arith.constant 0 : index
%c1024 = arith.constant 1024 : index
%c1_2991 = arith.constant 1 : index
%c256_2992 = arith.constant 256 : index
%2907 = tensor.empty() : tensor<1024x256x1x1xi8>
// The input map (#map4) pins the two size-1 trailing dims to index 0; the output map is identity.
%2908 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%106 : tensor<1024x256x1x1xf32>) outs(%2907 : tensor<1024x256x1x1xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32, scale as f32 (truncated from f64).
%5774 = torch_c.to_i64 %2905
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %2903
%5777 = arith.truncf %5776 : f64 to f32
// Scale, round, then add the zero point.
// NOTE(review): math.round is documented as rounding halfway cases away from zero;
// ONNX QuantizeLinear specifies round-half-to-even, so math.roundeven may be the
// intended op - confirm against the producing lowering.
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to [-128, 127] before the float-to-i8 conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1024x256x1x1xi8>
// Two consecutive same-type casts.
%cast_2993 = tensor.cast %2908 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
%cast_2994 = tensor.cast %cast_2993 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
// Dequantize the i8 filter %cast_2994 (1024x256x1x1) back to f32.
// Per element: out = f32(sext_i32(in) - zero_point) * scale, with scale = 3.906250e-03
// and zero_point = 0 from the literals below.
%2909 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2910 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %2912 and %2914 are not referenced in the visible lines; the region re-derives them.
%2911 = torch.aten.item %2909 : !torch.vtensor<[],f32> -> !torch.float
%2912 = torch_c.to_f64 %2911
%2913 = torch.aten.item %2910 : !torch.vtensor<[],si8> -> !torch.int
%2914 = torch_c.to_i64 %2913
// Same-type cast.
%cast_2995 = tensor.cast %cast_2994 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
// The index constants below are not referenced in the visible lines.
%c1_2996 = arith.constant 1 : index
%c0_2997 = arith.constant 0 : index
%c1024_2998 = arith.constant 1024 : index
%c1_2999 = arith.constant 1 : index
%c256_3000 = arith.constant 256 : index
%2915 = tensor.empty() : tensor<1024x256x1x1xf32>
// The input map (#map4) pins the two size-1 trailing dims to index 0; the output map is identity.
%2916 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2995 : tensor<1024x256x1x1xi8>) outs(%2915 : tensor<1024x256x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend the i8 input and subtract the zero point (i64 truncated to i32).
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2913
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
// Convert to f32 and multiply by the scale (f64 truncated to f32).
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %2911
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1024x256x1x1xf32>
// Same-type cast; %cast_3001 is the filter operand of a later convolution.
%cast_3001 = tensor.cast %2916 : tensor<1024x256x1x1xf32> to tensor<1024x256x1x1xf32>
// Quantize the f32 vector %108 (1024 elements, defined outside the visible lines) to i8.
// Per element: out = fptosi(clamp(round(in / scale) + zero_point, -128, 127)),
// with scale = 7.812500e-03 and zero_point = 0 from the literals below.
%2917 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2918 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %int12_3002, %2920 and %2922 are not referenced in the visible lines.
%int12_3002 = torch.constant.int 12
%2919 = torch.aten.item %2917 : !torch.vtensor<[],f32> -> !torch.float
%2920 = torch_c.to_f64 %2919
%2921 = torch.aten.item %2918 : !torch.vtensor<[],si8> -> !torch.int
%2922 = torch_c.to_i64 %2921
// The index constants below are not referenced in the visible lines.
%c1_3003 = arith.constant 1 : index
%c0_3004 = arith.constant 0 : index
%c1024_3005 = arith.constant 1024 : index
%2923 = tensor.empty() : tensor<1024xi8>
%2924 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%108 : tensor<1024xf32>) outs(%2923 : tensor<1024xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32, scale as f32 (truncated from f64).
%5774 = torch_c.to_i64 %2921
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %2919
%5777 = arith.truncf %5776 : f64 to f32
// Scale, round, then add the zero point.
// NOTE(review): math.round is documented as rounding halfway cases away from zero;
// ONNX QuantizeLinear specifies round-half-to-even, so math.roundeven may be the
// intended op - confirm against the producing lowering.
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to [-128, 127] before the float-to-i8 conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1024xi8>
// Two consecutive same-type casts.
%cast_3006 = tensor.cast %2924 : tensor<1024xi8> to tensor<1024xi8>
%cast_3007 = tensor.cast %cast_3006 : tensor<1024xi8> to tensor<1024xi8>
// Dequantize the i8 vector %cast_3007 (1024 elements) back to f32.
// Per element: out = f32(sext_i32(in) - zero_point) * scale, with scale = 7.812500e-03
// and zero_point = 0 from the literals below.
%2925 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2926 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %2928 and %2930 are not referenced in the visible lines; the region re-derives them.
%2927 = torch.aten.item %2925 : !torch.vtensor<[],f32> -> !torch.float
%2928 = torch_c.to_f64 %2927
%2929 = torch.aten.item %2926 : !torch.vtensor<[],si8> -> !torch.int
%2930 = torch_c.to_i64 %2929
// Same-type cast.
%cast_3008 = tensor.cast %cast_3007 : tensor<1024xi8> to tensor<1024xi8>
// The index constants below are not referenced in the visible lines.
%c1_3009 = arith.constant 1 : index
%c0_3010 = arith.constant 0 : index
%c1024_3011 = arith.constant 1024 : index
%2931 = tensor.empty() : tensor<1024xf32>
%2932 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_3008 : tensor<1024xi8>) outs(%2931 : tensor<1024xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend the i8 input and subtract the zero point (i64 truncated to i32).
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2929
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
// Convert to f32 and multiply by the scale (f64 truncated to f32).
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %2927
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1024xf32>
// Same-type cast; %cast_3012 is consumed later as the convolution bias.
%cast_3012 = tensor.cast %2932 : tensor<1024xf32> to tensor<1024xf32>
// 2-D convolution (NCHW input, FCHW filter) of the dequantized activation %cast_2987
// (1x256x14x14) with the dequantized 1024x256x1x1 filter %cast_3001. The accumulator is
// pre-filled with the dequantized bias %cast_3012. Spatial padding is 0, strides and
// dilations are 1, and the result is cast to 1x1024x14x14.
%int0_3013 = torch.constant.int 0
%int0_3014 = torch.constant.int 0
%int1_3015 = torch.constant.int 1
%int1_3016 = torch.constant.int 1
%int1_3017 = torch.constant.int 1
%int1_3018 = torch.constant.int 1
%int0_3019 = torch.constant.int 0
// Torch-level integer lists ([0,0], [1,1], [1,1], [0,0]). None of %2933..%2936 is
// referenced again in the visible lines; the lowered arithmetic uses scalars directly.
%2933 = torch.prim.ListConstruct %int0_3013, %int0_3014 : (!torch.int, !torch.int) -> !torch.list<int>
%2934 = torch.prim.ListConstruct %int1_3015, %int1_3016 : (!torch.int, !torch.int) -> !torch.list<int>
%2935 = torch.prim.ListConstruct %int1_3017, %int1_3018 : (!torch.int, !torch.int) -> !torch.list<int>
%2936 = torch.prim.ListConstruct %int0_3019, %int0_3019 : (!torch.int, !torch.int) -> !torch.list<int>
%false_3020 = torch.constant.bool false
%int1_3021 = torch.constant.int 1
// %2937 (= 1) is the divisor in the "groups" assertions below.
%2937 = torch_c.to_i64 %int1_3021
// %2938 / %2939 (= 0) become the low/high padding of the last two dims in tensor.pad.
%2938 = torch_c.to_i64 %int0_3013
%2939 = torch_c.to_i64 %int0_3014
%2940 = torch_c.to_i64 %int0_3019
%2941 = torch_c.to_i64 %int0_3019
// Index constants. Of this group only %c256_3025, %c14_3027, %c14_3029, %c1024_3031,
// %c1_3035 and %c1_3037 are read in the visible lines.
%c0_3022 = arith.constant 0 : index
%c1_3023 = arith.constant 1 : index
%c1_3024 = arith.constant 1 : index
%c256_3025 = arith.constant 256 : index
%c2_3026 = arith.constant 2 : index
%c14_3027 = arith.constant 14 : index
%c3_3028 = arith.constant 3 : index
%c14_3029 = arith.constant 14 : index
%c0_3030 = arith.constant 0 : index
%c1024_3031 = arith.constant 1024 : index
%c1_3032 = arith.constant 1 : index
%c256_3033 = arith.constant 256 : index
%c2_3034 = arith.constant 2 : index
%c1_3035 = arith.constant 1 : index
%c3_3036 = arith.constant 3 : index
%c1_3037 = arith.constant 1 : index
// Runtime checks: 256 % groups == 0 and 1024 % groups == 0.
%2942 = arith.index_cast %2937 : i64 to index
%c0_3038 = arith.constant 0 : index
%2943 = arith.remsi %c256_3025, %2942 : index
%2944 = arith.cmpi eq, %c0_3038, %2943 : index
cf.assert %2944, "invalid: groups must divide input channel size evenly."
%c0_3039 = arith.constant 0 : index
%2945 = arith.remsi %c1024_3031, %2942 : index
%2946 = arith.cmpi eq, %c0_3039, %2945 : index
cf.assert %2946, "invalid: groups must divide weight batch size evenly."
// i64 factors used in the output-size arithmetic below: %c1_i64_3040 / %c1_i64_3041
// multiply (kernel - 1); %c1_i64_3042 / %c1_i64_3043 are the floor-division divisors.
%c1_i64_3040 = arith.constant 1 : i64
%c1_i64_3041 = arith.constant 1 : i64
%c1_i64_3042 = arith.constant 1 : i64
%c1_i64_3043 = arith.constant 1 : i64
// Padding fill value.
%cst_3044 = arith.constant 0.000000e+00 : f32
// The index constants %c0_3045..%c14_3052 are not referenced in the visible lines.
%c0_3045 = arith.constant 0 : index
%c1_3046 = arith.constant 1 : index
%c1_3047 = arith.constant 1 : index
%c256_3048 = arith.constant 256 : index
%c2_3049 = arith.constant 2 : index
%c14_3050 = arith.constant 14 : index
%c3_3051 = arith.constant 3 : index
%c14_3052 = arith.constant 14 : index
// Pad amounts: 0 for dims 0 and 1, %2938 / %2939 (also 0 here) for dims 2 and 3.
%c0_i64_3053 = arith.constant 0 : i64
%2947 = arith.index_cast %c0_i64_3053 : i64 to index
%2948 = arith.index_cast %c0_i64_3053 : i64 to index
%2949 = arith.index_cast %2938 : i64 to index
%2950 = arith.index_cast %2939 : i64 to index
// Pad with 0.0; the result type is fully dynamic even though every pad amount is 0.
%padded_3054 = tensor.pad %cast_2987 low[%2947, %2948, %2949, %2950] high[%2947, %2948, %2949, %2950] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_3044 : f32
} : tensor<1x256x14x14xf32> to tensor<?x?x?x?xf32>
// First spatial output extent:
//   floordiv(14 + 2*pad - %c1_i64_3040*(k - 1) - 1, %c1_i64_3042) + 1, with k = 1, pad = 0.
// This has the shape of the usual convolution output-size formula; with the constants
// above it evaluates to 14.
%2951 = arith.index_cast %c1_3035 : index to i64
%c1_i64_3055 = arith.constant 1 : i64
%c2_i64_3056 = arith.constant 2 : i64
%2952 = arith.muli %2938, %c2_i64_3056 : i64
%2953 = arith.index_cast %c14_3027 : index to i64
%2954 = arith.addi %2953, %2952 : i64
%2955 = arith.subi %2951, %c1_i64_3055 : i64
%2956 = arith.muli %c1_i64_3040, %2955 : i64
%2957 = arith.subi %2954, %2956 : i64
%2958 = arith.subi %2957, %c1_i64_3055 : i64
%2959 = arith.floordivsi %2958, %c1_i64_3042 : i64
%2960 = arith.addi %2959, %c1_i64_3055 : i64
%2961 = arith.index_cast %2960 : i64 to index
// Second spatial output extent, same computation on the other spatial dim (also 14).
%2962 = arith.index_cast %c1_3037 : index to i64
%c1_i64_3057 = arith.constant 1 : i64
%c2_i64_3058 = arith.constant 2 : i64
%2963 = arith.muli %2939, %c2_i64_3058 : i64
%2964 = arith.index_cast %c14_3029 : index to i64
%2965 = arith.addi %2964, %2963 : i64
%2966 = arith.subi %2962, %c1_i64_3057 : i64
%2967 = arith.muli %c1_i64_3041, %2966 : i64
%2968 = arith.subi %2965, %2967 : i64
%2969 = arith.subi %2968, %c1_i64_3057 : i64
%2970 = arith.floordivsi %2969, %c1_i64_3043 : i64
%2971 = arith.addi %2970, %c1_i64_3057 : i64
%2972 = arith.index_cast %2971 : i64 to index
// Output buffer with the two computed spatial extents.
%2973 = tensor.empty(%2961, %2972) : tensor<1x1024x?x?xf32>
// Broadcast the 1024-element bias along dim 1 (#map3 selects d1) to initialise the output.
%2974 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3012 : tensor<1024xf32>) outs(%2973 : tensor<1x1024x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x1024x?x?xf32>
// %2975 / %2976 (channels divided by groups) are not referenced in the visible lines.
%2975 = arith.floordivsi %c256_3025, %2942 : index
%2976 = arith.floordivsi %c1024_3031, %2942 : index
%c0_3059 = arith.constant 0 : index
%c1_3060 = arith.constant 1 : index
// The convolution accumulates into the bias-initialised tensor %2974.
%2977 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_3054, %cast_3001 : tensor<?x?x?x?xf32>, tensor<1024x256x1x1xf32>) outs(%2974 : tensor<1x1024x?x?xf32>) -> tensor<1x1024x?x?xf32>
// Refine the dynamic spatial dims to the static 14x14 shape.
%cast_3061 = tensor.cast %2977 : tensor<1x1024x?x?xf32> to tensor<1x1024x14x14xf32>
// Quantize %cast_3061 to i8 and immediately dequantize it back to f32 (result: %cast_3080).
// %2978 (7.8125e-03) is the divisor used when quantizing (the scale); %2979 (0) is the additive offset (the zero point).
%2978 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2979 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): the constant 12 is not referenced in this excerpt; presumably a dtype code left over from the torch-level op — confirm.
%int12_3062 = torch.constant.int 12
%2980 = torch.aten.item %2978 : !torch.vtensor<[],f32> -> !torch.float
// %2981 and %2983 are not referenced in this excerpt; the region below repeats the same conversions.
%2981 = torch_c.to_f64 %2980
%2982 = torch.aten.item %2979 : !torch.vtensor<[],si8> -> !torch.int
%2983 = torch_c.to_i64 %2982
// The index constants %c1_3063 .. %c14_3069 are not referenced anywhere in this excerpt.
%c1_3063 = arith.constant 1 : index
%c1_3064 = arith.constant 1 : index
%c1024_3065 = arith.constant 1024 : index
%c2_3066 = arith.constant 2 : index
%c14_3067 = arith.constant 14 : index
%c3_3068 = arith.constant 3 : index
%c14_3069 = arith.constant 14 : index
%2984 = tensor.empty() : tensor<1x1024x14x14xi8>
// Quantize: q = clamp(round(in / scale) + zero_point, -128, 127), converted to i8.
%2985 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3061 : tensor<1x1024x14x14xf32>) outs(%2984 : tensor<1x1024x14x14xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %2982
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %2980
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round rounds halfway cases away from zero; confirm this matches the rounding mode the source model's quantizer expects.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
// ult/ugt are unordered predicates, so both comparisons are also true for NaN.
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x1024x14x14xi8>
// Source and destination types are identical in each of the casts below.
%cast_3070 = tensor.cast %2985 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
%cast_3071 = tensor.cast %cast_3070 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
// Scale and zero point for the dequantize step (same literal values as above).
%2986 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2987 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%2988 = torch.aten.item %2986 : !torch.vtensor<[],f32> -> !torch.float
%2989 = torch_c.to_f64 %2988
%2990 = torch.aten.item %2987 : !torch.vtensor<[],si8> -> !torch.int
%2991 = torch_c.to_i64 %2990
%cast_3072 = tensor.cast %cast_3071 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
// The index constants %c1_3073 .. %c14_3079 are not referenced anywhere in this excerpt.
%c1_3073 = arith.constant 1 : index
%c1_3074 = arith.constant 1 : index
%c1024_3075 = arith.constant 1024 : index
%c2_3076 = arith.constant 2 : index
%c14_3077 = arith.constant 14 : index
%c3_3078 = arith.constant 3 : index
%c14_3079 = arith.constant 14 : index
%2992 = tensor.empty() : tensor<1x1024x14x14xf32>
// Dequantize: out = f32(sext(in) - zero_point) * scale.
%2993 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3072 : tensor<1x1024x14x14xi8>) outs(%2992 : tensor<1x1024x14x14xf32>) {
^bb0(%in: i8, %out: f32):
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %2990
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %2988
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x1024x14x14xf32>
%cast_3080 = tensor.cast %2993 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
// Quantize the 1024x512x1x1 f32 tensor %110 to i8 and dequantize it back to f32 (result: %cast_3095,
// later used as the convolution weight). Scale literal: 3.90625e-03; zero-point literal: 0.
%2994 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%2995 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): the constant 12 is not referenced in this excerpt; presumably a dtype code left over from the torch-level op — confirm.
%int12_3081 = torch.constant.int 12
%2996 = torch.aten.item %2994 : !torch.vtensor<[],f32> -> !torch.float
// %2997 and %2999 are not referenced in this excerpt; the region below repeats the same conversions.
%2997 = torch_c.to_f64 %2996
%2998 = torch.aten.item %2995 : !torch.vtensor<[],si8> -> !torch.int
%2999 = torch_c.to_i64 %2998
// The index constants %c1_3082 .. %c512_3086 are not referenced anywhere in this excerpt.
%c1_3082 = arith.constant 1 : index
%c0_3083 = arith.constant 0 : index
%c1024_3084 = arith.constant 1024 : index
%c1_3085 = arith.constant 1 : index
%c512_3086 = arith.constant 512 : index
%3000 = tensor.empty() : tensor<1024x512x1x1xi8>
// Quantize: q = clamp(round(in / scale) + zero_point, -128, 127), converted to i8.
// #map4 reads the input at (d0, d1, 0, 0), i.e. the two trailing unit dims are pinned to index 0.
%3001 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%110 : tensor<1024x512x1x1xf32>) outs(%3000 : tensor<1024x512x1x1xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %2998
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %2996
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round rounds halfway cases away from zero; confirm this matches the rounding mode the source model's quantizer expects.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
// ult/ugt are unordered predicates, so both comparisons are also true for NaN.
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1024x512x1x1xi8>
// Source and destination types are identical in each of the casts below.
%cast_3087 = tensor.cast %3001 : tensor<1024x512x1x1xi8> to tensor<1024x512x1x1xi8>
%cast_3088 = tensor.cast %cast_3087 : tensor<1024x512x1x1xi8> to tensor<1024x512x1x1xi8>
// Scale and zero point for the dequantize step (same literal values as above).
%3002 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3003 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3004 = torch.aten.item %3002 : !torch.vtensor<[],f32> -> !torch.float
%3005 = torch_c.to_f64 %3004
%3006 = torch.aten.item %3003 : !torch.vtensor<[],si8> -> !torch.int
%3007 = torch_c.to_i64 %3006
%cast_3089 = tensor.cast %cast_3088 : tensor<1024x512x1x1xi8> to tensor<1024x512x1x1xi8>
// The index constants %c1_3090 .. %c512_3094 are not referenced anywhere in this excerpt.
%c1_3090 = arith.constant 1 : index
%c0_3091 = arith.constant 0 : index
%c1024_3092 = arith.constant 1024 : index
%c1_3093 = arith.constant 1 : index
%c512_3094 = arith.constant 512 : index
%3008 = tensor.empty() : tensor<1024x512x1x1xf32>
// Dequantize: out = f32(sext(in) - zero_point) * scale.
%3009 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3089 : tensor<1024x512x1x1xi8>) outs(%3008 : tensor<1024x512x1x1xf32>) {
^bb0(%in: i8, %out: f32):
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %3006
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %3004
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1024x512x1x1xf32>
%cast_3095 = tensor.cast %3009 : tensor<1024x512x1x1xf32> to tensor<1024x512x1x1xf32>
// Quantize the 1024-element f32 tensor %112 to i8 and dequantize it back to f32 (result: %cast_3106,
// later broadcast as the convolution bias). Scale literal: 3.90625e-03; zero-point literal: 0.
%3010 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3011 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): the constant 12 is not referenced in this excerpt; presumably a dtype code left over from the torch-level op — confirm.
%int12_3096 = torch.constant.int 12
%3012 = torch.aten.item %3010 : !torch.vtensor<[],f32> -> !torch.float
// %3013 and %3015 are not referenced in this excerpt; the region below repeats the same conversions.
%3013 = torch_c.to_f64 %3012
%3014 = torch.aten.item %3011 : !torch.vtensor<[],si8> -> !torch.int
%3015 = torch_c.to_i64 %3014
// The index constants %c1_3097 .. %c1024_3099 are not referenced anywhere in this excerpt.
%c1_3097 = arith.constant 1 : index
%c0_3098 = arith.constant 0 : index
%c1024_3099 = arith.constant 1024 : index
%3016 = tensor.empty() : tensor<1024xi8>
// Quantize: q = clamp(round(in / scale) + zero_point, -128, 127), converted to i8.
// NOTE(review): values of %112 with |in / scale| beyond the i8 range saturate here — confirm an i8 bias is intended.
%3017 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%112 : tensor<1024xf32>) outs(%3016 : tensor<1024xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %3014
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %3012
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round rounds halfway cases away from zero; confirm this matches the rounding mode the source model's quantizer expects.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
// ult/ugt are unordered predicates, so both comparisons are also true for NaN.
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1024xi8>
// Source and destination types are identical in each of the casts below.
%cast_3100 = tensor.cast %3017 : tensor<1024xi8> to tensor<1024xi8>
%cast_3101 = tensor.cast %cast_3100 : tensor<1024xi8> to tensor<1024xi8>
// Scale and zero point for the dequantize step (same literal values as above).
%3018 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3019 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3020 = torch.aten.item %3018 : !torch.vtensor<[],f32> -> !torch.float
%3021 = torch_c.to_f64 %3020
%3022 = torch.aten.item %3019 : !torch.vtensor<[],si8> -> !torch.int
%3023 = torch_c.to_i64 %3022
%cast_3102 = tensor.cast %cast_3101 : tensor<1024xi8> to tensor<1024xi8>
// The index constants %c1_3103 .. %c1024_3105 are not referenced anywhere in this excerpt.
%c1_3103 = arith.constant 1 : index
%c0_3104 = arith.constant 0 : index
%c1024_3105 = arith.constant 1024 : index
%3024 = tensor.empty() : tensor<1024xf32>
// Dequantize: out = f32(sext(in) - zero_point) * scale.
%3025 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_3102 : tensor<1024xi8>) outs(%3024 : tensor<1024xf32>) {
^bb0(%in: i8, %out: f32):
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %3022
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %3020
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1024xf32>
%cast_3106 = tensor.cast %3025 : tensor<1024xf32> to tensor<1024xf32>
// Lowered 2-D convolution: input %cast_2776 (1x512x28x28, defined before this excerpt),
// weight %cast_3095 (1024x512x1x1), bias %cast_3106 (1024), stride 2 -> %cast_3155 (1x1024x14x14).
%int0_3107 = torch.constant.int 0
%int0_3108 = torch.constant.int 0
%int1_3109 = torch.constant.int 1
%int1_3110 = torch.constant.int 1
%int2_3111 = torch.constant.int 2
%int2_3112 = torch.constant.int 2
%int0_3113 = torch.constant.int 0
// Attribute lists [0,0], [1,1], [2,2], [0,0]; none of %3026-%3029 is referenced again in this excerpt
// (presumably padding, dilation, stride and output padding of the torch-level op — confirm).
%3026 = torch.prim.ListConstruct %int0_3107, %int0_3108 : (!torch.int, !torch.int) -> !torch.list<int>
%3027 = torch.prim.ListConstruct %int1_3109, %int1_3110 : (!torch.int, !torch.int) -> !torch.list<int>
%3028 = torch.prim.ListConstruct %int2_3111, %int2_3112 : (!torch.int, !torch.int) -> !torch.list<int>
%3029 = torch.prim.ListConstruct %int0_3113, %int0_3113 : (!torch.int, !torch.int) -> !torch.list<int>
%false_3114 = torch.constant.bool false
%int1_3115 = torch.constant.int 1
// %3030 (= 1) is the groups value checked by the asserts below; %3031/%3032 (= 0) become the
// pad amounts for dims 2 and 3. %3033 and %3034 are not referenced in this excerpt.
%3030 = torch_c.to_i64 %int1_3115
%3031 = torch_c.to_i64 %int0_3107
%3032 = torch_c.to_i64 %int0_3108
%3033 = torch_c.to_i64 %int0_3113
%3034 = torch_c.to_i64 %int0_3113
// Index constants; of these only %c512_3119, %c28_3121, %c28_3123, %c1024_3125, %c1_3129 and
// %c1_3131 are referenced below.
%c0_3116 = arith.constant 0 : index
%c1_3117 = arith.constant 1 : index
%c1_3118 = arith.constant 1 : index
%c512_3119 = arith.constant 512 : index
%c2_3120 = arith.constant 2 : index
%c28_3121 = arith.constant 28 : index
%c3_3122 = arith.constant 3 : index
%c28_3123 = arith.constant 28 : index
%c0_3124 = arith.constant 0 : index
%c1024_3125 = arith.constant 1024 : index
%c1_3126 = arith.constant 1 : index
%c512_3127 = arith.constant 512 : index
%c2_3128 = arith.constant 2 : index
%c1_3129 = arith.constant 1 : index
%c3_3130 = arith.constant 3 : index
%c1_3131 = arith.constant 1 : index
// Runtime checks: 512 and 1024 must both be divisible by the groups value.
%3035 = arith.index_cast %3030 : i64 to index
%c0_3132 = arith.constant 0 : index
%3036 = arith.remsi %c512_3119, %3035 : index
%3037 = arith.cmpi eq, %c0_3132, %3036 : index
cf.assert %3037, "invalid: groups must divide input channel size evenly."
%c0_3133 = arith.constant 0 : index
%3038 = arith.remsi %c1024_3125, %3035 : index
%3039 = arith.cmpi eq, %c0_3133, %3038 : index
cf.assert %3039, "invalid: groups must divide weight batch size evenly."
// %c1_i64_3134/_3135 multiply (kernel - 1) and %c2_i64_3136/_3137 are the divisors in the
// output-size arithmetic below; they match the dilations = 1 / strides = 2 attributes on the conv op.
%c1_i64_3134 = arith.constant 1 : i64
%c1_i64_3135 = arith.constant 1 : i64
%c2_i64_3136 = arith.constant 2 : i64
%c2_i64_3137 = arith.constant 2 : i64
// Fill value used for the padded border.
%cst_3138 = arith.constant 0.000000e+00 : f32
// The index constants %c0_3139 .. %c28_3146 are not referenced anywhere in this excerpt.
%c0_3139 = arith.constant 0 : index
%c1_3140 = arith.constant 1 : index
%c1_3141 = arith.constant 1 : index
%c512_3142 = arith.constant 512 : index
%c2_3143 = arith.constant 2 : index
%c28_3144 = arith.constant 28 : index
%c3_3145 = arith.constant 3 : index
%c28_3146 = arith.constant 28 : index
%c0_i64_3147 = arith.constant 0 : i64
// Pad amounts per dimension: 0 for dims 0 and 1, %3031 for dim 2 and %3032 for dim 3 (both 0 here),
// applied symmetrically (same operands for low and high).
%3040 = arith.index_cast %c0_i64_3147 : i64 to index
%3041 = arith.index_cast %c0_i64_3147 : i64 to index
%3042 = arith.index_cast %3031 : i64 to index
%3043 = arith.index_cast %3032 : i64 to index
%padded_3148 = tensor.pad %cast_2776 low[%3040, %3041, %3042, %3043] high[%3040, %3041, %3042, %3043] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_3138 : f32
} : tensor<1x512x28x28xf32> to tensor<?x?x?x?xf32>
// Output extent for dim 2: floordiv(28 + 2*0 - 1*(1 - 1) - 1, 2) + 1 = 14.
%3044 = arith.index_cast %c1_3129 : index to i64
%c1_i64_3149 = arith.constant 1 : i64
%c2_i64_3150 = arith.constant 2 : i64
%3045 = arith.muli %3031, %c2_i64_3150 : i64
%3046 = arith.index_cast %c28_3121 : index to i64
%3047 = arith.addi %3046, %3045 : i64
%3048 = arith.subi %3044, %c1_i64_3149 : i64
%3049 = arith.muli %c1_i64_3134, %3048 : i64
%3050 = arith.subi %3047, %3049 : i64
%3051 = arith.subi %3050, %c1_i64_3149 : i64
%3052 = arith.floordivsi %3051, %c2_i64_3136 : i64
%3053 = arith.addi %3052, %c1_i64_3149 : i64
%3054 = arith.index_cast %3053 : i64 to index
// Same computation for dim 3.
%3055 = arith.index_cast %c1_3131 : index to i64
%c1_i64_3151 = arith.constant 1 : i64
%c2_i64_3152 = arith.constant 2 : i64
%3056 = arith.muli %3032, %c2_i64_3152 : i64
%3057 = arith.index_cast %c28_3123 : index to i64
%3058 = arith.addi %3057, %3056 : i64
%3059 = arith.subi %3055, %c1_i64_3151 : i64
%3060 = arith.muli %c1_i64_3135, %3059 : i64
%3061 = arith.subi %3058, %3060 : i64
%3062 = arith.subi %3061, %c1_i64_3151 : i64
%3063 = arith.floordivsi %3062, %c2_i64_3137 : i64
%3064 = arith.addi %3063, %c1_i64_3151 : i64
%3065 = arith.index_cast %3064 : i64 to index
// Initialise the accumulator with the bias: #map3 indexes the input by d1 only, so each of the
// 1024 bias values is replicated over dims 0, 2 and 3 of the output.
%3066 = tensor.empty(%3054, %3065) : tensor<1x1024x?x?xf32>
%3067 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3106 : tensor<1024xf32>) outs(%3066 : tensor<1x1024x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x1024x?x?xf32>
// %3068, %3069, %c0_3153 and %c1_3154 are not referenced in this excerpt.
%3068 = arith.floordivsi %c512_3119, %3035 : index
%3069 = arith.floordivsi %c1024_3125, %3035 : index
%c0_3153 = arith.constant 0 : index
%c1_3154 = arith.constant 1 : index
// Stride-2, dilation-1 NCHW/FCHW convolution accumulating onto the broadcast bias.
%3070 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%padded_3148, %cast_3095 : tensor<?x?x?x?xf32>, tensor<1024x512x1x1xf32>) outs(%3067 : tensor<1x1024x?x?xf32>) -> tensor<1x1024x?x?xf32>
// Re-attach the static result shape.
%cast_3155 = tensor.cast %3070 : tensor<1x1024x?x?xf32> to tensor<1x1024x14x14xf32>
// Quantize the convolution result %cast_3155 to i8 and dequantize it back to f32 (result: %cast_3174).
// Scale literal: 7.8125e-03; zero-point literal: 0.
%3071 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3072 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): the constant 12 is not referenced in this excerpt; presumably a dtype code left over from the torch-level op — confirm.
%int12_3156 = torch.constant.int 12
%3073 = torch.aten.item %3071 : !torch.vtensor<[],f32> -> !torch.float
// %3074 and %3076 are not referenced in this excerpt; the region below repeats the same conversions.
%3074 = torch_c.to_f64 %3073
%3075 = torch.aten.item %3072 : !torch.vtensor<[],si8> -> !torch.int
%3076 = torch_c.to_i64 %3075
// The index constants %c1_3157 .. %c14_3163 are not referenced anywhere in this excerpt.
%c1_3157 = arith.constant 1 : index
%c1_3158 = arith.constant 1 : index
%c1024_3159 = arith.constant 1024 : index
%c2_3160 = arith.constant 2 : index
%c14_3161 = arith.constant 14 : index
%c3_3162 = arith.constant 3 : index
%c14_3163 = arith.constant 14 : index
%3077 = tensor.empty() : tensor<1x1024x14x14xi8>
// Quantize: q = clamp(round(in / scale) + zero_point, -128, 127), converted to i8.
%3078 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3155 : tensor<1x1024x14x14xf32>) outs(%3077 : tensor<1x1024x14x14xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %3075
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %3073
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round rounds halfway cases away from zero; confirm this matches the rounding mode the source model's quantizer expects.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
// ult/ugt are unordered predicates, so both comparisons are also true for NaN.
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x1024x14x14xi8>
// Source and destination types are identical in each of the casts below.
%cast_3164 = tensor.cast %3078 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
%cast_3165 = tensor.cast %cast_3164 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
// Scale and zero point for the dequantize step (same literal values as above).
%3079 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3080 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3081 = torch.aten.item %3079 : !torch.vtensor<[],f32> -> !torch.float
%3082 = torch_c.to_f64 %3081
%3083 = torch.aten.item %3080 : !torch.vtensor<[],si8> -> !torch.int
%3084 = torch_c.to_i64 %3083
%cast_3166 = tensor.cast %cast_3165 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
// The index constants %c1_3167 .. %c14_3173 are not referenced anywhere in this excerpt.
%c1_3167 = arith.constant 1 : index
%c1_3168 = arith.constant 1 : index
%c1024_3169 = arith.constant 1024 : index
%c2_3170 = arith.constant 2 : index
%c14_3171 = arith.constant 14 : index
%c3_3172 = arith.constant 3 : index
%c14_3173 = arith.constant 14 : index
%3085 = tensor.empty() : tensor<1x1024x14x14xf32>
// Dequantize: out = f32(sext(in) - zero_point) * scale.
%3086 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3166 : tensor<1x1024x14x14xi8>) outs(%3085 : tensor<1x1024x14x14xf32>) {
^bb0(%in: i8, %out: f32):
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %3083
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %3081
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x1024x14x14xf32>
%cast_3174 = tensor.cast %3086 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
// Elementwise add of the two 1x1024x14x14 tensors %cast_3080 and %cast_3174:
// out = lhs + rhs * alpha, with alpha = %3087 (the integer constant 1). Result: %cast_3189.
%int1_3175 = torch.constant.int 1
%3087 = torch_c.to_i64 %int1_3175
// Index constants; only %c1024_3178, %c14_3180, %c14_3182 and the later %c1024_3184, %c14_3186,
// %c14_3188 are referenced, by the size checks below.
%c1_3176 = arith.constant 1 : index
%c1_3177 = arith.constant 1 : index
%c1024_3178 = arith.constant 1024 : index
%c2_3179 = arith.constant 2 : index
%c14_3180 = arith.constant 14 : index
%c3_3181 = arith.constant 3 : index
%c14_3182 = arith.constant 14 : index
%c1_3183 = arith.constant 1 : index
%c1024_3184 = arith.constant 1024 : index
// Broadcast-compatibility checks; each one compares two equal constants (1024 vs 1024, 14 vs 14, 14 vs 14).
%3088 = arith.cmpi eq, %c1024_3178, %c1024_3184 : index
cf.assert %3088, "mismatched size for broadcast"
%c2_3185 = arith.constant 2 : index
%c14_3186 = arith.constant 14 : index
%3089 = arith.cmpi eq, %c14_3180, %c14_3186 : index
cf.assert %3089, "mismatched size for broadcast"
%c3_3187 = arith.constant 3 : index
%c14_3188 = arith.constant 14 : index
%3090 = arith.cmpi eq, %c14_3182, %c14_3188 : index
cf.assert %3090, "mismatched size for broadcast"
%3091 = tensor.empty() : tensor<1x1024x14x14xf32>
// Both inputs are read through #map, which pins dim 0 to index 0.
%3092 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3080, %cast_3174 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%3091 : tensor<1x1024x14x14xf32>) {
^bb0(%in: f32, %in_6197: f32, %out: f32):
// alpha is converted to f32 inside the region, then applied to the second operand.
%5774 = arith.sitofp %3087 : i64 to f32
%5775 = arith.mulf %in_6197, %5774 : f32
%5776 = arith.addf %in, %5775 : f32
linalg.yield %5776 : f32
} -> tensor<1x1024x14x14xf32>
// Source and destination types are identical in the cast below.
%cast_3189 = tensor.cast %3092 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
// Elementwise rectifier on %cast_3189: out = in if in > 0 (or unordered), otherwise 0. Result: %cast_3197.
// The index constants %c1_3190 .. %c14_3196 are not referenced anywhere in this excerpt.
%c1_3190 = arith.constant 1 : index
%c1_3191 = arith.constant 1 : index
%c1024_3192 = arith.constant 1024 : index
%c2_3193 = arith.constant 2 : index
%c14_3194 = arith.constant 14 : index
%c3_3195 = arith.constant 3 : index
%c14_3196 = arith.constant 14 : index
%3093 = tensor.empty() : tensor<1x1024x14x14xf32>
%3094 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3189 : tensor<1x1024x14x14xf32>) outs(%3093 : tensor<1x1024x14x14xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// ugt is an unordered predicate: it is also true when %in is NaN, so a NaN input is passed through unchanged.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x1024x14x14xf32>
// Source and destination types are identical in the cast below.
%cast_3197 = tensor.cast %3094 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
// Quantize the rectified activation %cast_3197 to i8 and dequantize it back to f32 (result: %cast_3216).
// Scale literal: 1.5625e-02; zero-point literal: 0.
%3095 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%3096 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): the constant 12 is not referenced in this excerpt; presumably a dtype code left over from the torch-level op — confirm.
%int12_3198 = torch.constant.int 12
%3097 = torch.aten.item %3095 : !torch.vtensor<[],f32> -> !torch.float
// %3098 and %3100 are not referenced in this excerpt; the region below repeats the same conversions.
%3098 = torch_c.to_f64 %3097
%3099 = torch.aten.item %3096 : !torch.vtensor<[],si8> -> !torch.int
%3100 = torch_c.to_i64 %3099
// The index constants %c1_3199 .. %c14_3205 are not referenced anywhere in this excerpt.
%c1_3199 = arith.constant 1 : index
%c1_3200 = arith.constant 1 : index
%c1024_3201 = arith.constant 1024 : index
%c2_3202 = arith.constant 2 : index
%c14_3203 = arith.constant 14 : index
%c3_3204 = arith.constant 3 : index
%c14_3205 = arith.constant 14 : index
%3101 = tensor.empty() : tensor<1x1024x14x14xi8>
// Quantize: q = clamp(round(in / scale) + zero_point, -128, 127), converted to i8.
%3102 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3197 : tensor<1x1024x14x14xf32>) outs(%3101 : tensor<1x1024x14x14xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %3099
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %3097
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round rounds halfway cases away from zero; confirm this matches the rounding mode the source model's quantizer expects.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
// ult/ugt are unordered predicates, so both comparisons are also true for NaN.
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x1024x14x14xi8>
// Source and destination types are identical in each of the casts below.
%cast_3206 = tensor.cast %3102 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
%cast_3207 = tensor.cast %cast_3206 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
// Scale and zero point for the dequantize step (same literal values as above).
%3103 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%3104 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3105 = torch.aten.item %3103 : !torch.vtensor<[],f32> -> !torch.float
%3106 = torch_c.to_f64 %3105
%3107 = torch.aten.item %3104 : !torch.vtensor<[],si8> -> !torch.int
%3108 = torch_c.to_i64 %3107
%cast_3208 = tensor.cast %cast_3207 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
// The index constants %c1_3209 .. %c14_3215 are not referenced anywhere in this excerpt.
%c1_3209 = arith.constant 1 : index
%c1_3210 = arith.constant 1 : index
%c1024_3211 = arith.constant 1024 : index
%c2_3212 = arith.constant 2 : index
%c14_3213 = arith.constant 14 : index
%c3_3214 = arith.constant 3 : index
%c14_3215 = arith.constant 14 : index
%3109 = tensor.empty() : tensor<1x1024x14x14xf32>
// Dequantize: out = f32(sext(in) - zero_point) * scale.
%3110 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3208 : tensor<1x1024x14x14xi8>) outs(%3109 : tensor<1x1024x14x14xf32>) {
^bb0(%in: i8, %out: f32):
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %3107
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %3105
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x1024x14x14xf32>
%cast_3216 = tensor.cast %3110 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
// Quantize the 256x1024x1x1 f32 tensor %114 to i8 and dequantize it back to f32 (result: %cast_3231,
// later used as the convolution weight). Scale literal: 0.001953125; zero-point literal: 0.
%3111 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
%3112 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): the constant 12 is not referenced in this excerpt; presumably a dtype code left over from the torch-level op — confirm.
%int12_3217 = torch.constant.int 12
%3113 = torch.aten.item %3111 : !torch.vtensor<[],f32> -> !torch.float
// %3114 and %3116 are not referenced in this excerpt; the region below repeats the same conversions.
%3114 = torch_c.to_f64 %3113
%3115 = torch.aten.item %3112 : !torch.vtensor<[],si8> -> !torch.int
%3116 = torch_c.to_i64 %3115
// The index constants %c1_3218 .. %c1024_3222 are not referenced anywhere in this excerpt.
%c1_3218 = arith.constant 1 : index
%c0_3219 = arith.constant 0 : index
%c256_3220 = arith.constant 256 : index
%c1_3221 = arith.constant 1 : index
%c1024_3222 = arith.constant 1024 : index
%3117 = tensor.empty() : tensor<256x1024x1x1xi8>
// Quantize: q = clamp(round(in / scale) + zero_point, -128, 127), converted to i8.
// #map4 reads the input at (d0, d1, 0, 0), i.e. the two trailing unit dims are pinned to index 0.
%3118 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%114 : tensor<256x1024x1x1xf32>) outs(%3117 : tensor<256x1024x1x1xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %3115
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %3113
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round rounds halfway cases away from zero; confirm this matches the rounding mode the source model's quantizer expects.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
// ult/ugt are unordered predicates, so both comparisons are also true for NaN.
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256x1024x1x1xi8>
// Source and destination types are identical in each of the casts below.
%cast_3223 = tensor.cast %3118 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
%cast_3224 = tensor.cast %cast_3223 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
// Scale and zero point for the dequantize step (same literal values as above).
%3119 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
%3120 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3121 = torch.aten.item %3119 : !torch.vtensor<[],f32> -> !torch.float
%3122 = torch_c.to_f64 %3121
%3123 = torch.aten.item %3120 : !torch.vtensor<[],si8> -> !torch.int
%3124 = torch_c.to_i64 %3123
%cast_3225 = tensor.cast %cast_3224 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
// The index constants %c1_3226 .. %c1024_3230 are not referenced anywhere in this excerpt.
%c1_3226 = arith.constant 1 : index
%c0_3227 = arith.constant 0 : index
%c256_3228 = arith.constant 256 : index
%c1_3229 = arith.constant 1 : index
%c1024_3230 = arith.constant 1024 : index
%3125 = tensor.empty() : tensor<256x1024x1x1xf32>
// Dequantize: out = f32(sext(in) - zero_point) * scale.
%3126 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3225 : tensor<256x1024x1x1xi8>) outs(%3125 : tensor<256x1024x1x1xf32>) {
^bb0(%in: i8, %out: f32):
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %3123
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %3121
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<256x1024x1x1xf32>
%cast_3231 = tensor.cast %3126 : tensor<256x1024x1x1xf32> to tensor<256x1024x1x1xf32>
// Quantize the 256-element f32 tensor %116 to i8 and dequantize it back to f32 (result: %cast_3242,
// later broadcast as the convolution bias). Scale literal: 7.8125e-03; zero-point literal: 0.
%3127 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3128 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): the constant 12 is not referenced in this excerpt; presumably a dtype code left over from the torch-level op — confirm.
%int12_3232 = torch.constant.int 12
%3129 = torch.aten.item %3127 : !torch.vtensor<[],f32> -> !torch.float
// %3130 and %3132 are not referenced in this excerpt; the region below repeats the same conversions.
%3130 = torch_c.to_f64 %3129
%3131 = torch.aten.item %3128 : !torch.vtensor<[],si8> -> !torch.int
%3132 = torch_c.to_i64 %3131
// The index constants %c1_3233 .. %c256_3235 are not referenced anywhere in this excerpt.
%c1_3233 = arith.constant 1 : index
%c0_3234 = arith.constant 0 : index
%c256_3235 = arith.constant 256 : index
%3133 = tensor.empty() : tensor<256xi8>
// Quantize: q = clamp(round(in / scale) + zero_point, -128, 127), converted to i8.
// NOTE(review): values of %116 with |in / scale| beyond the i8 range saturate here — confirm an i8 bias is intended.
%3134 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%116 : tensor<256xf32>) outs(%3133 : tensor<256xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %3131
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %3129
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round rounds halfway cases away from zero; confirm this matches the rounding mode the source model's quantizer expects.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
// ult/ugt are unordered predicates, so both comparisons are also true for NaN.
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256xi8>
// Source and destination types are identical in each of the casts below.
%cast_3236 = tensor.cast %3134 : tensor<256xi8> to tensor<256xi8>
%cast_3237 = tensor.cast %cast_3236 : tensor<256xi8> to tensor<256xi8>
// Scale and zero point for the dequantize step (same literal values as above).
%3135 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3136 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3137 = torch.aten.item %3135 : !torch.vtensor<[],f32> -> !torch.float
%3138 = torch_c.to_f64 %3137
%3139 = torch.aten.item %3136 : !torch.vtensor<[],si8> -> !torch.int
%3140 = torch_c.to_i64 %3139
%cast_3238 = tensor.cast %cast_3237 : tensor<256xi8> to tensor<256xi8>
// The index constants %c1_3239 .. %c256_3241 are not referenced anywhere in this excerpt.
%c1_3239 = arith.constant 1 : index
%c0_3240 = arith.constant 0 : index
%c256_3241 = arith.constant 256 : index
%3141 = tensor.empty() : tensor<256xf32>
// Dequantize: out = f32(sext(in) - zero_point) * scale.
%3142 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_3238 : tensor<256xi8>) outs(%3141 : tensor<256xf32>) {
^bb0(%in: i8, %out: f32):
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %3139
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %3137
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<256xf32>
%cast_3242 = tensor.cast %3142 : tensor<256xf32> to tensor<256xf32>
%int0_3243 = torch.constant.int 0
%int0_3244 = torch.constant.int 0
%int1_3245 = torch.constant.int 1
%int1_3246 = torch.constant.int 1
%int1_3247 = torch.constant.int 1
%int1_3248 = torch.constant.int 1
%int0_3249 = torch.constant.int 0
%3143 = torch.prim.ListConstruct %int0_3243, %int0_3244 : (!torch.int, !torch.int) -> !torch.list<int>
%3144 = torch.prim.ListConstruct %int1_3245, %int1_3246 : (!torch.int, !torch.int) -> !torch.list<int>
%3145 = torch.prim.ListConstruct %int1_3247, %int1_3248 : (!torch.int, !torch.int) -> !torch.list<int>
%3146 = torch.prim.ListConstruct %int0_3249, %int0_3249 : (!torch.int, !torch.int) -> !torch.list<int>
%false_3250 = torch.constant.bool false
%int1_3251 = torch.constant.int 1
%3147 = torch_c.to_i64 %int1_3251
%3148 = torch_c.to_i64 %int0_3243
%3149 = torch_c.to_i64 %int0_3244
%3150 = torch_c.to_i64 %int0_3249
%3151 = torch_c.to_i64 %int0_3249
%c0_3252 = arith.constant 0 : index
%c1_3253 = arith.constant 1 : index
%c1_3254 = arith.constant 1 : index
%c1024_3255 = arith.constant 1024 : index
%c2_3256 = arith.constant 2 : index
%c14_3257 = arith.constant 14 : index
%c3_3258 = arith.constant 3 : index
%c14_3259 = arith.constant 14 : index
%c0_3260 = arith.constant 0 : index
%c256_3261 = arith.constant 256 : index
%c1_3262 = arith.constant 1 : index
%c1024_3263 = arith.constant 1024 : index
%c2_3264 = arith.constant 2 : index
%c1_3265 = arith.constant 1 : index
%c3_3266 = arith.constant 3 : index
%c1_3267 = arith.constant 1 : index
%3152 = arith.index_cast %3147 : i64 to index
%c0_3268 = arith.constant 0 : index
%3153 = arith.remsi %c1024_3255, %3152 : index
%3154 = arith.cmpi eq, %c0_3268, %3153 : index
cf.assert %3154, "invalid: groups must divide input channel size evenly."
%c0_3269 = arith.constant 0 : index
%3155 = arith.remsi %c256_3261, %3152 : index
%3156 = arith.cmpi eq, %c0_3269, %3155 : index
cf.assert %3156, "invalid: groups must divide weight batch size evenly."
%c1_i64_3270 = arith.constant 1 : i64
%c1_i64_3271 = arith.constant 1 : i64
%c1_i64_3272 = arith.constant 1 : i64
%c1_i64_3273 = arith.constant 1 : i64
%cst_3274 = arith.constant 0.000000e+00 : f32
%c0_3275 = arith.constant 0 : index
%c1_3276 = arith.constant 1 : index
%c1_3277 = arith.constant 1 : index
%c1024_3278 = arith.constant 1024 : index
%c2_3279 = arith.constant 2 : index
%c14_3280 = arith.constant 14 : index
%c3_3281 = arith.constant 3 : index
%c14_3282 = arith.constant 14 : index
%c0_i64_3283 = arith.constant 0 : i64
%3157 = arith.index_cast %c0_i64_3283 : i64 to index
%3158 = arith.index_cast %c0_i64_3283 : i64 to index
%3159 = arith.index_cast %3148 : i64 to index
%3160 = arith.index_cast %3149 : i64 to index
// Pad %cast_3216 ahead of the conv_2d_nchw_fchw that consumes %padded_3284.
// The first two pad amounts (%3157, %3158) are index casts of a constant 0;
// the last two (%3159, %3160) are index casts of %3148 / %3149.
// Every padded element is filled with %cst_3274 (0.0 : f32).
%padded_3284 = tensor.pad %cast_3216 low[%3157, %3158, %3159, %3160] high[%3157, %3158, %3159, %3160] {
^bb0(%pad_n: index, %pad_c: index, %pad_h: index, %pad_w: index):
  tensor.yield %cst_3274 : f32
} : tensor<1x1024x14x14xf32> to tensor<?x?x?x?xf32>
%3161 = arith.index_cast %c1_3265 : index to i64
%c1_i64_3285 = arith.constant 1 : i64
%c2_i64_3286 = arith.constant 2 : i64
%3162 = arith.muli %3148, %c2_i64_3286 : i64
%3163 = arith.index_cast %c14_3257 : index to i64
%3164 = arith.addi %3163, %3162 : i64
%3165 = arith.subi %3161, %c1_i64_3285 : i64
%3166 = arith.muli %c1_i64_3270, %3165 : i64
%3167 = arith.subi %3164, %3166 : i64
%3168 = arith.subi %3167, %c1_i64_3285 : i64
%3169 = arith.floordivsi %3168, %c1_i64_3272 : i64
%3170 = arith.addi %3169, %c1_i64_3285 : i64
%3171 = arith.index_cast %3170 : i64 to index
%3172 = arith.index_cast %c1_3267 : index to i64
%c1_i64_3287 = arith.constant 1 : i64
%c2_i64_3288 = arith.constant 2 : i64
%3173 = arith.muli %3149, %c2_i64_3288 : i64
%3174 = arith.index_cast %c14_3259 : index to i64
%3175 = arith.addi %3174, %3173 : i64
%3176 = arith.subi %3172, %c1_i64_3287 : i64
%3177 = arith.muli %c1_i64_3271, %3176 : i64
%3178 = arith.subi %3175, %3177 : i64
%3179 = arith.subi %3178, %c1_i64_3287 : i64
%3180 = arith.floordivsi %3179, %c1_i64_3273 : i64
%3181 = arith.addi %3180, %c1_i64_3287 : i64
%3182 = arith.index_cast %3181 : i64 to index
%3183 = tensor.empty(%3171, %3182) : tensor<1x256x?x?xf32>
// Broadcast the 256-element vector %cast_3242 along the channel dimension
// (#map3 selects d1) into the 1x256x?x? tensor %3183. The result is the
// initial accumulator (outs) of the convolution that follows, so it acts as
// a per-output-channel additive term (presumably the conv bias).
%3184 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3242 : tensor<256xf32>) outs(%3183 : tensor<1x256x?x?xf32>) {
^bb0(%channel_val: f32, %unused_init: f32):
  linalg.yield %channel_val : f32
} -> tensor<1x256x?x?xf32>
%3185 = arith.floordivsi %c1024_3255, %3152 : index
%3186 = arith.floordivsi %c256_3261, %3152 : index
%c0_3289 = arith.constant 0 : index
%c1_3290 = arith.constant 1 : index
%3187 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_3284, %cast_3231 : tensor<?x?x?x?xf32>, tensor<256x1024x1x1xf32>) outs(%3184 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
%cast_3291 = tensor.cast %3187 : tensor<1x256x?x?xf32> to tensor<1x256x14x14xf32>
%c1_3292 = arith.constant 1 : index
%c1_3293 = arith.constant 1 : index
%c256_3294 = arith.constant 256 : index
%c2_3295 = arith.constant 2 : index
%c14_3296 = arith.constant 14 : index
%c3_3297 = arith.constant 3 : index
%c14_3298 = arith.constant 14 : index
%3188 = tensor.empty() : tensor<1x256x14x14xf32>
// Elementwise ReLU over the 1x256x14x14 conv result: yield x when x > 0,
// otherwise 0.0. The comparison is unordered (ugt), so a NaN input is
// passed through unchanged.
%3189 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3291 : tensor<1x256x14x14xf32>) outs(%3188 : tensor<1x256x14x14xf32>) {
^bb0(%relu_x: f32, %unused_init: f32):
  %relu_zero = arith.constant 0.000000e+00 : f32
  %relu_is_pos = arith.cmpf ugt, %relu_x, %relu_zero : f32
  %relu_y = arith.select %relu_is_pos, %relu_x, %relu_zero : f32
  linalg.yield %relu_y : f32
} -> tensor<1x256x14x14xf32>
%cast_3299 = tensor.cast %3189 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
%3190 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3191 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_3300 = torch.constant.int 12
%3192 = torch.aten.item %3190 : !torch.vtensor<[],f32> -> !torch.float
%3193 = torch_c.to_f64 %3192
%3194 = torch.aten.item %3191 : !torch.vtensor<[],si8> -> !torch.int
%3195 = torch_c.to_i64 %3194
%c1_3301 = arith.constant 1 : index
%c1_3302 = arith.constant 1 : index
%c256_3303 = arith.constant 256 : index
%c2_3304 = arith.constant 2 : index
%c14_3305 = arith.constant 14 : index
%c3_3306 = arith.constant 3 : index
%c14_3307 = arith.constant 14 : index
%3196 = tensor.empty() : tensor<1x256x14x14xi8>
// Elementwise f32 -> i8 quantization of the 1x256x14x14 activation:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale (%3192) and zero_point (%3194) are scalars captured from the
// enclosing function.
%3197 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3299 : tensor<1x256x14x14xf32>) outs(%3196 : tensor<1x256x14x14xi8>) {
^bb0(%in: f32, %out: i8):
  // Zero point and scale, converted to f32 for the arithmetic below.
  %5774 = torch_c.to_i64 %3194
  %5775 = arith.sitofp %5774 : i64 to f32
  %5776 = torch_c.to_f64 %3192
  %5777 = arith.truncf %5776 : f64 to f32
  %5778 = arith.divf %in, %5777 : f32
  // Ties go to the nearest even integer, as ONNX QuantizeLinear specifies;
  // math.round would send them away from zero instead.
  %5779 = math.roundeven %5778 : f32
  %5780 = arith.addf %5779, %5775 : f32
  // Saturate to the signed 8-bit range before the float -> int conversion.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
  %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
  %5783 = arith.select %5781, %cst_6197, %5780 : f32
  %5784 = arith.select %5782, %cst_6198, %5783 : f32
  %5785 = arith.fptosi %5784 : f32 to i8
  linalg.yield %5785 : i8
} -> tensor<1x256x14x14xi8>
%cast_3308 = tensor.cast %3197 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
%cast_3309 = tensor.cast %cast_3308 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
%3198 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3199 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3200 = torch.aten.item %3198 : !torch.vtensor<[],f32> -> !torch.float
%3201 = torch_c.to_f64 %3200
%3202 = torch.aten.item %3199 : !torch.vtensor<[],si8> -> !torch.int
%3203 = torch_c.to_i64 %3202
%cast_3310 = tensor.cast %cast_3309 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
%c1_3311 = arith.constant 1 : index
%c1_3312 = arith.constant 1 : index
%c256_3313 = arith.constant 256 : index
%c2_3314 = arith.constant 2 : index
%c14_3315 = arith.constant 14 : index
%c3_3316 = arith.constant 3 : index
%c14_3317 = arith.constant 14 : index
%3204 = tensor.empty() : tensor<1x256x14x14xf32>
// Elementwise i8 -> f32 dequantization of the 1x256x14x14 activation:
//   x = (q - zero_point) * scale
// zero_point (%3202) and scale (%3200) are scalars captured from the
// enclosing function; the subtraction is done in i32.
%3205 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3310 : tensor<1x256x14x14xi8>) outs(%3204 : tensor<1x256x14x14xf32>) {
^bb0(%dq_q: i8, %unused_init: f32):
  %dq_zp_i64 = torch_c.to_i64 %3202
  %dq_zp_i32 = arith.trunci %dq_zp_i64 : i64 to i32
  %dq_scale_f64 = torch_c.to_f64 %3200
  %dq_scale_f32 = arith.truncf %dq_scale_f64 : f64 to f32
  %dq_q_i32 = arith.extsi %dq_q : i8 to i32
  %dq_centered = arith.subi %dq_q_i32, %dq_zp_i32 : i32
  %dq_centered_f32 = arith.sitofp %dq_centered : i32 to f32
  %dq_x = arith.mulf %dq_centered_f32, %dq_scale_f32 : f32
  linalg.yield %dq_x : f32
} -> tensor<1x256x14x14xf32>
%cast_3318 = tensor.cast %3205 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
%3206 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3207 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_3319 = torch.constant.int 12
%3208 = torch.aten.item %3206 : !torch.vtensor<[],f32> -> !torch.float
%3209 = torch_c.to_f64 %3208
%3210 = torch.aten.item %3207 : !torch.vtensor<[],si8> -> !torch.int
%3211 = torch_c.to_i64 %3210
%c1_3320 = arith.constant 1 : index
%c0_3321 = arith.constant 0 : index
%c256_3322 = arith.constant 256 : index
%c1_3323 = arith.constant 1 : index
%c256_3324 = arith.constant 256 : index
%c2_3325 = arith.constant 2 : index
%c3_3326 = arith.constant 3 : index
%c3_3327 = arith.constant 3 : index
%c3_3328 = arith.constant 3 : index
%3212 = tensor.empty() : tensor<256x256x3x3xi8>
// Elementwise f32 -> i8 quantization of the 256x256x3x3 tensor %118:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale (%3208) and zero_point (%3210) are scalars captured from the
// enclosing function. The result is dequantized again further down and used
// as the filter operand of a conv_2d_nchw_fchw.
%3213 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%118 : tensor<256x256x3x3xf32>) outs(%3212 : tensor<256x256x3x3xi8>) {
^bb0(%in: f32, %out: i8):
  // Zero point and scale, converted to f32 for the arithmetic below.
  %5774 = torch_c.to_i64 %3210
  %5775 = arith.sitofp %5774 : i64 to f32
  %5776 = torch_c.to_f64 %3208
  %5777 = arith.truncf %5776 : f64 to f32
  %5778 = arith.divf %in, %5777 : f32
  // Ties go to the nearest even integer, as ONNX QuantizeLinear specifies;
  // math.round would send them away from zero instead.
  %5779 = math.roundeven %5778 : f32
  %5780 = arith.addf %5779, %5775 : f32
  // Saturate to the signed 8-bit range before the float -> int conversion.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
  %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
  %5783 = arith.select %5781, %cst_6197, %5780 : f32
  %5784 = arith.select %5782, %cst_6198, %5783 : f32
  %5785 = arith.fptosi %5784 : f32 to i8
  linalg.yield %5785 : i8
} -> tensor<256x256x3x3xi8>
%cast_3329 = tensor.cast %3213 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
%cast_3330 = tensor.cast %cast_3329 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
%3214 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3215 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3216 = torch.aten.item %3214 : !torch.vtensor<[],f32> -> !torch.float
%3217 = torch_c.to_f64 %3216
%3218 = torch.aten.item %3215 : !torch.vtensor<[],si8> -> !torch.int
%3219 = torch_c.to_i64 %3218
%cast_3331 = tensor.cast %cast_3330 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
%c1_3332 = arith.constant 1 : index
%c0_3333 = arith.constant 0 : index
%c256_3334 = arith.constant 256 : index
%c1_3335 = arith.constant 1 : index
%c256_3336 = arith.constant 256 : index
%c2_3337 = arith.constant 2 : index
%c3_3338 = arith.constant 3 : index
%c3_3339 = arith.constant 3 : index
%c3_3340 = arith.constant 3 : index
%3220 = tensor.empty() : tensor<256x256x3x3xf32>
// Elementwise i8 -> f32 dequantization of the 256x256x3x3 tensor:
//   x = (q - zero_point) * scale
// zero_point (%3218) and scale (%3216) are scalars captured from the
// enclosing function; the subtraction is done in i32.
%3221 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3331 : tensor<256x256x3x3xi8>) outs(%3220 : tensor<256x256x3x3xf32>) {
^bb0(%dq_q: i8, %unused_init: f32):
  %dq_zp_i64 = torch_c.to_i64 %3218
  %dq_zp_i32 = arith.trunci %dq_zp_i64 : i64 to i32
  %dq_scale_f64 = torch_c.to_f64 %3216
  %dq_scale_f32 = arith.truncf %dq_scale_f64 : f64 to f32
  %dq_q_i32 = arith.extsi %dq_q : i8 to i32
  %dq_centered = arith.subi %dq_q_i32, %dq_zp_i32 : i32
  %dq_centered_f32 = arith.sitofp %dq_centered : i32 to f32
  %dq_x = arith.mulf %dq_centered_f32, %dq_scale_f32 : f32
  linalg.yield %dq_x : f32
} -> tensor<256x256x3x3xf32>
%cast_3341 = tensor.cast %3221 : tensor<256x256x3x3xf32> to tensor<256x256x3x3xf32>
%3222 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%3223 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_3342 = torch.constant.int 12
%3224 = torch.aten.item %3222 : !torch.vtensor<[],f32> -> !torch.float
%3225 = torch_c.to_f64 %3224
%3226 = torch.aten.item %3223 : !torch.vtensor<[],si8> -> !torch.int
%3227 = torch_c.to_i64 %3226
%c1_3343 = arith.constant 1 : index
%c0_3344 = arith.constant 0 : index
%c256_3345 = arith.constant 256 : index
%3228 = tensor.empty() : tensor<256xi8>
// Elementwise f32 -> i8 quantization of the 256-element vector %120:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale (%3224) and zero_point (%3226) are scalars captured from the
// enclosing function.
%3229 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%120 : tensor<256xf32>) outs(%3228 : tensor<256xi8>) {
^bb0(%in: f32, %out: i8):
  // Zero point and scale, converted to f32 for the arithmetic below.
  %5774 = torch_c.to_i64 %3226
  %5775 = arith.sitofp %5774 : i64 to f32
  %5776 = torch_c.to_f64 %3224
  %5777 = arith.truncf %5776 : f64 to f32
  %5778 = arith.divf %in, %5777 : f32
  // Ties go to the nearest even integer, as ONNX QuantizeLinear specifies;
  // math.round would send them away from zero instead.
  %5779 = math.roundeven %5778 : f32
  %5780 = arith.addf %5779, %5775 : f32
  // Saturate to the signed 8-bit range before the float -> int conversion.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
  %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
  %5783 = arith.select %5781, %cst_6197, %5780 : f32
  %5784 = arith.select %5782, %cst_6198, %5783 : f32
  %5785 = arith.fptosi %5784 : f32 to i8
  linalg.yield %5785 : i8
} -> tensor<256xi8>
%cast_3346 = tensor.cast %3229 : tensor<256xi8> to tensor<256xi8>
%cast_3347 = tensor.cast %cast_3346 : tensor<256xi8> to tensor<256xi8>
%3230 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%3231 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3232 = torch.aten.item %3230 : !torch.vtensor<[],f32> -> !torch.float
%3233 = torch_c.to_f64 %3232
%3234 = torch.aten.item %3231 : !torch.vtensor<[],si8> -> !torch.int
%3235 = torch_c.to_i64 %3234
%cast_3348 = tensor.cast %cast_3347 : tensor<256xi8> to tensor<256xi8>
%c1_3349 = arith.constant 1 : index
%c0_3350 = arith.constant 0 : index
%c256_3351 = arith.constant 256 : index
%3236 = tensor.empty() : tensor<256xf32>
// Elementwise i8 -> f32 dequantization of the 256-element vector:
//   x = (q - zero_point) * scale
// zero_point (%3234) and scale (%3232) are scalars captured from the
// enclosing function; the subtraction is done in i32.
%3237 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_3348 : tensor<256xi8>) outs(%3236 : tensor<256xf32>) {
^bb0(%dq_q: i8, %unused_init: f32):
  %dq_zp_i64 = torch_c.to_i64 %3234
  %dq_zp_i32 = arith.trunci %dq_zp_i64 : i64 to i32
  %dq_scale_f64 = torch_c.to_f64 %3232
  %dq_scale_f32 = arith.truncf %dq_scale_f64 : f64 to f32
  %dq_q_i32 = arith.extsi %dq_q : i8 to i32
  %dq_centered = arith.subi %dq_q_i32, %dq_zp_i32 : i32
  %dq_centered_f32 = arith.sitofp %dq_centered : i32 to f32
  %dq_x = arith.mulf %dq_centered_f32, %dq_scale_f32 : f32
  linalg.yield %dq_x : f32
} -> tensor<256xf32>
%cast_3352 = tensor.cast %3237 : tensor<256xf32> to tensor<256xf32>
%int1_3353 = torch.constant.int 1
%int1_3354 = torch.constant.int 1
%int1_3355 = torch.constant.int 1
%int1_3356 = torch.constant.int 1
%int1_3357 = torch.constant.int 1
%int1_3358 = torch.constant.int 1
%int0_3359 = torch.constant.int 0
%3238 = torch.prim.ListConstruct %int1_3353, %int1_3354 : (!torch.int, !torch.int) -> !torch.list<int>
%3239 = torch.prim.ListConstruct %int1_3355, %int1_3356 : (!torch.int, !torch.int) -> !torch.list<int>
%3240 = torch.prim.ListConstruct %int1_3357, %int1_3358 : (!torch.int, !torch.int) -> !torch.list<int>
%3241 = torch.prim.ListConstruct %int0_3359, %int0_3359 : (!torch.int, !torch.int) -> !torch.list<int>
%false_3360 = torch.constant.bool false
%int1_3361 = torch.constant.int 1
%3242 = torch_c.to_i64 %int1_3361
%3243 = torch_c.to_i64 %int1_3353
%3244 = torch_c.to_i64 %int1_3354
%3245 = torch_c.to_i64 %int0_3359
%3246 = torch_c.to_i64 %int0_3359
%c0_3362 = arith.constant 0 : index
%c1_3363 = arith.constant 1 : index
%c1_3364 = arith.constant 1 : index
%c256_3365 = arith.constant 256 : index
%c2_3366 = arith.constant 2 : index
%c14_3367 = arith.constant 14 : index
%c3_3368 = arith.constant 3 : index
%c14_3369 = arith.constant 14 : index
%c0_3370 = arith.constant 0 : index
%c256_3371 = arith.constant 256 : index
%c1_3372 = arith.constant 1 : index
%c256_3373 = arith.constant 256 : index
%c2_3374 = arith.constant 2 : index
%c3_3375 = arith.constant 3 : index
%c3_3376 = arith.constant 3 : index
%c3_3377 = arith.constant 3 : index
%3247 = arith.index_cast %3242 : i64 to index
%c0_3378 = arith.constant 0 : index
%3248 = arith.remsi %c256_3365, %3247 : index
%3249 = arith.cmpi eq, %c0_3378, %3248 : index
cf.assert %3249, "invalid: groups must divide input channel size evenly."
%c0_3379 = arith.constant 0 : index
%3250 = arith.remsi %c256_3371, %3247 : index
%3251 = arith.cmpi eq, %c0_3379, %3250 : index
cf.assert %3251, "invalid: groups must divide weight batch size evenly."
%c1_i64_3380 = arith.constant 1 : i64
%c1_i64_3381 = arith.constant 1 : i64
%c1_i64_3382 = arith.constant 1 : i64
%c1_i64_3383 = arith.constant 1 : i64
%cst_3384 = arith.constant 0.000000e+00 : f32
%c0_3385 = arith.constant 0 : index
%c1_3386 = arith.constant 1 : index
%c1_3387 = arith.constant 1 : index
%c256_3388 = arith.constant 256 : index
%c2_3389 = arith.constant 2 : index
%c14_3390 = arith.constant 14 : index
%c3_3391 = arith.constant 3 : index
%c14_3392 = arith.constant 14 : index
%c0_i64_3393 = arith.constant 0 : i64
%3252 = arith.index_cast %c0_i64_3393 : i64 to index
%3253 = arith.index_cast %c0_i64_3393 : i64 to index
%3254 = arith.index_cast %3243 : i64 to index
%3255 = arith.index_cast %3244 : i64 to index
// Pad %cast_3318 ahead of the 3x3 conv_2d_nchw_fchw that consumes
// %padded_3394. The first two pad amounts (%3252, %3253) are index casts of a
// constant 0; the last two (%3254, %3255) are index casts of %3243 / %3244,
// which are converted from torch.constant.int 1, so one element is added on
// each side of the two trailing dimensions.
// Every padded element is filled with %cst_3384 (0.0 : f32).
%padded_3394 = tensor.pad %cast_3318 low[%3252, %3253, %3254, %3255] high[%3252, %3253, %3254, %3255] {
^bb0(%pad_n: index, %pad_c: index, %pad_h: index, %pad_w: index):
  tensor.yield %cst_3384 : f32
} : tensor<1x256x14x14xf32> to tensor<?x?x?x?xf32>
%3256 = arith.index_cast %c3_3375 : index to i64
%c1_i64_3395 = arith.constant 1 : i64
%c2_i64_3396 = arith.constant 2 : i64
%3257 = arith.muli %3243, %c2_i64_3396 : i64
%3258 = arith.index_cast %c14_3367 : index to i64
%3259 = arith.addi %3258, %3257 : i64
%3260 = arith.subi %3256, %c1_i64_3395 : i64
%3261 = arith.muli %c1_i64_3380, %3260 : i64
%3262 = arith.subi %3259, %3261 : i64
%3263 = arith.subi %3262, %c1_i64_3395 : i64
%3264 = arith.floordivsi %3263, %c1_i64_3382 : i64
%3265 = arith.addi %3264, %c1_i64_3395 : i64
%3266 = arith.index_cast %3265 : i64 to index
%3267 = arith.index_cast %c3_3377 : index to i64
%c1_i64_3397 = arith.constant 1 : i64
%c2_i64_3398 = arith.constant 2 : i64
%3268 = arith.muli %3244, %c2_i64_3398 : i64
%3269 = arith.index_cast %c14_3369 : index to i64
%3270 = arith.addi %3269, %3268 : i64
%3271 = arith.subi %3267, %c1_i64_3397 : i64
%3272 = arith.muli %c1_i64_3381, %3271 : i64
%3273 = arith.subi %3270, %3272 : i64
%3274 = arith.subi %3273, %c1_i64_3397 : i64
%3275 = arith.floordivsi %3274, %c1_i64_3383 : i64
%3276 = arith.addi %3275, %c1_i64_3397 : i64
%3277 = arith.index_cast %3276 : i64 to index
%3278 = tensor.empty(%3266, %3277) : tensor<1x256x?x?xf32>
// Broadcast the 256-element vector %cast_3352 (the dequantized %120) along
// the channel dimension (#map3 selects d1) into the 1x256x?x? tensor %3278.
// The result is the initial accumulator (outs) of the convolution that
// follows, so it acts as a per-output-channel additive term.
%3279 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3352 : tensor<256xf32>) outs(%3278 : tensor<1x256x?x?xf32>) {
^bb0(%channel_val: f32, %unused_init: f32):
  linalg.yield %channel_val : f32
} -> tensor<1x256x?x?xf32>
%3280 = arith.floordivsi %c256_3365, %3247 : index
%3281 = arith.floordivsi %c256_3371, %3247 : index
%c0_3399 = arith.constant 0 : index
%c1_3400 = arith.constant 1 : index
%3282 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_3394, %cast_3341 : tensor<?x?x?x?xf32>, tensor<256x256x3x3xf32>) outs(%3279 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
%cast_3401 = tensor.cast %3282 : tensor<1x256x?x?xf32> to tensor<1x256x14x14xf32>
%c1_3402 = arith.constant 1 : index
%c1_3403 = arith.constant 1 : index
%c256_3404 = arith.constant 256 : index
%c2_3405 = arith.constant 2 : index
%c14_3406 = arith.constant 14 : index
%c3_3407 = arith.constant 3 : index
%c14_3408 = arith.constant 14 : index
%3283 = tensor.empty() : tensor<1x256x14x14xf32>
// Elementwise ReLU over the 1x256x14x14 conv result: yield x when x > 0,
// otherwise 0.0. The comparison is unordered (ugt), so a NaN input is
// passed through unchanged.
%3284 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3401 : tensor<1x256x14x14xf32>) outs(%3283 : tensor<1x256x14x14xf32>) {
^bb0(%relu_x: f32, %unused_init: f32):
  %relu_zero = arith.constant 0.000000e+00 : f32
  %relu_is_pos = arith.cmpf ugt, %relu_x, %relu_zero : f32
  %relu_y = arith.select %relu_is_pos, %relu_x, %relu_zero : f32
  linalg.yield %relu_y : f32
} -> tensor<1x256x14x14xf32>
%cast_3409 = tensor.cast %3284 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
%3285 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%3286 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_3410 = torch.constant.int 12
%3287 = torch.aten.item %3285 : !torch.vtensor<[],f32> -> !torch.float
%3288 = torch_c.to_f64 %3287
%3289 = torch.aten.item %3286 : !torch.vtensor<[],si8> -> !torch.int
%3290 = torch_c.to_i64 %3289
%c1_3411 = arith.constant 1 : index
%c1_3412 = arith.constant 1 : index
%c256_3413 = arith.constant 256 : index
%c2_3414 = arith.constant 2 : index
%c14_3415 = arith.constant 14 : index
%c3_3416 = arith.constant 3 : index
%c14_3417 = arith.constant 14 : index
%3291 = tensor.empty() : tensor<1x256x14x14xi8>
// Elementwise f32 -> i8 quantization of the 1x256x14x14 activation:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale (%3287) and zero_point (%3289) are scalars captured from the
// enclosing function.
%3292 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3409 : tensor<1x256x14x14xf32>) outs(%3291 : tensor<1x256x14x14xi8>) {
^bb0(%in: f32, %out: i8):
  // Zero point and scale, converted to f32 for the arithmetic below.
  %5774 = torch_c.to_i64 %3289
  %5775 = arith.sitofp %5774 : i64 to f32
  %5776 = torch_c.to_f64 %3287
  %5777 = arith.truncf %5776 : f64 to f32
  %5778 = arith.divf %in, %5777 : f32
  // Ties go to the nearest even integer, as ONNX QuantizeLinear specifies;
  // math.round would send them away from zero instead.
  %5779 = math.roundeven %5778 : f32
  %5780 = arith.addf %5779, %5775 : f32
  // Saturate to the signed 8-bit range before the float -> int conversion.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
  %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
  %5783 = arith.select %5781, %cst_6197, %5780 : f32
  %5784 = arith.select %5782, %cst_6198, %5783 : f32
  %5785 = arith.fptosi %5784 : f32 to i8
  linalg.yield %5785 : i8
} -> tensor<1x256x14x14xi8>
%cast_3418 = tensor.cast %3292 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
%cast_3419 = tensor.cast %cast_3418 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
%3293 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%3294 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3295 = torch.aten.item %3293 : !torch.vtensor<[],f32> -> !torch.float
%3296 = torch_c.to_f64 %3295
%3297 = torch.aten.item %3294 : !torch.vtensor<[],si8> -> !torch.int
%3298 = torch_c.to_i64 %3297
%cast_3420 = tensor.cast %cast_3419 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
%c1_3421 = arith.constant 1 : index
%c1_3422 = arith.constant 1 : index
%c256_3423 = arith.constant 256 : index
%c2_3424 = arith.constant 2 : index
%c14_3425 = arith.constant 14 : index
%c3_3426 = arith.constant 3 : index
%c14_3427 = arith.constant 14 : index
%3299 = tensor.empty() : tensor<1x256x14x14xf32>
// Elementwise i8 -> f32 dequantization of the 1x256x14x14 activation:
//   x = (q - zero_point) * scale
// zero_point (%3297) and scale (%3295) are scalars captured from the
// enclosing function; the subtraction is done in i32.
%3300 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3420 : tensor<1x256x14x14xi8>) outs(%3299 : tensor<1x256x14x14xf32>) {
^bb0(%dq_q: i8, %unused_init: f32):
  %dq_zp_i64 = torch_c.to_i64 %3297
  %dq_zp_i32 = arith.trunci %dq_zp_i64 : i64 to i32
  %dq_scale_f64 = torch_c.to_f64 %3295
  %dq_scale_f32 = arith.truncf %dq_scale_f64 : f64 to f32
  %dq_q_i32 = arith.extsi %dq_q : i8 to i32
  %dq_centered = arith.subi %dq_q_i32, %dq_zp_i32 : i32
  %dq_centered_f32 = arith.sitofp %dq_centered : i32 to f32
  %dq_x = arith.mulf %dq_centered_f32, %dq_scale_f32 : f32
  linalg.yield %dq_x : f32
} -> tensor<1x256x14x14xf32>
%cast_3428 = tensor.cast %3300 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
%3301 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3302 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_3429 = torch.constant.int 12
%3303 = torch.aten.item %3301 : !torch.vtensor<[],f32> -> !torch.float
%3304 = torch_c.to_f64 %3303
%3305 = torch.aten.item %3302 : !torch.vtensor<[],si8> -> !torch.int
%3306 = torch_c.to_i64 %3305
%c1_3430 = arith.constant 1 : index
%c0_3431 = arith.constant 0 : index
%c1024_3432 = arith.constant 1024 : index
%c1_3433 = arith.constant 1 : index
%c256_3434 = arith.constant 256 : index
%3307 = tensor.empty() : tensor<1024x256x1x1xi8>
// Elementwise f32 -> i8 quantization of the 1024x256x1x1 tensor %122:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale (%3303) and zero_point (%3305) are scalars captured from the
// enclosing function. The result is dequantized again further down and used
// as the filter operand of a conv_2d_nchw_fchw.
%3308 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%122 : tensor<1024x256x1x1xf32>) outs(%3307 : tensor<1024x256x1x1xi8>) {
^bb0(%in: f32, %out: i8):
  // Zero point and scale, converted to f32 for the arithmetic below.
  %5774 = torch_c.to_i64 %3305
  %5775 = arith.sitofp %5774 : i64 to f32
  %5776 = torch_c.to_f64 %3303
  %5777 = arith.truncf %5776 : f64 to f32
  %5778 = arith.divf %in, %5777 : f32
  // Ties go to the nearest even integer, as ONNX QuantizeLinear specifies;
  // math.round would send them away from zero instead.
  %5779 = math.roundeven %5778 : f32
  %5780 = arith.addf %5779, %5775 : f32
  // Saturate to the signed 8-bit range before the float -> int conversion.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
  %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
  %5783 = arith.select %5781, %cst_6197, %5780 : f32
  %5784 = arith.select %5782, %cst_6198, %5783 : f32
  %5785 = arith.fptosi %5784 : f32 to i8
  linalg.yield %5785 : i8
} -> tensor<1024x256x1x1xi8>
%cast_3435 = tensor.cast %3308 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
%cast_3436 = tensor.cast %cast_3435 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
%3309 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3310 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3311 = torch.aten.item %3309 : !torch.vtensor<[],f32> -> !torch.float
%3312 = torch_c.to_f64 %3311
%3313 = torch.aten.item %3310 : !torch.vtensor<[],si8> -> !torch.int
%3314 = torch_c.to_i64 %3313
%cast_3437 = tensor.cast %cast_3436 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
%c1_3438 = arith.constant 1 : index
%c0_3439 = arith.constant 0 : index
%c1024_3440 = arith.constant 1024 : index
%c1_3441 = arith.constant 1 : index
%c256_3442 = arith.constant 256 : index
%3315 = tensor.empty() : tensor<1024x256x1x1xf32>
// Elementwise i8 -> f32 dequantization of the 1024x256x1x1 tensor:
//   x = (q - zero_point) * scale
// zero_point (%3313) and scale (%3311) are scalars captured from the
// enclosing function; the subtraction is done in i32.
%3316 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3437 : tensor<1024x256x1x1xi8>) outs(%3315 : tensor<1024x256x1x1xf32>) {
^bb0(%dq_q: i8, %unused_init: f32):
  %dq_zp_i64 = torch_c.to_i64 %3313
  %dq_zp_i32 = arith.trunci %dq_zp_i64 : i64 to i32
  %dq_scale_f64 = torch_c.to_f64 %3311
  %dq_scale_f32 = arith.truncf %dq_scale_f64 : f64 to f32
  %dq_q_i32 = arith.extsi %dq_q : i8 to i32
  %dq_centered = arith.subi %dq_q_i32, %dq_zp_i32 : i32
  %dq_centered_f32 = arith.sitofp %dq_centered : i32 to f32
  %dq_x = arith.mulf %dq_centered_f32, %dq_scale_f32 : f32
  linalg.yield %dq_x : f32
} -> tensor<1024x256x1x1xf32>
%cast_3443 = tensor.cast %3316 : tensor<1024x256x1x1xf32> to tensor<1024x256x1x1xf32>
%3317 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3318 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_3444 = torch.constant.int 12
%3319 = torch.aten.item %3317 : !torch.vtensor<[],f32> -> !torch.float
%3320 = torch_c.to_f64 %3319
%3321 = torch.aten.item %3318 : !torch.vtensor<[],si8> -> !torch.int
%3322 = torch_c.to_i64 %3321
%c1_3445 = arith.constant 1 : index
%c0_3446 = arith.constant 0 : index
%c1024_3447 = arith.constant 1024 : index
%3323 = tensor.empty() : tensor<1024xi8>
// Elementwise f32 -> i8 quantization of the 1024-element vector %124:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale (%3319) and zero_point (%3321) are scalars captured from the
// enclosing function.
%3324 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%124 : tensor<1024xf32>) outs(%3323 : tensor<1024xi8>) {
^bb0(%in: f32, %out: i8):
  // Zero point and scale, converted to f32 for the arithmetic below.
  %5774 = torch_c.to_i64 %3321
  %5775 = arith.sitofp %5774 : i64 to f32
  %5776 = torch_c.to_f64 %3319
  %5777 = arith.truncf %5776 : f64 to f32
  %5778 = arith.divf %in, %5777 : f32
  // Ties go to the nearest even integer, as ONNX QuantizeLinear specifies;
  // math.round would send them away from zero instead.
  %5779 = math.roundeven %5778 : f32
  %5780 = arith.addf %5779, %5775 : f32
  // Saturate to the signed 8-bit range before the float -> int conversion.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
  %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
  %5783 = arith.select %5781, %cst_6197, %5780 : f32
  %5784 = arith.select %5782, %cst_6198, %5783 : f32
  %5785 = arith.fptosi %5784 : f32 to i8
  linalg.yield %5785 : i8
} -> tensor<1024xi8>
%cast_3448 = tensor.cast %3324 : tensor<1024xi8> to tensor<1024xi8>
%cast_3449 = tensor.cast %cast_3448 : tensor<1024xi8> to tensor<1024xi8>
%3325 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3326 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3327 = torch.aten.item %3325 : !torch.vtensor<[],f32> -> !torch.float
%3328 = torch_c.to_f64 %3327
%3329 = torch.aten.item %3326 : !torch.vtensor<[],si8> -> !torch.int
%3330 = torch_c.to_i64 %3329
%cast_3450 = tensor.cast %cast_3449 : tensor<1024xi8> to tensor<1024xi8>
%c1_3451 = arith.constant 1 : index
%c0_3452 = arith.constant 0 : index
%c1024_3453 = arith.constant 1024 : index
%3331 = tensor.empty() : tensor<1024xf32>
// Elementwise i8 -> f32 dequantization of the 1024-element vector:
//   x = (q - zero_point) * scale
// zero_point (%3329) and scale (%3327) are scalars captured from the
// enclosing function; the subtraction is done in i32.
%3332 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_3450 : tensor<1024xi8>) outs(%3331 : tensor<1024xf32>) {
^bb0(%dq_q: i8, %unused_init: f32):
  %dq_zp_i64 = torch_c.to_i64 %3329
  %dq_zp_i32 = arith.trunci %dq_zp_i64 : i64 to i32
  %dq_scale_f64 = torch_c.to_f64 %3327
  %dq_scale_f32 = arith.truncf %dq_scale_f64 : f64 to f32
  %dq_q_i32 = arith.extsi %dq_q : i8 to i32
  %dq_centered = arith.subi %dq_q_i32, %dq_zp_i32 : i32
  %dq_centered_f32 = arith.sitofp %dq_centered : i32 to f32
  %dq_x = arith.mulf %dq_centered_f32, %dq_scale_f32 : f32
  linalg.yield %dq_x : f32
} -> tensor<1024xf32>
%cast_3454 = tensor.cast %3332 : tensor<1024xf32> to tensor<1024xf32>
%int0_3455 = torch.constant.int 0
%int0_3456 = torch.constant.int 0
%int1_3457 = torch.constant.int 1
%int1_3458 = torch.constant.int 1
%int1_3459 = torch.constant.int 1
%int1_3460 = torch.constant.int 1
%int0_3461 = torch.constant.int 0
%3333 = torch.prim.ListConstruct %int0_3455, %int0_3456 : (!torch.int, !torch.int) -> !torch.list<int>
%3334 = torch.prim.ListConstruct %int1_3457, %int1_3458 : (!torch.int, !torch.int) -> !torch.list<int>
%3335 = torch.prim.ListConstruct %int1_3459, %int1_3460 : (!torch.int, !torch.int) -> !torch.list<int>
%3336 = torch.prim.ListConstruct %int0_3461, %int0_3461 : (!torch.int, !torch.int) -> !torch.list<int>
%false_3462 = torch.constant.bool false
%int1_3463 = torch.constant.int 1
%3337 = torch_c.to_i64 %int1_3463
%3338 = torch_c.to_i64 %int0_3455
%3339 = torch_c.to_i64 %int0_3456
%3340 = torch_c.to_i64 %int0_3461
%3341 = torch_c.to_i64 %int0_3461
%c0_3464 = arith.constant 0 : index
%c1_3465 = arith.constant 1 : index
%c1_3466 = arith.constant 1 : index
%c256_3467 = arith.constant 256 : index
%c2_3468 = arith.constant 2 : index
%c14_3469 = arith.constant 14 : index
%c3_3470 = arith.constant 3 : index
%c14_3471 = arith.constant 14 : index
%c0_3472 = arith.constant 0 : index
%c1024_3473 = arith.constant 1024 : index
%c1_3474 = arith.constant 1 : index
%c256_3475 = arith.constant 256 : index
%c2_3476 = arith.constant 2 : index
%c1_3477 = arith.constant 1 : index
%c3_3478 = arith.constant 3 : index
%c1_3479 = arith.constant 1 : index
%3342 = arith.index_cast %3337 : i64 to index
%c0_3480 = arith.constant 0 : index
%3343 = arith.remsi %c256_3467, %3342 : index
%3344 = arith.cmpi eq, %c0_3480, %3343 : index
cf.assert %3344, "invalid: groups must divide input channel size evenly."
%c0_3481 = arith.constant 0 : index
%3345 = arith.remsi %c1024_3473, %3342 : index
%3346 = arith.cmpi eq, %c0_3481, %3345 : index
cf.assert %3346, "invalid: groups must divide weight batch size evenly."
%c1_i64_3482 = arith.constant 1 : i64
%c1_i64_3483 = arith.constant 1 : i64
%c1_i64_3484 = arith.constant 1 : i64
%c1_i64_3485 = arith.constant 1 : i64
%cst_3486 = arith.constant 0.000000e+00 : f32
%c0_3487 = arith.constant 0 : index
%c1_3488 = arith.constant 1 : index
%c1_3489 = arith.constant 1 : index
%c256_3490 = arith.constant 256 : index
%c2_3491 = arith.constant 2 : index
%c14_3492 = arith.constant 14 : index
%c3_3493 = arith.constant 3 : index
%c14_3494 = arith.constant 14 : index
%c0_i64_3495 = arith.constant 0 : i64
%3347 = arith.index_cast %c0_i64_3495 : i64 to index
%3348 = arith.index_cast %c0_i64_3495 : i64 to index
%3349 = arith.index_cast %3338 : i64 to index
%3350 = arith.index_cast %3339 : i64 to index
// Pad %cast_3428 ahead of the 1x1 conv_2d_nchw_fchw that consumes
// %padded_3496. The first two pad amounts (%3347, %3348) are index casts of a
// constant 0; the last two (%3349, %3350) are index casts of %3338 / %3339,
// which are converted from torch.constant.int 0, so no elements are added.
// The fill value is %cst_3486 (0.0 : f32).
%padded_3496 = tensor.pad %cast_3428 low[%3347, %3348, %3349, %3350] high[%3347, %3348, %3349, %3350] {
^bb0(%pad_n: index, %pad_c: index, %pad_h: index, %pad_w: index):
  tensor.yield %cst_3486 : f32
} : tensor<1x256x14x14xf32> to tensor<?x?x?x?xf32>
%3351 = arith.index_cast %c1_3477 : index to i64
%c1_i64_3497 = arith.constant 1 : i64
%c2_i64_3498 = arith.constant 2 : i64
%3352 = arith.muli %3338, %c2_i64_3498 : i64
%3353 = arith.index_cast %c14_3469 : index to i64
%3354 = arith.addi %3353, %3352 : i64
%3355 = arith.subi %3351, %c1_i64_3497 : i64
%3356 = arith.muli %c1_i64_3482, %3355 : i64
%3357 = arith.subi %3354, %3356 : i64
%3358 = arith.subi %3357, %c1_i64_3497 : i64
%3359 = arith.floordivsi %3358, %c1_i64_3484 : i64
%3360 = arith.addi %3359, %c1_i64_3497 : i64
%3361 = arith.index_cast %3360 : i64 to index
%3362 = arith.index_cast %c1_3479 : index to i64
%c1_i64_3499 = arith.constant 1 : i64
%c2_i64_3500 = arith.constant 2 : i64
%3363 = arith.muli %3339, %c2_i64_3500 : i64
%3364 = arith.index_cast %c14_3471 : index to i64
%3365 = arith.addi %3364, %3363 : i64
%3366 = arith.subi %3362, %c1_i64_3499 : i64
%3367 = arith.muli %c1_i64_3483, %3366 : i64
%3368 = arith.subi %3365, %3367 : i64
%3369 = arith.subi %3368, %c1_i64_3499 : i64
%3370 = arith.floordivsi %3369, %c1_i64_3485 : i64
%3371 = arith.addi %3370, %c1_i64_3499 : i64
%3372 = arith.index_cast %3371 : i64 to index
%3373 = tensor.empty(%3361, %3372) : tensor<1x1024x?x?xf32>
// Broadcast the 1024-element vector %cast_3454 (the dequantized %124) along
// the channel dimension (#map3 selects d1) into the 1x1024x?x? tensor %3373.
// The result is the initial accumulator (outs) of the convolution that
// follows, so it acts as a per-output-channel additive term.
%3374 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3454 : tensor<1024xf32>) outs(%3373 : tensor<1x1024x?x?xf32>) {
^bb0(%channel_val: f32, %unused_init: f32):
  linalg.yield %channel_val : f32
} -> tensor<1x1024x?x?xf32>
%3375 = arith.floordivsi %c256_3467, %3342 : index
%3376 = arith.floordivsi %c1024_3473, %3342 : index
%c0_3501 = arith.constant 0 : index
%c1_3502 = arith.constant 1 : index
%3377 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_3496, %cast_3443 : tensor<?x?x?x?xf32>, tensor<1024x256x1x1xf32>) outs(%3374 : tensor<1x1024x?x?xf32>) -> tensor<1x1024x?x?xf32>
%cast_3503 = tensor.cast %3377 : tensor<1x1024x?x?xf32> to tensor<1x1024x14x14xf32>
%3378 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3379 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_3504 = torch.constant.int 12
%3380 = torch.aten.item %3378 : !torch.vtensor<[],f32> -> !torch.float
%3381 = torch_c.to_f64 %3380
%3382 = torch.aten.item %3379 : !torch.vtensor<[],si8> -> !torch.int
%3383 = torch_c.to_i64 %3382
%c1_3505 = arith.constant 1 : index
%c1_3506 = arith.constant 1 : index
%c1024_3507 = arith.constant 1024 : index
%c2_3508 = arith.constant 2 : index
%c14_3509 = arith.constant 14 : index
%c3_3510 = arith.constant 3 : index
%c14_3511 = arith.constant 14 : index
%3384 = tensor.empty() : tensor<1x1024x14x14xi8>
// Per-element quantization of %cast_3503 (f32) into i8:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is the torch float %3380 and zero_point the torch int %3382; both are
// defined above this op and captured by the region.
//
// Rounding uses math.roundeven (ties to even). math.round breaks ties away
// from zero, which differs from ONNX QuantizeLinear and from
// torch.quantize_per_tensor on exact .5 quotients. The scale literal feeding
// %3380 is 7.8125e-03 (2^-7), so such ties are reachable for ordinary inputs.
%3385 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3503 : tensor<1x1024x14x14xf32>) outs(%3384 : tensor<1x1024x14x14xi8>) {
^bb0(%in: f32, %out: i8):
  // Zero point and scale converted to f32 for the arithmetic below.
  %5774 = torch_c.to_i64 %3382
  %5775 = arith.sitofp %5774 : i64 to f32
  %5776 = torch_c.to_f64 %3380
  %5777 = arith.truncf %5776 : f64 to f32
  // x / scale, rounded to the nearest integer with ties going to even.
  %5778 = arith.divf %in, %5777 : f32
  %5779 = math.roundeven %5778 : f32
  %5780 = arith.addf %5779, %5775 : f32
  // Saturate to the signed 8-bit range before the float -> int conversion.
  // ult/ugt are unordered predicates, so a NaN value takes both selects and
  // ends up as 127.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
  %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
  %5783 = arith.select %5781, %cst_6197, %5780 : f32
  %5784 = arith.select %5782, %cst_6198, %5783 : f32
  %5785 = arith.fptosi %5784 : f32 to i8
  linalg.yield %5785 : i8
} -> tensor<1x1024x14x14xi8>
%cast_3512 = tensor.cast %3385 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
%cast_3513 = tensor.cast %cast_3512 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
%3386 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3387 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3388 = torch.aten.item %3386 : !torch.vtensor<[],f32> -> !torch.float
%3389 = torch_c.to_f64 %3388
%3390 = torch.aten.item %3387 : !torch.vtensor<[],si8> -> !torch.int
%3391 = torch_c.to_i64 %3390
%cast_3514 = tensor.cast %cast_3513 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
%c1_3515 = arith.constant 1 : index
%c1_3516 = arith.constant 1 : index
%c1024_3517 = arith.constant 1024 : index
%c2_3518 = arith.constant 2 : index
%c14_3519 = arith.constant 14 : index
%c3_3520 = arith.constant 3 : index
%c14_3521 = arith.constant 14 : index
%3392 = tensor.empty() : tensor<1x1024x14x14xf32>
// Per-element dequantization of the i8 tensor %cast_3514 back to f32:
//   x = (q - zero_point) * scale
// zero_point is the torch int %3390 and scale the torch float %3388, both
// captured from the enclosing scope.
%3393 = linalg.generic {
    indexing_maps = [#map, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_3514 : tensor<1x1024x14x14xi8>)
    outs(%3392 : tensor<1x1024x14x14xf32>) {
^bb0(%q_i8: i8, %unused_out: f32):
  // Scalars converted to the types used by the arithmetic below.
  %scale_f64 = torch_c.to_f64 %3388
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  %zp_i64 = torch_c.to_i64 %3390
  %zp_i32 = arith.trunci %zp_i64 : i64 to i32
  // The subtraction is carried out in i32 on the sign-extended input.
  %q_i32 = arith.extsi %q_i8 : i8 to i32
  %centered_i32 = arith.subi %q_i32, %zp_i32 : i32
  %centered_f32 = arith.sitofp %centered_i32 : i32 to f32
  %dequantized = arith.mulf %centered_f32, %scale_f32 : f32
  linalg.yield %dequantized : f32
} -> tensor<1x1024x14x14xf32>
%cast_3522 = tensor.cast %3393 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
%int1_3523 = torch.constant.int 1
%3394 = torch_c.to_i64 %int1_3523
%c1_3524 = arith.constant 1 : index
%c1_3525 = arith.constant 1 : index
%c1024_3526 = arith.constant 1024 : index
%c2_3527 = arith.constant 2 : index
%c14_3528 = arith.constant 14 : index
%c3_3529 = arith.constant 3 : index
%c14_3530 = arith.constant 14 : index
%c1_3531 = arith.constant 1 : index
%c1024_3532 = arith.constant 1024 : index
%3395 = arith.cmpi eq, %c1024_3526, %c1024_3532 : index
cf.assert %3395, "mismatched size for broadcast"
%c2_3533 = arith.constant 2 : index
%c14_3534 = arith.constant 14 : index
%3396 = arith.cmpi eq, %c14_3528, %c14_3534 : index
cf.assert %3396, "mismatched size for broadcast"
%c3_3535 = arith.constant 3 : index
%c14_3536 = arith.constant 14 : index
%3397 = arith.cmpi eq, %c14_3530, %c14_3536 : index
cf.assert %3397, "mismatched size for broadcast"
%3398 = tensor.empty() : tensor<1x1024x14x14xf32>
// Element-wise lhs + alpha * rhs over two 1x1024x14x14 f32 tensors.
// lhs is %cast_3522 (the dequantized conv output), rhs is %cast_3216 (defined
// earlier in the function), and alpha is the i64 %3394, which is the
// conversion of the torch constant 1 defined just above.
%3399 = linalg.generic {
    indexing_maps = [#map, #map, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_3522, %cast_3216 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>)
    outs(%3398 : tensor<1x1024x14x14xf32>) {
^bb0(%lhs: f32, %rhs: f32, %unused_out: f32):
  %alpha_f32 = arith.sitofp %3394 : i64 to f32
  %rhs_scaled = arith.mulf %rhs, %alpha_f32 : f32
  %sum = arith.addf %lhs, %rhs_scaled : f32
  linalg.yield %sum : f32
} -> tensor<1x1024x14x14xf32>
%cast_3537 = tensor.cast %3399 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
%c1_3538 = arith.constant 1 : index
%c1_3539 = arith.constant 1 : index
%c1024_3540 = arith.constant 1024 : index
%c2_3541 = arith.constant 2 : index
%c14_3542 = arith.constant 14 : index
%c3_3543 = arith.constant 3 : index
%c14_3544 = arith.constant 14 : index
%3400 = tensor.empty() : tensor<1x1024x14x14xf32>
// Element-wise ReLU over %cast_3537: yields the input where it compares
// greater than 0.0 and 0.0 otherwise. The predicate is the unordered `ugt`,
// which is also true for NaN, so a NaN input is passed through unchanged.
%3401 = linalg.generic {
    indexing_maps = [#map, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_3537 : tensor<1x1024x14x14xf32>)
    outs(%3400 : tensor<1x1024x14x14xf32>) {
^bb0(%x: f32, %unused_out: f32):
  %zero_f32 = arith.constant 0.000000e+00 : f32
  %is_positive = arith.cmpf ugt, %x, %zero_f32 : f32
  %rectified = arith.select %is_positive, %x, %zero_f32 : f32
  linalg.yield %rectified : f32
} -> tensor<1x1024x14x14xf32>
%cast_3545 = tensor.cast %3401 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
%3402 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%3403 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_3546 = torch.constant.int 12
%3404 = torch.aten.item %3402 : !torch.vtensor<[],f32> -> !torch.float
%3405 = torch_c.to_f64 %3404
%3406 = torch.aten.item %3403 : !torch.vtensor<[],si8> -> !torch.int
%3407 = torch_c.to_i64 %3406
%c1_3547 = arith.constant 1 : index
%c1_3548 = arith.constant 1 : index
%c1024_3549 = arith.constant 1024 : index
%c2_3550 = arith.constant 2 : index
%c14_3551 = arith.constant 14 : index
%c3_3552 = arith.constant 3 : index
%c14_3553 = arith.constant 14 : index
%3408 = tensor.empty() : tensor<1x1024x14x14xi8>
// Per-element quantization of %cast_3545 (f32) into i8:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is the torch float %3404 and zero_point the torch int %3406; both are
// defined above this op and captured by the region.
//
// Rounding uses math.roundeven (ties to even). math.round breaks ties away
// from zero, which differs from ONNX QuantizeLinear and from
// torch.quantize_per_tensor on exact .5 quotients. The scale literal feeding
// %3404 is 1.5625e-02 (2^-6), so such ties are reachable for ordinary inputs.
%3409 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3545 : tensor<1x1024x14x14xf32>) outs(%3408 : tensor<1x1024x14x14xi8>) {
^bb0(%in: f32, %out: i8):
  // Zero point and scale converted to f32 for the arithmetic below.
  %5774 = torch_c.to_i64 %3406
  %5775 = arith.sitofp %5774 : i64 to f32
  %5776 = torch_c.to_f64 %3404
  %5777 = arith.truncf %5776 : f64 to f32
  // x / scale, rounded to the nearest integer with ties going to even.
  %5778 = arith.divf %in, %5777 : f32
  %5779 = math.roundeven %5778 : f32
  %5780 = arith.addf %5779, %5775 : f32
  // Saturate to the signed 8-bit range before the float -> int conversion.
  // ult/ugt are unordered predicates, so a NaN value takes both selects and
  // ends up as 127.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
  %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
  %5783 = arith.select %5781, %cst_6197, %5780 : f32
  %5784 = arith.select %5782, %cst_6198, %5783 : f32
  %5785 = arith.fptosi %5784 : f32 to i8
  linalg.yield %5785 : i8
} -> tensor<1x1024x14x14xi8>
%cast_3554 = tensor.cast %3409 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
%cast_3555 = tensor.cast %cast_3554 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
%3410 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%3411 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3412 = torch.aten.item %3410 : !torch.vtensor<[],f32> -> !torch.float
%3413 = torch_c.to_f64 %3412
%3414 = torch.aten.item %3411 : !torch.vtensor<[],si8> -> !torch.int
%3415 = torch_c.to_i64 %3414
%cast_3556 = tensor.cast %cast_3555 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
%c1_3557 = arith.constant 1 : index
%c1_3558 = arith.constant 1 : index
%c1024_3559 = arith.constant 1024 : index
%c2_3560 = arith.constant 2 : index
%c14_3561 = arith.constant 14 : index
%c3_3562 = arith.constant 3 : index
%c14_3563 = arith.constant 14 : index
%3416 = tensor.empty() : tensor<1x1024x14x14xf32>
// Per-element dequantization of the i8 tensor %cast_3556 back to f32:
//   x = (q - zero_point) * scale
// zero_point is the torch int %3414 and scale the torch float %3412, both
// captured from the enclosing scope.
%3417 = linalg.generic {
    indexing_maps = [#map, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_3556 : tensor<1x1024x14x14xi8>)
    outs(%3416 : tensor<1x1024x14x14xf32>) {
^bb0(%q_i8: i8, %unused_out: f32):
  // Scalars converted to the types used by the arithmetic below.
  %scale_f64 = torch_c.to_f64 %3412
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  %zp_i64 = torch_c.to_i64 %3414
  %zp_i32 = arith.trunci %zp_i64 : i64 to i32
  // The subtraction is carried out in i32 on the sign-extended input.
  %q_i32 = arith.extsi %q_i8 : i8 to i32
  %centered_i32 = arith.subi %q_i32, %zp_i32 : i32
  %centered_f32 = arith.sitofp %centered_i32 : i32 to f32
  %dequantized = arith.mulf %centered_f32, %scale_f32 : f32
  linalg.yield %dequantized : f32
} -> tensor<1x1024x14x14xf32>
%cast_3564 = tensor.cast %3417 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
%3418 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
%3419 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_3565 = torch.constant.int 12
%3420 = torch.aten.item %3418 : !torch.vtensor<[],f32> -> !torch.float
%3421 = torch_c.to_f64 %3420
%3422 = torch.aten.item %3419 : !torch.vtensor<[],si8> -> !torch.int
%3423 = torch_c.to_i64 %3422
%c1_3566 = arith.constant 1 : index
%c0_3567 = arith.constant 0 : index
%c256_3568 = arith.constant 256 : index
%c1_3569 = arith.constant 1 : index
%c1024_3570 = arith.constant 1024 : index
%3424 = tensor.empty() : tensor<256x1024x1x1xi8>
// Per-element quantization of the 256x1024x1x1 f32 tensor %126 into i8:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is the torch float %3420 and zero_point the torch int %3422; both are
// defined above this op and captured by the region. After the dequantize step
// that follows, this tensor becomes the filter operand of the 1x1 convolution
// further down.
//
// Rounding uses math.roundeven (ties to even). math.round breaks ties away
// from zero, which differs from ONNX QuantizeLinear and from
// torch.quantize_per_tensor on exact .5 quotients. The scale literal feeding
// %3420 is 0.001953125 (2^-9).
%3425 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%126 : tensor<256x1024x1x1xf32>) outs(%3424 : tensor<256x1024x1x1xi8>) {
^bb0(%in: f32, %out: i8):
  // Zero point and scale converted to f32 for the arithmetic below.
  %5774 = torch_c.to_i64 %3422
  %5775 = arith.sitofp %5774 : i64 to f32
  %5776 = torch_c.to_f64 %3420
  %5777 = arith.truncf %5776 : f64 to f32
  // x / scale, rounded to the nearest integer with ties going to even.
  %5778 = arith.divf %in, %5777 : f32
  %5779 = math.roundeven %5778 : f32
  %5780 = arith.addf %5779, %5775 : f32
  // Saturate to the signed 8-bit range before the float -> int conversion.
  // ult/ugt are unordered predicates, so a NaN value takes both selects and
  // ends up as 127.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
  %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
  %5783 = arith.select %5781, %cst_6197, %5780 : f32
  %5784 = arith.select %5782, %cst_6198, %5783 : f32
  %5785 = arith.fptosi %5784 : f32 to i8
  linalg.yield %5785 : i8
} -> tensor<256x1024x1x1xi8>
%cast_3571 = tensor.cast %3425 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
%cast_3572 = tensor.cast %cast_3571 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
%3426 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
%3427 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3428 = torch.aten.item %3426 : !torch.vtensor<[],f32> -> !torch.float
%3429 = torch_c.to_f64 %3428
%3430 = torch.aten.item %3427 : !torch.vtensor<[],si8> -> !torch.int
%3431 = torch_c.to_i64 %3430
%cast_3573 = tensor.cast %cast_3572 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
%c1_3574 = arith.constant 1 : index
%c0_3575 = arith.constant 0 : index
%c256_3576 = arith.constant 256 : index
%c1_3577 = arith.constant 1 : index
%c1024_3578 = arith.constant 1024 : index
%3432 = tensor.empty() : tensor<256x1024x1x1xf32>
// Per-element dequantization of the i8 filter tensor %cast_3573 back to f32:
//   x = (q - zero_point) * scale
// zero_point is the torch int %3430 and scale the torch float %3428, both
// captured from the enclosing scope.
%3433 = linalg.generic {
    indexing_maps = [#map4, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_3573 : tensor<256x1024x1x1xi8>)
    outs(%3432 : tensor<256x1024x1x1xf32>) {
^bb0(%q_i8: i8, %unused_out: f32):
  // Scalars converted to the types used by the arithmetic below.
  %scale_f64 = torch_c.to_f64 %3428
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  %zp_i64 = torch_c.to_i64 %3430
  %zp_i32 = arith.trunci %zp_i64 : i64 to i32
  // The subtraction is carried out in i32 on the sign-extended input.
  %q_i32 = arith.extsi %q_i8 : i8 to i32
  %centered_i32 = arith.subi %q_i32, %zp_i32 : i32
  %centered_f32 = arith.sitofp %centered_i32 : i32 to f32
  %dequantized = arith.mulf %centered_f32, %scale_f32 : f32
  linalg.yield %dequantized : f32
} -> tensor<256x1024x1x1xf32>
%cast_3579 = tensor.cast %3433 : tensor<256x1024x1x1xf32> to tensor<256x1024x1x1xf32>
%3434 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3435 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_3580 = torch.constant.int 12
%3436 = torch.aten.item %3434 : !torch.vtensor<[],f32> -> !torch.float
%3437 = torch_c.to_f64 %3436
%3438 = torch.aten.item %3435 : !torch.vtensor<[],si8> -> !torch.int
%3439 = torch_c.to_i64 %3438
%c1_3581 = arith.constant 1 : index
%c0_3582 = arith.constant 0 : index
%c256_3583 = arith.constant 256 : index
%3440 = tensor.empty() : tensor<256xi8>
// Per-element quantization of the 256-element f32 tensor %128 into i8:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is the torch float %3436 and zero_point the torch int %3438; both are
// defined above this op and captured by the region. After the dequantize step
// that follows, this tensor is broadcast into the accumulator of the 1x1
// convolution further down.
//
// Rounding uses math.roundeven (ties to even). math.round breaks ties away
// from zero, which differs from ONNX QuantizeLinear and from
// torch.quantize_per_tensor on exact .5 quotients. The scale literal feeding
// %3436 is 3.90625e-03 (2^-8).
%3441 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%128 : tensor<256xf32>) outs(%3440 : tensor<256xi8>) {
^bb0(%in: f32, %out: i8):
  // Zero point and scale converted to f32 for the arithmetic below.
  %5774 = torch_c.to_i64 %3438
  %5775 = arith.sitofp %5774 : i64 to f32
  %5776 = torch_c.to_f64 %3436
  %5777 = arith.truncf %5776 : f64 to f32
  // x / scale, rounded to the nearest integer with ties going to even.
  %5778 = arith.divf %in, %5777 : f32
  %5779 = math.roundeven %5778 : f32
  %5780 = arith.addf %5779, %5775 : f32
  // Saturate to the signed 8-bit range before the float -> int conversion.
  // ult/ugt are unordered predicates, so a NaN value takes both selects and
  // ends up as 127.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
  %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
  %5783 = arith.select %5781, %cst_6197, %5780 : f32
  %5784 = arith.select %5782, %cst_6198, %5783 : f32
  %5785 = arith.fptosi %5784 : f32 to i8
  linalg.yield %5785 : i8
} -> tensor<256xi8>
%cast_3584 = tensor.cast %3441 : tensor<256xi8> to tensor<256xi8>
%cast_3585 = tensor.cast %cast_3584 : tensor<256xi8> to tensor<256xi8>
%3442 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3443 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3444 = torch.aten.item %3442 : !torch.vtensor<[],f32> -> !torch.float
%3445 = torch_c.to_f64 %3444
%3446 = torch.aten.item %3443 : !torch.vtensor<[],si8> -> !torch.int
%3447 = torch_c.to_i64 %3446
%cast_3586 = tensor.cast %cast_3585 : tensor<256xi8> to tensor<256xi8>
%c1_3587 = arith.constant 1 : index
%c0_3588 = arith.constant 0 : index
%c256_3589 = arith.constant 256 : index
%3448 = tensor.empty() : tensor<256xf32>
// Per-element dequantization of the 256-element i8 tensor %cast_3586 to f32:
//   x = (q - zero_point) * scale
// zero_point is the torch int %3446 and scale the torch float %3444, both
// captured from the enclosing scope.
%3449 = linalg.generic {
    indexing_maps = [#map2, #map2],
    iterator_types = ["parallel"]
  } ins(%cast_3586 : tensor<256xi8>)
    outs(%3448 : tensor<256xf32>) {
^bb0(%q_i8: i8, %unused_out: f32):
  // Scalars converted to the types used by the arithmetic below.
  %scale_f64 = torch_c.to_f64 %3444
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  %zp_i64 = torch_c.to_i64 %3446
  %zp_i32 = arith.trunci %zp_i64 : i64 to i32
  // The subtraction is carried out in i32 on the sign-extended input.
  %q_i32 = arith.extsi %q_i8 : i8 to i32
  %centered_i32 = arith.subi %q_i32, %zp_i32 : i32
  %centered_f32 = arith.sitofp %centered_i32 : i32 to f32
  %dequantized = arith.mulf %centered_f32, %scale_f32 : f32
  linalg.yield %dequantized : f32
} -> tensor<256xf32>
%cast_3590 = tensor.cast %3449 : tensor<256xf32> to tensor<256xf32>
%int0_3591 = torch.constant.int 0
%int0_3592 = torch.constant.int 0
%int1_3593 = torch.constant.int 1
%int1_3594 = torch.constant.int 1
%int1_3595 = torch.constant.int 1
%int1_3596 = torch.constant.int 1
%int0_3597 = torch.constant.int 0
%3450 = torch.prim.ListConstruct %int0_3591, %int0_3592 : (!torch.int, !torch.int) -> !torch.list<int>
%3451 = torch.prim.ListConstruct %int1_3593, %int1_3594 : (!torch.int, !torch.int) -> !torch.list<int>
%3452 = torch.prim.ListConstruct %int1_3595, %int1_3596 : (!torch.int, !torch.int) -> !torch.list<int>
%3453 = torch.prim.ListConstruct %int0_3597, %int0_3597 : (!torch.int, !torch.int) -> !torch.list<int>
%false_3598 = torch.constant.bool false
%int1_3599 = torch.constant.int 1
%3454 = torch_c.to_i64 %int1_3599
%3455 = torch_c.to_i64 %int0_3591
%3456 = torch_c.to_i64 %int0_3592
%3457 = torch_c.to_i64 %int0_3597
%3458 = torch_c.to_i64 %int0_3597
%c0_3600 = arith.constant 0 : index
%c1_3601 = arith.constant 1 : index
%c1_3602 = arith.constant 1 : index
%c1024_3603 = arith.constant 1024 : index
%c2_3604 = arith.constant 2 : index
%c14_3605 = arith.constant 14 : index
%c3_3606 = arith.constant 3 : index
%c14_3607 = arith.constant 14 : index
%c0_3608 = arith.constant 0 : index
%c256_3609 = arith.constant 256 : index
%c1_3610 = arith.constant 1 : index
%c1024_3611 = arith.constant 1024 : index
%c2_3612 = arith.constant 2 : index
%c1_3613 = arith.constant 1 : index
%c3_3614 = arith.constant 3 : index
%c1_3615 = arith.constant 1 : index
%3459 = arith.index_cast %3454 : i64 to index
%c0_3616 = arith.constant 0 : index
%3460 = arith.remsi %c1024_3603, %3459 : index
%3461 = arith.cmpi eq, %c0_3616, %3460 : index
cf.assert %3461, "invalid: groups must divide input channel size evenly."
%c0_3617 = arith.constant 0 : index
%3462 = arith.remsi %c256_3609, %3459 : index
%3463 = arith.cmpi eq, %c0_3617, %3462 : index
cf.assert %3463, "invalid: groups must divide weight batch size evenly."
%c1_i64_3618 = arith.constant 1 : i64
%c1_i64_3619 = arith.constant 1 : i64
%c1_i64_3620 = arith.constant 1 : i64
%c1_i64_3621 = arith.constant 1 : i64
%cst_3622 = arith.constant 0.000000e+00 : f32
%c0_3623 = arith.constant 0 : index
%c1_3624 = arith.constant 1 : index
%c1_3625 = arith.constant 1 : index
%c1024_3626 = arith.constant 1024 : index
%c2_3627 = arith.constant 2 : index
%c14_3628 = arith.constant 14 : index
%c3_3629 = arith.constant 3 : index
%c14_3630 = arith.constant 14 : index
%c0_i64_3631 = arith.constant 0 : i64
%3464 = arith.index_cast %c0_i64_3631 : i64 to index
%3465 = arith.index_cast %c0_i64_3631 : i64 to index
%3466 = arith.index_cast %3455 : i64 to index
%3467 = arith.index_cast %3456 : i64 to index
// Pad the convolution input %cast_3564 with the f32 constant %cst_3622 (0.0).
// Low and high amounts are identical: %3464/%3465 are index casts of the i64
// constant 0, and %3466/%3467 are index casts of %3455/%3456, which convert the
// torch constants %int0_3591/%int0_3592 (both 0). The result type is fully
// dynamic.
%padded_3632 = tensor.pad %cast_3564
    low[%3464, %3465, %3466, %3467]
    high[%3464, %3465, %3466, %3467] {
^bb0(%i0: index, %i1: index, %i2: index, %i3: index):
  // Every padded position gets the same fill value; the indices are unused.
  tensor.yield %cst_3622 : f32
} : tensor<1x1024x14x14xf32> to tensor<?x?x?x?xf32>
// Computes the two dynamic spatial extents (%3478 and %3489) that size the
// convolution output buffer. Each one evaluates
//   floor((in + 2*p - m*(k - 1) - 1) / s) + 1
// with in = 14, k = 1, p = %3455 / %3456 (conversions of torch constant 0), and
// m, s taken from i64 constants equal to 1 (presumably dilation and stride; the
// conv op below uses dilations = strides = 1 -- TODO confirm the mapping).
// With these values both extents come out as 14.
//
// First spatial extent (tensor dimension 2).
// k: size of filter dimension 2.
%3468 = arith.index_cast %c1_3613 : index to i64
%c1_i64_3633 = arith.constant 1 : i64
%c2_i64_3634 = arith.constant 2 : i64
// 2 * p
%3469 = arith.muli %3455, %c2_i64_3634 : i64
%3470 = arith.index_cast %c14_3605 : index to i64
// in + 2 * p
%3471 = arith.addi %3470, %3469 : i64
// k - 1
%3472 = arith.subi %3468, %c1_i64_3633 : i64
// m * (k - 1)
%3473 = arith.muli %c1_i64_3618, %3472 : i64
%3474 = arith.subi %3471, %3473 : i64
%3475 = arith.subi %3474, %c1_i64_3633 : i64
// Floor division by s, then + 1.
%3476 = arith.floordivsi %3475, %c1_i64_3620 : i64
%3477 = arith.addi %3476, %c1_i64_3633 : i64
%3478 = arith.index_cast %3477 : i64 to index
// Second spatial extent (tensor dimension 3): same formula with the
// dimension-3 constants.
%3479 = arith.index_cast %c1_3615 : index to i64
%c1_i64_3635 = arith.constant 1 : i64
%c2_i64_3636 = arith.constant 2 : i64
%3480 = arith.muli %3456, %c2_i64_3636 : i64
%3481 = arith.index_cast %c14_3607 : index to i64
%3482 = arith.addi %3481, %3480 : i64
%3483 = arith.subi %3479, %c1_i64_3635 : i64
%3484 = arith.muli %c1_i64_3619, %3483 : i64
%3485 = arith.subi %3482, %3484 : i64
%3486 = arith.subi %3485, %c1_i64_3635 : i64
%3487 = arith.floordivsi %3486, %c1_i64_3621 : i64
%3488 = arith.addi %3487, %c1_i64_3635 : i64
%3489 = arith.index_cast %3488 : i64 to index
%3490 = tensor.empty(%3478, %3489) : tensor<1x256x?x?xf32>
// Broadcast the 256-element tensor %cast_3590 along dimension 1 of the
// dynamically sized buffer %3490 (#map3 selects d1 only). The result is used as
// the `outs` operand of the convolution that follows, so it acts as the initial
// value of the accumulator.
%3491 = linalg.generic {
    indexing_maps = [#map3, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_3590 : tensor<256xf32>)
    outs(%3490 : tensor<1x256x?x?xf32>) {
^bb0(%per_channel: f32, %unused_out: f32):
  // The previous content of the output element is ignored.
  linalg.yield %per_channel : f32
} -> tensor<1x256x?x?xf32>
// Channel counts divided by %3459, the index form of %3454 (conversion of the
// torch constant 1; presumably the group count -- TODO confirm). Neither
// quotient, nor the two index constants below, is referenced anywhere in the
// visible portion of the function.
%3492 = arith.floordivsi %c1024_3603, %3459 : index
%3493 = arith.floordivsi %c256_3609, %3459 : index
%c0_3637 = arith.constant 0 : index
%c1_3638 = arith.constant 1 : index
// 2-D convolution, NCHW input / FCHW filter, stride 1 and dilation 1, with the
// dequantized 256x1024x1x1 filter %cast_3579. It accumulates into %3491, which
// was pre-filled by the broadcast generic directly above.
%3494 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_3632, %cast_3579 : tensor<?x?x?x?xf32>, tensor<256x1024x1x1xf32>) outs(%3491 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
// Re-assert the static 14x14 spatial shape on the dynamically shaped result.
%cast_3639 = tensor.cast %3494 : tensor<1x256x?x?xf32> to tensor<1x256x14x14xf32>
%c1_3640 = arith.constant 1 : index
%c1_3641 = arith.constant 1 : index
%c256_3642 = arith.constant 256 : index
%c2_3643 = arith.constant 2 : index
%c14_3644 = arith.constant 14 : index
%c3_3645 = arith.constant 3 : index
%c14_3646 = arith.constant 14 : index
%3495 = tensor.empty() : tensor<1x256x14x14xf32>
// Element-wise ReLU over %cast_3639: yields the input where it compares
// greater than 0.0 and 0.0 otherwise. The predicate is the unordered `ugt`,
// which is also true for NaN, so a NaN input is passed through unchanged.
%3496 = linalg.generic {
    indexing_maps = [#map, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_3639 : tensor<1x256x14x14xf32>)
    outs(%3495 : tensor<1x256x14x14xf32>) {
^bb0(%x: f32, %unused_out: f32):
  %zero_f32 = arith.constant 0.000000e+00 : f32
  %is_positive = arith.cmpf ugt, %x, %zero_f32 : f32
  %rectified = arith.select %is_positive, %x, %zero_f32 : f32
  linalg.yield %rectified : f32
} -> tensor<1x256x14x14xf32>
%cast_3647 = tensor.cast %3496 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
%3497 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3498 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_3648 = torch.constant.int 12
%3499 = torch.aten.item %3497 : !torch.vtensor<[],f32> -> !torch.float
%3500 = torch_c.to_f64 %3499
%3501 = torch.aten.item %3498 : !torch.vtensor<[],si8> -> !torch.int
%3502 = torch_c.to_i64 %3501
%c1_3649 = arith.constant 1 : index
%c1_3650 = arith.constant 1 : index
%c256_3651 = arith.constant 256 : index
%c2_3652 = arith.constant 2 : index
%c14_3653 = arith.constant 14 : index
%c3_3654 = arith.constant 3 : index
%c14_3655 = arith.constant 14 : index
%3503 = tensor.empty() : tensor<1x256x14x14xi8>
// Per-element quantization of %cast_3647 (f32) into i8:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is the torch float %3499 and zero_point the torch int %3501; both are
// defined above this op and captured by the region.
//
// Rounding uses math.roundeven (ties to even). math.round breaks ties away
// from zero, which differs from ONNX QuantizeLinear and from
// torch.quantize_per_tensor on exact .5 quotients. The scale literal feeding
// %3499 is 7.8125e-03 (2^-7), so such ties are reachable for ordinary inputs.
%3504 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3647 : tensor<1x256x14x14xf32>) outs(%3503 : tensor<1x256x14x14xi8>) {
^bb0(%in: f32, %out: i8):
  // Zero point and scale converted to f32 for the arithmetic below.
  %5774 = torch_c.to_i64 %3501
  %5775 = arith.sitofp %5774 : i64 to f32
  %5776 = torch_c.to_f64 %3499
  %5777 = arith.truncf %5776 : f64 to f32
  // x / scale, rounded to the nearest integer with ties going to even.
  %5778 = arith.divf %in, %5777 : f32
  %5779 = math.roundeven %5778 : f32
  %5780 = arith.addf %5779, %5775 : f32
  // Saturate to the signed 8-bit range before the float -> int conversion.
  // ult/ugt are unordered predicates, so a NaN value takes both selects and
  // ends up as 127.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
  %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
  %5783 = arith.select %5781, %cst_6197, %5780 : f32
  %5784 = arith.select %5782, %cst_6198, %5783 : f32
  %5785 = arith.fptosi %5784 : f32 to i8
  linalg.yield %5785 : i8
} -> tensor<1x256x14x14xi8>
%cast_3656 = tensor.cast %3504 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
%cast_3657 = tensor.cast %cast_3656 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
%3505 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3506 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3507 = torch.aten.item %3505 : !torch.vtensor<[],f32> -> !torch.float
%3508 = torch_c.to_f64 %3507
%3509 = torch.aten.item %3506 : !torch.vtensor<[],si8> -> !torch.int
%3510 = torch_c.to_i64 %3509
%cast_3658 = tensor.cast %cast_3657 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
%c1_3659 = arith.constant 1 : index
%c1_3660 = arith.constant 1 : index
%c256_3661 = arith.constant 256 : index
%c2_3662 = arith.constant 2 : index
%c14_3663 = arith.constant 14 : index
%c3_3664 = arith.constant 3 : index
%c14_3665 = arith.constant 14 : index
%3511 = tensor.empty() : tensor<1x256x14x14xf32>
// Per-element dequantization of the i8 tensor %cast_3658 back to f32:
//   x = (q - zero_point) * scale
// zero_point is the torch int %3509 and scale the torch float %3507, both
// captured from the enclosing scope.
%3512 = linalg.generic {
    indexing_maps = [#map, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_3658 : tensor<1x256x14x14xi8>)
    outs(%3511 : tensor<1x256x14x14xf32>) {
^bb0(%q_i8: i8, %unused_out: f32):
  // Scalars converted to the types used by the arithmetic below.
  %scale_f64 = torch_c.to_f64 %3507
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  %zp_i64 = torch_c.to_i64 %3509
  %zp_i32 = arith.trunci %zp_i64 : i64 to i32
  // The subtraction is carried out in i32 on the sign-extended input.
  %q_i32 = arith.extsi %q_i8 : i8 to i32
  %centered_i32 = arith.subi %q_i32, %zp_i32 : i32
  %centered_f32 = arith.sitofp %centered_i32 : i32 to f32
  %dequantized = arith.mulf %centered_f32, %scale_f32 : f32
  linalg.yield %dequantized : f32
} -> tensor<1x256x14x14xf32>
%cast_3666 = tensor.cast %3512 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
%3513 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
%3514 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_3667 = torch.constant.int 12
%3515 = torch.aten.item %3513 : !torch.vtensor<[],f32> -> !torch.float
%3516 = torch_c.to_f64 %3515
%3517 = torch.aten.item %3514 : !torch.vtensor<[],si8> -> !torch.int
%3518 = torch_c.to_i64 %3517
%c1_3668 = arith.constant 1 : index
%c0_3669 = arith.constant 0 : index
%c256_3670 = arith.constant 256 : index
%c1_3671 = arith.constant 1 : index
%c256_3672 = arith.constant 256 : index
%c2_3673 = arith.constant 2 : index
%c3_3674 = arith.constant 3 : index
%c3_3675 = arith.constant 3 : index
%c3_3676 = arith.constant 3 : index
%3519 = tensor.empty() : tensor<256x256x3x3xi8>
// Per-element quantization of the 256x256x3x3 f32 tensor %130 into i8:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is the torch float %3515 and zero_point the torch int %3517; both are
// defined above this op and captured by the region. After the dequantize step
// that follows, this tensor becomes the filter operand of the 3x3 convolution
// further down.
//
// Rounding uses math.roundeven (ties to even). math.round breaks ties away
// from zero, which differs from ONNX QuantizeLinear and from
// torch.quantize_per_tensor on exact .5 quotients. The scale literal feeding
// %3515 is 0.001953125 (2^-9).
%3520 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%130 : tensor<256x256x3x3xf32>) outs(%3519 : tensor<256x256x3x3xi8>) {
^bb0(%in: f32, %out: i8):
  // Zero point and scale converted to f32 for the arithmetic below.
  %5774 = torch_c.to_i64 %3517
  %5775 = arith.sitofp %5774 : i64 to f32
  %5776 = torch_c.to_f64 %3515
  %5777 = arith.truncf %5776 : f64 to f32
  // x / scale, rounded to the nearest integer with ties going to even.
  %5778 = arith.divf %in, %5777 : f32
  %5779 = math.roundeven %5778 : f32
  %5780 = arith.addf %5779, %5775 : f32
  // Saturate to the signed 8-bit range before the float -> int conversion.
  // ult/ugt are unordered predicates, so a NaN value takes both selects and
  // ends up as 127.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
  %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
  %5783 = arith.select %5781, %cst_6197, %5780 : f32
  %5784 = arith.select %5782, %cst_6198, %5783 : f32
  %5785 = arith.fptosi %5784 : f32 to i8
  linalg.yield %5785 : i8
} -> tensor<256x256x3x3xi8>
%cast_3677 = tensor.cast %3520 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
%cast_3678 = tensor.cast %cast_3677 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
%3521 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
%3522 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3523 = torch.aten.item %3521 : !torch.vtensor<[],f32> -> !torch.float
%3524 = torch_c.to_f64 %3523
%3525 = torch.aten.item %3522 : !torch.vtensor<[],si8> -> !torch.int
%3526 = torch_c.to_i64 %3525
%cast_3679 = tensor.cast %cast_3678 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
%c1_3680 = arith.constant 1 : index
%c0_3681 = arith.constant 0 : index
%c256_3682 = arith.constant 256 : index
%c1_3683 = arith.constant 1 : index
%c256_3684 = arith.constant 256 : index
%c2_3685 = arith.constant 2 : index
%c3_3686 = arith.constant 3 : index
%c3_3687 = arith.constant 3 : index
%c3_3688 = arith.constant 3 : index
%3527 = tensor.empty() : tensor<256x256x3x3xf32>
// Per-element dequantization of the i8 filter tensor %cast_3679 back to f32:
//   x = (q - zero_point) * scale
// zero_point is the torch int %3525 and scale the torch float %3523, both
// captured from the enclosing scope.
%3528 = linalg.generic {
    indexing_maps = [#map1, #map1],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
  } ins(%cast_3679 : tensor<256x256x3x3xi8>)
    outs(%3527 : tensor<256x256x3x3xf32>) {
^bb0(%q_i8: i8, %unused_out: f32):
  // Scalars converted to the types used by the arithmetic below.
  %scale_f64 = torch_c.to_f64 %3523
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  %zp_i64 = torch_c.to_i64 %3525
  %zp_i32 = arith.trunci %zp_i64 : i64 to i32
  // The subtraction is carried out in i32 on the sign-extended input.
  %q_i32 = arith.extsi %q_i8 : i8 to i32
  %centered_i32 = arith.subi %q_i32, %zp_i32 : i32
  %centered_f32 = arith.sitofp %centered_i32 : i32 to f32
  %dequantized = arith.mulf %centered_f32, %scale_f32 : f32
  linalg.yield %dequantized : f32
} -> tensor<256x256x3x3xf32>
%cast_3689 = tensor.cast %3528 : tensor<256x256x3x3xf32> to tensor<256x256x3x3xf32>
%3529 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3530 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_3690 = torch.constant.int 12
%3531 = torch.aten.item %3529 : !torch.vtensor<[],f32> -> !torch.float
%3532 = torch_c.to_f64 %3531
%3533 = torch.aten.item %3530 : !torch.vtensor<[],si8> -> !torch.int
%3534 = torch_c.to_i64 %3533
%c1_3691 = arith.constant 1 : index
%c0_3692 = arith.constant 0 : index
%c256_3693 = arith.constant 256 : index
%3535 = tensor.empty() : tensor<256xi8>
// Per-element quantization of the 256-element f32 tensor %132 into i8:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is the torch float %3531 and zero_point the torch int %3533; both are
// defined above this op and captured by the region. After the dequantize step
// that follows, this tensor is broadcast into the accumulator of the 3x3
// convolution further down.
//
// Rounding uses math.roundeven (ties to even). math.round breaks ties away
// from zero, which differs from ONNX QuantizeLinear and from
// torch.quantize_per_tensor on exact .5 quotients. The scale literal feeding
// %3531 is 7.8125e-03 (2^-7).
%3536 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%132 : tensor<256xf32>) outs(%3535 : tensor<256xi8>) {
^bb0(%in: f32, %out: i8):
  // Zero point and scale converted to f32 for the arithmetic below.
  %5774 = torch_c.to_i64 %3533
  %5775 = arith.sitofp %5774 : i64 to f32
  %5776 = torch_c.to_f64 %3531
  %5777 = arith.truncf %5776 : f64 to f32
  // x / scale, rounded to the nearest integer with ties going to even.
  %5778 = arith.divf %in, %5777 : f32
  %5779 = math.roundeven %5778 : f32
  %5780 = arith.addf %5779, %5775 : f32
  // Saturate to the signed 8-bit range before the float -> int conversion.
  // ult/ugt are unordered predicates, so a NaN value takes both selects and
  // ends up as 127.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
  %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
  %5783 = arith.select %5781, %cst_6197, %5780 : f32
  %5784 = arith.select %5782, %cst_6198, %5783 : f32
  %5785 = arith.fptosi %5784 : f32 to i8
  linalg.yield %5785 : i8
} -> tensor<256xi8>
%cast_3694 = tensor.cast %3536 : tensor<256xi8> to tensor<256xi8>
%cast_3695 = tensor.cast %cast_3694 : tensor<256xi8> to tensor<256xi8>
%3537 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3538 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3539 = torch.aten.item %3537 : !torch.vtensor<[],f32> -> !torch.float
%3540 = torch_c.to_f64 %3539
%3541 = torch.aten.item %3538 : !torch.vtensor<[],si8> -> !torch.int
%3542 = torch_c.to_i64 %3541
%cast_3696 = tensor.cast %cast_3695 : tensor<256xi8> to tensor<256xi8>
%c1_3697 = arith.constant 1 : index
%c0_3698 = arith.constant 0 : index
%c256_3699 = arith.constant 256 : index
%3543 = tensor.empty() : tensor<256xf32>
// Per-element dequantization of the 256-element i8 tensor %cast_3696 to f32:
//   x = (q - zero_point) * scale
// zero_point is the torch int %3541 and scale the torch float %3539, both
// captured from the enclosing scope.
%3544 = linalg.generic {
    indexing_maps = [#map2, #map2],
    iterator_types = ["parallel"]
  } ins(%cast_3696 : tensor<256xi8>)
    outs(%3543 : tensor<256xf32>) {
^bb0(%q_i8: i8, %unused_out: f32):
  // Scalars converted to the types used by the arithmetic below.
  %scale_f64 = torch_c.to_f64 %3539
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  %zp_i64 = torch_c.to_i64 %3541
  %zp_i32 = arith.trunci %zp_i64 : i64 to i32
  // The subtraction is carried out in i32 on the sign-extended input.
  %q_i32 = arith.extsi %q_i8 : i8 to i32
  %centered_i32 = arith.subi %q_i32, %zp_i32 : i32
  %centered_f32 = arith.sitofp %centered_i32 : i32 to f32
  %dequantized = arith.mulf %centered_f32, %scale_f32 : f32
  linalg.yield %dequantized : f32
} -> tensor<256xf32>
%cast_3700 = tensor.cast %3544 : tensor<256xf32> to tensor<256xf32>
%int1_3701 = torch.constant.int 1
%int1_3702 = torch.constant.int 1
%int1_3703 = torch.constant.int 1
%int1_3704 = torch.constant.int 1
%int1_3705 = torch.constant.int 1
%int1_3706 = torch.constant.int 1
%int0_3707 = torch.constant.int 0
%3545 = torch.prim.ListConstruct %int1_3701, %int1_3702 : (!torch.int, !torch.int) -> !torch.list<int>
%3546 = torch.prim.ListConstruct %int1_3703, %int1_3704 : (!torch.int, !torch.int) -> !torch.list<int>
%3547 = torch.prim.ListConstruct %int1_3705, %int1_3706 : (!torch.int, !torch.int) -> !torch.list<int>
%3548 = torch.prim.ListConstruct %int0_3707, %int0_3707 : (!torch.int, !torch.int) -> !torch.list<int>
%false_3708 = torch.constant.bool false
%int1_3709 = torch.constant.int 1
%3549 = torch_c.to_i64 %int1_3709
%3550 = torch_c.to_i64 %int1_3701
%3551 = torch_c.to_i64 %int1_3702
%3552 = torch_c.to_i64 %int0_3707
%3553 = torch_c.to_i64 %int0_3707
%c0_3710 = arith.constant 0 : index
%c1_3711 = arith.constant 1 : index
%c1_3712 = arith.constant 1 : index
%c256_3713 = arith.constant 256 : index
%c2_3714 = arith.constant 2 : index
%c14_3715 = arith.constant 14 : index
%c3_3716 = arith.constant 3 : index
%c14_3717 = arith.constant 14 : index
%c0_3718 = arith.constant 0 : index
%c256_3719 = arith.constant 256 : index
%c1_3720 = arith.constant 1 : index
%c256_3721 = arith.constant 256 : index
%c2_3722 = arith.constant 2 : index
%c3_3723 = arith.constant 3 : index
%c3_3724 = arith.constant 3 : index
%c3_3725 = arith.constant 3 : index
%3554 = arith.index_cast %3549 : i64 to index
%c0_3726 = arith.constant 0 : index
%3555 = arith.remsi %c256_3713, %3554 : index
%3556 = arith.cmpi eq, %c0_3726, %3555 : index
cf.assert %3556, "invalid: groups must divide input channel size evenly."
%c0_3727 = arith.constant 0 : index
%3557 = arith.remsi %c256_3719, %3554 : index
%3558 = arith.cmpi eq, %c0_3727, %3557 : index
cf.assert %3558, "invalid: groups must divide weight batch size evenly."
%c1_i64_3728 = arith.constant 1 : i64
%c1_i64_3729 = arith.constant 1 : i64
%c1_i64_3730 = arith.constant 1 : i64
%c1_i64_3731 = arith.constant 1 : i64
%cst_3732 = arith.constant 0.000000e+00 : f32
%c0_3733 = arith.constant 0 : index
%c1_3734 = arith.constant 1 : index
%c1_3735 = arith.constant 1 : index
%c256_3736 = arith.constant 256 : index
%c2_3737 = arith.constant 2 : index
%c14_3738 = arith.constant 14 : index
%c3_3739 = arith.constant 3 : index
%c14_3740 = arith.constant 14 : index
%c0_i64_3741 = arith.constant 0 : i64
%3559 = arith.index_cast %c0_i64_3741 : i64 to index
%3560 = arith.index_cast %c0_i64_3741 : i64 to index
%3561 = arith.index_cast %3550 : i64 to index
%3562 = arith.index_cast %3551 : i64 to index
// Pad the convolution input %cast_3666 with the f32 constant %cst_3732 (0.0).
// Low and high amounts are identical: %3559/%3560 are index casts of the i64
// constant 0, and %3561/%3562 are index casts of %3550/%3551, which convert the
// torch constants %int1_3701/%int1_3702 (both 1). Dimensions 2 and 3 therefore
// grow by one element on each side. The result type is fully dynamic.
%padded_3742 = tensor.pad %cast_3666
    low[%3559, %3560, %3561, %3562]
    high[%3559, %3560, %3561, %3562] {
^bb0(%i0: index, %i1: index, %i2: index, %i3: index):
  // Every padded position gets the same fill value; the indices are unused.
  tensor.yield %cst_3732 : f32
} : tensor<1x256x14x14xf32> to tensor<?x?x?x?xf32>
// Computes the two dynamic spatial extents (%3573 and %3584) that size the
// convolution output buffer. Each one evaluates
//   floor((in + 2*p - m*(k - 1) - 1) / s) + 1
// with in = 14, k = 3, p = %3550 / %3551 (conversions of torch constant 1), and
// m, s taken from i64 constants equal to 1 (presumably dilation and stride; the
// conv op below uses dilations = strides = 1 -- TODO confirm the mapping).
// With these values both extents come out as (14 + 2 - 2 - 1) / 1 + 1 = 14.
//
// First spatial extent (tensor dimension 2).
// k: size of filter dimension 2.
%3563 = arith.index_cast %c3_3723 : index to i64
%c1_i64_3743 = arith.constant 1 : i64
%c2_i64_3744 = arith.constant 2 : i64
// 2 * p
%3564 = arith.muli %3550, %c2_i64_3744 : i64
%3565 = arith.index_cast %c14_3715 : index to i64
// in + 2 * p
%3566 = arith.addi %3565, %3564 : i64
// k - 1
%3567 = arith.subi %3563, %c1_i64_3743 : i64
// m * (k - 1)
%3568 = arith.muli %c1_i64_3728, %3567 : i64
%3569 = arith.subi %3566, %3568 : i64
%3570 = arith.subi %3569, %c1_i64_3743 : i64
// Floor division by s, then + 1.
%3571 = arith.floordivsi %3570, %c1_i64_3730 : i64
%3572 = arith.addi %3571, %c1_i64_3743 : i64
%3573 = arith.index_cast %3572 : i64 to index
// Second spatial extent (tensor dimension 3): same formula with the
// dimension-3 constants.
%3574 = arith.index_cast %c3_3725 : index to i64
%c1_i64_3745 = arith.constant 1 : i64
%c2_i64_3746 = arith.constant 2 : i64
%3575 = arith.muli %3551, %c2_i64_3746 : i64
%3576 = arith.index_cast %c14_3717 : index to i64
%3577 = arith.addi %3576, %3575 : i64
%3578 = arith.subi %3574, %c1_i64_3745 : i64
%3579 = arith.muli %c1_i64_3729, %3578 : i64
%3580 = arith.subi %3577, %3579 : i64
%3581 = arith.subi %3580, %c1_i64_3745 : i64
%3582 = arith.floordivsi %3581, %c1_i64_3731 : i64
%3583 = arith.addi %3582, %c1_i64_3745 : i64
%3584 = arith.index_cast %3583 : i64 to index
// 3x3 convolution with bias, 256 -> 256 channels.
// Allocate the output using the dynamically computed spatial extents.
%3585 = tensor.empty(%3573, %3584) : tensor<1x256x?x?xf32>
// Broadcast the 256-element bias %cast_3700 along dim 1 (#map3 reads index d1
// only) into every output position; this becomes the conv's initial value.
%3586 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3700 : tensor<256xf32>) outs(%3585 : tensor<1x256x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x256x?x?xf32>
// Per-group channel counts; neither result is referenced in the visible lines.
%3587 = arith.floordivsi %c256_3713, %3554 : index
%3588 = arith.floordivsi %c256_3719, %3554 : index
%c0_3747 = arith.constant 0 : index
%c1_3748 = arith.constant 1 : index
// NCHW input x FCHW filter, stride 1, dilation 1. The bias-filled %3586 is the
// `outs` operand, so the convolution result is accumulated on top of the bias.
%3589 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_3742, %cast_3689 : tensor<?x?x?x?xf32>, tensor<256x256x3x3xf32>) outs(%3586 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
// Refine the dynamic spatial dims to the static 14x14 shape.
%cast_3749 = tensor.cast %3589 : tensor<1x256x?x?xf32> to tensor<1x256x14x14xf32>
%c1_3750 = arith.constant 1 : index
%c1_3751 = arith.constant 1 : index
%c256_3752 = arith.constant 256 : index
%c2_3753 = arith.constant 2 : index
%c14_3754 = arith.constant 14 : index
%c3_3755 = arith.constant 3 : index
%c14_3756 = arith.constant 14 : index
// Elementwise ReLU on the conv result: yields %in when %in > 0, else 0.0.
%3590 = tensor.empty() : tensor<1x256x14x14xf32>
%3591 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3749 : tensor<1x256x14x14xf32>) outs(%3590 : tensor<1x256x14x14xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// `ugt` is the unordered predicate, so a NaN input also selects %in
// (NaN is passed through rather than replaced by 0).
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x256x14x14xf32>
// Identity cast (source and result types are the same).
%cast_3757 = tensor.cast %3591 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
// Quantize the ReLU output to signed 8-bit:
//   q = clamp(round(x / scale) + zero_point, -128, 127)
// with scale = 7.8125e-03 and zero_point = 0 taken from the literals below.
%3592 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3593 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Not referenced in the visible lines; presumably a torch dtype code -- TODO confirm.
%int12_3758 = torch.constant.int 12
%3594 = torch.aten.item %3592 : !torch.vtensor<[],f32> -> !torch.float
%3595 = torch_c.to_f64 %3594
%3596 = torch.aten.item %3593 : !torch.vtensor<[],si8> -> !torch.int
%3597 = torch_c.to_i64 %3596
%c1_3759 = arith.constant 1 : index
%c1_3760 = arith.constant 1 : index
%c256_3761 = arith.constant 256 : index
%c2_3762 = arith.constant 2 : index
%c14_3763 = arith.constant 14 : index
%c3_3764 = arith.constant 3 : index
%c14_3765 = arith.constant 14 : index
%3598 = tensor.empty() : tensor<1x256x14x14xi8>
%3599 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3757 : tensor<1x256x14x14xf32>) outs(%3598 : tensor<1x256x14x14xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point and scale are re-materialised per element as f32.
%5774 = torch_c.to_i64 %3596
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %3594
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round rounds ties away from zero according to the MLIR
// math dialect docs, whereas ONNX QuantizeLinear specifies round-half-to-even;
// confirm whether math.roundeven was intended by the lowering.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range before the float -> int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x256x14x14xi8>
// Two identity casts (types unchanged).
%cast_3766 = tensor.cast %3599 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
%cast_3767 = tensor.cast %cast_3766 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
// Dequantize the i8 activation back to f32:
//   x = (q - zero_point) * scale
// with scale = 7.8125e-03 and zero_point = 0 (same values as the quantize step).
%3600 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3601 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3602 = torch.aten.item %3600 : !torch.vtensor<[],f32> -> !torch.float
%3603 = torch_c.to_f64 %3602
%3604 = torch.aten.item %3601 : !torch.vtensor<[],si8> -> !torch.int
%3605 = torch_c.to_i64 %3604
%cast_3768 = tensor.cast %cast_3767 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
%c1_3769 = arith.constant 1 : index
%c1_3770 = arith.constant 1 : index
%c256_3771 = arith.constant 256 : index
%c2_3772 = arith.constant 2 : index
%c14_3773 = arith.constant 14 : index
%c3_3774 = arith.constant 3 : index
%c14_3775 = arith.constant 14 : index
%3606 = tensor.empty() : tensor<1x256x14x14xf32>
%3607 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3768 : tensor<1x256x14x14xi8>) outs(%3606 : tensor<1x256x14x14xf32>) {
^bb0(%in: i8, %out: f32):
// The subtraction is done in i32 (sign-extended input, truncated zero point)
// so it cannot overflow the 8-bit range.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %3604
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %3602
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x256x14x14xf32>
// Identity cast; %cast_3776 is the input padded for the following convolution.
%cast_3776 = tensor.cast %3607 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
// Quantize the 1024x256x1x1 f32 tensor %134 (defined earlier; it is the filter
// of the 1x1 convolution below) to i8:
//   q = clamp(round(w / scale) + zero_point, -128, 127)
// with scale = 3.90625e-03 and zero_point = 0.
%3608 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3609 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Not referenced in the visible lines; presumably a torch dtype code -- TODO confirm.
%int12_3777 = torch.constant.int 12
%3610 = torch.aten.item %3608 : !torch.vtensor<[],f32> -> !torch.float
%3611 = torch_c.to_f64 %3610
%3612 = torch.aten.item %3609 : !torch.vtensor<[],si8> -> !torch.int
%3613 = torch_c.to_i64 %3612
%c1_3778 = arith.constant 1 : index
%c0_3779 = arith.constant 0 : index
%c1024_3780 = arith.constant 1024 : index
%c1_3781 = arith.constant 1 : index
%c256_3782 = arith.constant 256 : index
%3614 = tensor.empty() : tensor<1024x256x1x1xi8>
// #map4 pins the two trailing (size-1) dims of the input to index 0.
%3615 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%134 : tensor<1024x256x1x1xf32>) outs(%3614 : tensor<1024x256x1x1xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %3612
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %3610
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round vs. round-half-to-even -- confirm tie handling
// matches the source model's quantization semantics.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to [-128, 127] before converting to i8.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1024x256x1x1xi8>
// Two identity casts (types unchanged).
%cast_3783 = tensor.cast %3615 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
%cast_3784 = tensor.cast %cast_3783 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
// Dequantize the i8 1024x256x1x1 tensor back to f32:
//   w = (q - zero_point) * scale, scale = 3.90625e-03, zero_point = 0.
// The result %cast_3791 is the filter operand of the 1x1 convolution below.
%3616 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3617 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3618 = torch.aten.item %3616 : !torch.vtensor<[],f32> -> !torch.float
%3619 = torch_c.to_f64 %3618
%3620 = torch.aten.item %3617 : !torch.vtensor<[],si8> -> !torch.int
%3621 = torch_c.to_i64 %3620
%cast_3785 = tensor.cast %cast_3784 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
%c1_3786 = arith.constant 1 : index
%c0_3787 = arith.constant 0 : index
%c1024_3788 = arith.constant 1024 : index
%c1_3789 = arith.constant 1 : index
%c256_3790 = arith.constant 256 : index
%3622 = tensor.empty() : tensor<1024x256x1x1xf32>
%3623 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3785 : tensor<1024x256x1x1xi8>) outs(%3622 : tensor<1024x256x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// Zero-point subtraction is performed in i32.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %3620
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %3618
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1024x256x1x1xf32>
%cast_3791 = tensor.cast %3623 : tensor<1024x256x1x1xf32> to tensor<1024x256x1x1xf32>
// Quantize the 1024-element f32 vector %136 (defined earlier; it becomes the
// bias of the 1x1 convolution below) to i8:
//   q = clamp(round(b / scale) + zero_point, -128, 127)
// with scale = 3.90625e-03 and zero_point = 0.
%3624 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3625 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Not referenced in the visible lines; presumably a torch dtype code -- TODO confirm.
%int12_3792 = torch.constant.int 12
%3626 = torch.aten.item %3624 : !torch.vtensor<[],f32> -> !torch.float
%3627 = torch_c.to_f64 %3626
%3628 = torch.aten.item %3625 : !torch.vtensor<[],si8> -> !torch.int
%3629 = torch_c.to_i64 %3628
%c1_3793 = arith.constant 1 : index
%c0_3794 = arith.constant 0 : index
%c1024_3795 = arith.constant 1024 : index
%3630 = tensor.empty() : tensor<1024xi8>
%3631 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%136 : tensor<1024xf32>) outs(%3630 : tensor<1024xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %3628
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %3626
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round vs. round-half-to-even -- confirm tie handling.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to [-128, 127] before converting to i8.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1024xi8>
// Two identity casts (types unchanged).
%cast_3796 = tensor.cast %3631 : tensor<1024xi8> to tensor<1024xi8>
%cast_3797 = tensor.cast %cast_3796 : tensor<1024xi8> to tensor<1024xi8>
// Dequantize the i8 1024-element vector back to f32:
//   b = (q - zero_point) * scale, scale = 3.90625e-03, zero_point = 0.
// The result %cast_3802 is broadcast as the bias of the 1x1 convolution below.
%3632 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3633 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3634 = torch.aten.item %3632 : !torch.vtensor<[],f32> -> !torch.float
%3635 = torch_c.to_f64 %3634
%3636 = torch.aten.item %3633 : !torch.vtensor<[],si8> -> !torch.int
%3637 = torch_c.to_i64 %3636
%cast_3798 = tensor.cast %cast_3797 : tensor<1024xi8> to tensor<1024xi8>
%c1_3799 = arith.constant 1 : index
%c0_3800 = arith.constant 0 : index
%c1024_3801 = arith.constant 1024 : index
%3638 = tensor.empty() : tensor<1024xf32>
%3639 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_3798 : tensor<1024xi8>) outs(%3638 : tensor<1024xf32>) {
^bb0(%in: i8, %out: f32):
// Zero-point subtraction is performed in i32.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %3636
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %3634
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1024xf32>
%cast_3802 = tensor.cast %3639 : tensor<1024xf32> to tensor<1024xf32>
// Scalar parameters for the 1x1 convolution that follows.
%int0_3803 = torch.constant.int 0
%int0_3804 = torch.constant.int 0
%int1_3805 = torch.constant.int 1
%int1_3806 = torch.constant.int 1
%int1_3807 = torch.constant.int 1
%int1_3808 = torch.constant.int 1
%int0_3809 = torch.constant.int 0
// The four lists are [0, 0], [1, 1], [1, 1] and [0, 0]. None is referenced in
// the visible lines; presumably they are the padding / stride-or-dilation /
// output-padding lists of the original torch convolution op -- TODO confirm.
%3640 = torch.prim.ListConstruct %int0_3803, %int0_3804 : (!torch.int, !torch.int) -> !torch.list<int>
%3641 = torch.prim.ListConstruct %int1_3805, %int1_3806 : (!torch.int, !torch.int) -> !torch.list<int>
%3642 = torch.prim.ListConstruct %int1_3807, %int1_3808 : (!torch.int, !torch.int) -> !torch.list<int>
%3643 = torch.prim.ListConstruct %int0_3809, %int0_3809 : (!torch.int, !torch.int) -> !torch.list<int>
%false_3810 = torch.constant.bool false
%int1_3811 = torch.constant.int 1
// %3644 (= 1) is used below as the group count.
%3644 = torch_c.to_i64 %int1_3811
// %3645 / %3646 (= 0) are used below as the pad amounts on dims 2 and 3.
%3645 = torch_c.to_i64 %int0_3803
%3646 = torch_c.to_i64 %int0_3804
%3647 = torch_c.to_i64 %int0_3809
%3648 = torch_c.to_i64 %int0_3809
%c0_3812 = arith.constant 0 : index
%c1_3813 = arith.constant 1 : index
%c1_3814 = arith.constant 1 : index
%c256_3815 = arith.constant 256 : index
%c2_3816 = arith.constant 2 : index
%c14_3817 = arith.constant 14 : index
%c3_3818 = arith.constant 3 : index
%c14_3819 = arith.constant 14 : index
%c0_3820 = arith.constant 0 : index
%c1024_3821 = arith.constant 1024 : index
%c1_3822 = arith.constant 1 : index
%c256_3823 = arith.constant 256 : index
%c2_3824 = arith.constant 2 : index
%c1_3825 = arith.constant 1 : index
%c3_3826 = arith.constant 3 : index
%c1_3827 = arith.constant 1 : index
// Runtime sanity checks for the group count of the 1x1 convolution.
// %3649 is the group count (%3644 = 1) as an index.
%3649 = arith.index_cast %3644 : i64 to index
%c0_3828 = arith.constant 0 : index
// 256 (input channels) must be an exact multiple of the group count.
%3650 = arith.remsi %c256_3815, %3649 : index
%3651 = arith.cmpi eq, %c0_3828, %3650 : index
cf.assert %3651, "invalid: groups must divide input channel size evenly."
%c0_3829 = arith.constant 0 : index
// 1024 (leading filter dim) must be an exact multiple of the group count.
%3652 = arith.remsi %c1024_3821, %3649 : index
%3653 = arith.cmpi eq, %c0_3829, %3652 : index
cf.assert %3653, "invalid: groups must divide weight batch size evenly."
%c1_i64_3830 = arith.constant 1 : i64
%c1_i64_3831 = arith.constant 1 : i64
%c1_i64_3832 = arith.constant 1 : i64
%c1_i64_3833 = arith.constant 1 : i64
%cst_3834 = arith.constant 0.000000e+00 : f32
%c0_3835 = arith.constant 0 : index
%c1_3836 = arith.constant 1 : index
%c1_3837 = arith.constant 1 : index
%c256_3838 = arith.constant 256 : index
%c2_3839 = arith.constant 2 : index
%c14_3840 = arith.constant 14 : index
%c3_3841 = arith.constant 3 : index
%c14_3842 = arith.constant 14 : index
// Pad the dequantized activation %cast_3776 symmetrically before the 1x1 conv.
// Dims 0 and 1 get zero padding; dims 2 and 3 use %3645 / %3646, which are
// both the constant 0 here, so no elements are actually added.
%c0_i64_3843 = arith.constant 0 : i64
%3654 = arith.index_cast %c0_i64_3843 : i64 to index
%3655 = arith.index_cast %c0_i64_3843 : i64 to index
%3656 = arith.index_cast %3645 : i64 to index
%3657 = arith.index_cast %3646 : i64 to index
// The pad amounts are SSA values, so the result type is fully dynamic.
%padded_3844 = tensor.pad %cast_3776 low[%3654, %3655, %3656, %3657] high[%3654, %3655, %3656, %3657] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
// Fill value: %cst_3834, the f32 0.0 constant defined above.
tensor.yield %cst_3834 : f32
} : tensor<1x256x14x14xf32> to tensor<?x?x?x?xf32>
// Compute the two spatial output extents of the 1x1 convolution, each as
//   floordiv(in + 2*pad - m*(k - 1) - 1, s) + 1
// With the constants defined above (in = 14, pad = 0, k = 1, m = s = 1) both
// extents evaluate to 14.
//
// First spatial dim -> %3668.
%3658 = arith.index_cast %c1_3825 : index to i64
%c1_i64_3845 = arith.constant 1 : i64
%c2_i64_3846 = arith.constant 2 : i64
// 2 * pad
%3659 = arith.muli %3645, %c2_i64_3846 : i64
%3660 = arith.index_cast %c14_3817 : index to i64
%3661 = arith.addi %3660, %3659 : i64
// k - 1, scaled by m
%3662 = arith.subi %3658, %c1_i64_3845 : i64
%3663 = arith.muli %c1_i64_3830, %3662 : i64
%3664 = arith.subi %3661, %3663 : i64
%3665 = arith.subi %3664, %c1_i64_3845 : i64
%3666 = arith.floordivsi %3665, %c1_i64_3832 : i64
%3667 = arith.addi %3666, %c1_i64_3845 : i64
%3668 = arith.index_cast %3667 : i64 to index
// Second spatial dim -> %3679 (same formula).
%3669 = arith.index_cast %c1_3827 : index to i64
%c1_i64_3847 = arith.constant 1 : i64
%c2_i64_3848 = arith.constant 2 : i64
%3670 = arith.muli %3646, %c2_i64_3848 : i64
%3671 = arith.index_cast %c14_3819 : index to i64
%3672 = arith.addi %3671, %3670 : i64
%3673 = arith.subi %3669, %c1_i64_3847 : i64
%3674 = arith.muli %c1_i64_3831, %3673 : i64
%3675 = arith.subi %3672, %3674 : i64
%3676 = arith.subi %3675, %c1_i64_3847 : i64
%3677 = arith.floordivsi %3676, %c1_i64_3833 : i64
%3678 = arith.addi %3677, %c1_i64_3847 : i64
%3679 = arith.index_cast %3678 : i64 to index
// 1x1 convolution with bias, 256 -> 1024 channels.
// Allocate the output using the dynamically computed spatial extents.
%3680 = tensor.empty(%3668, %3679) : tensor<1x1024x?x?xf32>
// Broadcast the 1024-element bias %cast_3802 along dim 1 (#map3 reads index d1
// only) into every output position; this becomes the conv's initial value.
%3681 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3802 : tensor<1024xf32>) outs(%3680 : tensor<1x1024x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x1024x?x?xf32>
// Per-group channel counts; neither result is referenced in the visible lines.
%3682 = arith.floordivsi %c256_3815, %3649 : index
%3683 = arith.floordivsi %c1024_3821, %3649 : index
%c0_3849 = arith.constant 0 : index
%c1_3850 = arith.constant 1 : index
// NCHW input x FCHW filter, stride 1, dilation 1. The bias-filled %3681 is the
// `outs` operand, so the convolution result is accumulated on top of the bias.
%3684 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_3844, %cast_3791 : tensor<?x?x?x?xf32>, tensor<1024x256x1x1xf32>) outs(%3681 : tensor<1x1024x?x?xf32>) -> tensor<1x1024x?x?xf32>
// Refine the dynamic spatial dims to the static 14x14 shape.
%cast_3851 = tensor.cast %3684 : tensor<1x1024x?x?xf32> to tensor<1x1024x14x14xf32>
// Quantize the 1x1 conv output to signed 8-bit:
//   q = clamp(round(x / scale) + zero_point, -128, 127)
// with scale = 3.90625e-03 and zero_point = 0.
%3685 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3686 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Not referenced in the visible lines; presumably a torch dtype code -- TODO confirm.
%int12_3852 = torch.constant.int 12
%3687 = torch.aten.item %3685 : !torch.vtensor<[],f32> -> !torch.float
%3688 = torch_c.to_f64 %3687
%3689 = torch.aten.item %3686 : !torch.vtensor<[],si8> -> !torch.int
%3690 = torch_c.to_i64 %3689
%c1_3853 = arith.constant 1 : index
%c1_3854 = arith.constant 1 : index
%c1024_3855 = arith.constant 1024 : index
%c2_3856 = arith.constant 2 : index
%c14_3857 = arith.constant 14 : index
%c3_3858 = arith.constant 3 : index
%c14_3859 = arith.constant 14 : index
%3691 = tensor.empty() : tensor<1x1024x14x14xi8>
%3692 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3851 : tensor<1x1024x14x14xf32>) outs(%3691 : tensor<1x1024x14x14xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %3689
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %3687
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round vs. round-half-to-even -- confirm tie handling.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to [-128, 127] before converting to i8.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x1024x14x14xi8>
// Two identity casts (types unchanged).
%cast_3860 = tensor.cast %3692 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
%cast_3861 = tensor.cast %cast_3860 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
// Dequantize the i8 conv output back to f32:
//   x = (q - zero_point) * scale, scale = 3.90625e-03, zero_point = 0.
// The result %cast_3870 is the first operand of the elementwise add below.
%3693 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3694 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3695 = torch.aten.item %3693 : !torch.vtensor<[],f32> -> !torch.float
%3696 = torch_c.to_f64 %3695
%3697 = torch.aten.item %3694 : !torch.vtensor<[],si8> -> !torch.int
%3698 = torch_c.to_i64 %3697
%cast_3862 = tensor.cast %cast_3861 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
%c1_3863 = arith.constant 1 : index
%c1_3864 = arith.constant 1 : index
%c1024_3865 = arith.constant 1024 : index
%c2_3866 = arith.constant 2 : index
%c14_3867 = arith.constant 14 : index
%c3_3868 = arith.constant 3 : index
%c14_3869 = arith.constant 14 : index
%3699 = tensor.empty() : tensor<1x1024x14x14xf32>
%3700 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3862 : tensor<1x1024x14x14xi8>) outs(%3699 : tensor<1x1024x14x14xf32>) {
^bb0(%in: i8, %out: f32):
// Zero-point subtraction is performed in i32.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %3697
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %3695
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x1024x14x14xf32>
%cast_3870 = tensor.cast %3700 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
// Elementwise add of two 1x1024x14x14 tensors: out = lhs + rhs * alpha, where
// lhs is %cast_3870 (the dequantized conv output above), rhs is %cast_3564
// (defined before this chunk -- presumably the residual/skip branch) and
// alpha is the integer constant 1.
%int1_3871 = torch.constant.int 1
%3701 = torch_c.to_i64 %int1_3871
%c1_3872 = arith.constant 1 : index
%c1_3873 = arith.constant 1 : index
%c1024_3874 = arith.constant 1024 : index
%c2_3875 = arith.constant 2 : index
%c14_3876 = arith.constant 14 : index
%c3_3877 = arith.constant 3 : index
%c14_3878 = arith.constant 14 : index
%c1_3879 = arith.constant 1 : index
%c1024_3880 = arith.constant 1024 : index
// Broadcast-compatibility checks. Each compares two equal constants
// (1024 == 1024, 14 == 14, 14 == 14), so they hold by construction.
%3702 = arith.cmpi eq, %c1024_3874, %c1024_3880 : index
cf.assert %3702, "mismatched size for broadcast"
%c2_3881 = arith.constant 2 : index
%c14_3882 = arith.constant 14 : index
%3703 = arith.cmpi eq, %c14_3876, %c14_3882 : index
cf.assert %3703, "mismatched size for broadcast"
%c3_3883 = arith.constant 3 : index
%c14_3884 = arith.constant 14 : index
%3704 = arith.cmpi eq, %c14_3878, %c14_3884 : index
cf.assert %3704, "mismatched size for broadcast"
%3705 = tensor.empty() : tensor<1x1024x14x14xf32>
%3706 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3870, %cast_3564 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%3705 : tensor<1x1024x14x14xf32>) {
^bb0(%in: f32, %in_6197: f32, %out: f32):
// alpha (i64 1) is converted to f32 and applied to the second operand only.
%5774 = arith.sitofp %3701 : i64 to f32
%5775 = arith.mulf %in_6197, %5774 : f32
%5776 = arith.addf %in, %5775 : f32
linalg.yield %5776 : f32
} -> tensor<1x1024x14x14xf32>
%cast_3885 = tensor.cast %3706 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
// Elementwise ReLU on the sum %cast_3885: yields %in when %in > 0, else 0.0.
%c1_3886 = arith.constant 1 : index
%c1_3887 = arith.constant 1 : index
%c1024_3888 = arith.constant 1024 : index
%c2_3889 = arith.constant 2 : index
%c14_3890 = arith.constant 14 : index
%c3_3891 = arith.constant 3 : index
%c14_3892 = arith.constant 14 : index
%3707 = tensor.empty() : tensor<1x1024x14x14xf32>
%3708 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3885 : tensor<1x1024x14x14xf32>) outs(%3707 : tensor<1x1024x14x14xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// `ugt` is the unordered predicate, so a NaN input also selects %in.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x1024x14x14xf32>
// Identity cast (source and result types are the same).
%cast_3893 = tensor.cast %3708 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
// Quantize the ReLU output %cast_3893 to signed 8-bit:
//   q = clamp(round(x / scale) + zero_point, -128, 127)
// with scale = 1.5625e-02 and zero_point = 0.
%3709 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%3710 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Not referenced in the visible lines; presumably a torch dtype code -- TODO confirm.
%int12_3894 = torch.constant.int 12
%3711 = torch.aten.item %3709 : !torch.vtensor<[],f32> -> !torch.float
%3712 = torch_c.to_f64 %3711
%3713 = torch.aten.item %3710 : !torch.vtensor<[],si8> -> !torch.int
%3714 = torch_c.to_i64 %3713
%c1_3895 = arith.constant 1 : index
%c1_3896 = arith.constant 1 : index
%c1024_3897 = arith.constant 1024 : index
%c2_3898 = arith.constant 2 : index
%c14_3899 = arith.constant 14 : index
%c3_3900 = arith.constant 3 : index
%c14_3901 = arith.constant 14 : index
%3715 = tensor.empty() : tensor<1x1024x14x14xi8>
%3716 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3893 : tensor<1x1024x14x14xf32>) outs(%3715 : tensor<1x1024x14x14xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %3713
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %3711
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round vs. round-half-to-even -- confirm tie handling.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to [-128, 127] before converting to i8.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x1024x14x14xi8>
// Two identity casts (types unchanged).
%cast_3902 = tensor.cast %3716 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
%cast_3903 = tensor.cast %cast_3902 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
// Dequantize the i8 activation back to f32:
//   x = (q - zero_point) * scale, scale = 1.5625e-02, zero_point = 0.
// The result %cast_3912 is the input padded for the next convolution.
%3717 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%3718 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3719 = torch.aten.item %3717 : !torch.vtensor<[],f32> -> !torch.float
%3720 = torch_c.to_f64 %3719
%3721 = torch.aten.item %3718 : !torch.vtensor<[],si8> -> !torch.int
%3722 = torch_c.to_i64 %3721
%cast_3904 = tensor.cast %cast_3903 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
%c1_3905 = arith.constant 1 : index
%c1_3906 = arith.constant 1 : index
%c1024_3907 = arith.constant 1024 : index
%c2_3908 = arith.constant 2 : index
%c14_3909 = arith.constant 14 : index
%c3_3910 = arith.constant 3 : index
%c14_3911 = arith.constant 14 : index
%3723 = tensor.empty() : tensor<1x1024x14x14xf32>
%3724 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3904 : tensor<1x1024x14x14xi8>) outs(%3723 : tensor<1x1024x14x14xf32>) {
^bb0(%in: i8, %out: f32):
// Zero-point subtraction is performed in i32.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %3721
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %3719
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x1024x14x14xf32>
%cast_3912 = tensor.cast %3724 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
// Quantize the 256x1024x1x1 f32 tensor %138 (defined earlier; presumably the
// filter of the next 1x1 convolution) to i8:
//   q = clamp(round(w / scale) + zero_point, -128, 127)
// with scale = 0.001953125 and zero_point = 0.
%3725 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
%3726 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Not referenced in the visible lines; presumably a torch dtype code -- TODO confirm.
%int12_3913 = torch.constant.int 12
%3727 = torch.aten.item %3725 : !torch.vtensor<[],f32> -> !torch.float
%3728 = torch_c.to_f64 %3727
%3729 = torch.aten.item %3726 : !torch.vtensor<[],si8> -> !torch.int
%3730 = torch_c.to_i64 %3729
%c1_3914 = arith.constant 1 : index
%c0_3915 = arith.constant 0 : index
%c256_3916 = arith.constant 256 : index
%c1_3917 = arith.constant 1 : index
%c1024_3918 = arith.constant 1024 : index
%3731 = tensor.empty() : tensor<256x1024x1x1xi8>
// #map4 pins the two trailing (size-1) dims of the input to index 0.
%3732 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%138 : tensor<256x1024x1x1xf32>) outs(%3731 : tensor<256x1024x1x1xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %3729
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %3727
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round vs. round-half-to-even -- confirm tie handling.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to [-128, 127] before converting to i8.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256x1024x1x1xi8>
// Two identity casts (types unchanged).
%cast_3919 = tensor.cast %3732 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
%cast_3920 = tensor.cast %cast_3919 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
// Dequantize the i8 256x1024x1x1 tensor back to f32:
//   w = (q - zero_point) * scale, scale = 0.001953125, zero_point = 0.
%3733 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
%3734 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3735 = torch.aten.item %3733 : !torch.vtensor<[],f32> -> !torch.float
%3736 = torch_c.to_f64 %3735
%3737 = torch.aten.item %3734 : !torch.vtensor<[],si8> -> !torch.int
%3738 = torch_c.to_i64 %3737
%cast_3921 = tensor.cast %cast_3920 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
%c1_3922 = arith.constant 1 : index
%c0_3923 = arith.constant 0 : index
%c256_3924 = arith.constant 256 : index
%c1_3925 = arith.constant 1 : index
%c1024_3926 = arith.constant 1024 : index
%3739 = tensor.empty() : tensor<256x1024x1x1xf32>
%3740 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3921 : tensor<256x1024x1x1xi8>) outs(%3739 : tensor<256x1024x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// Zero-point subtraction is performed in i32.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %3737
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %3735
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<256x1024x1x1xf32>
%cast_3927 = tensor.cast %3740 : tensor<256x1024x1x1xf32> to tensor<256x1024x1x1xf32>
// Quantize the 256-element f32 vector %140 (defined earlier; presumably the
// bias of the next convolution) to i8:
//   q = clamp(round(b / scale) + zero_point, -128, 127)
// with scale = 3.90625e-03 and zero_point = 0.
%3741 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3742 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Not referenced in the visible lines; presumably a torch dtype code -- TODO confirm.
%int12_3928 = torch.constant.int 12
%3743 = torch.aten.item %3741 : !torch.vtensor<[],f32> -> !torch.float
%3744 = torch_c.to_f64 %3743
%3745 = torch.aten.item %3742 : !torch.vtensor<[],si8> -> !torch.int
%3746 = torch_c.to_i64 %3745
%c1_3929 = arith.constant 1 : index
%c0_3930 = arith.constant 0 : index
%c256_3931 = arith.constant 256 : index
%3747 = tensor.empty() : tensor<256xi8>
%3748 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%140 : tensor<256xf32>) outs(%3747 : tensor<256xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %3745
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %3743
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round vs. round-half-to-even -- confirm tie handling.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to [-128, 127] before converting to i8.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256xi8>
// Two identity casts (types unchanged).
%cast_3932 = tensor.cast %3748 : tensor<256xi8> to tensor<256xi8>
%cast_3933 = tensor.cast %cast_3932 : tensor<256xi8> to tensor<256xi8>
// Dequantize the i8 256-element vector back to f32:
//   b = (q - zero_point) * scale, scale = 3.90625e-03, zero_point = 0.
%3749 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3750 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3751 = torch.aten.item %3749 : !torch.vtensor<[],f32> -> !torch.float
%3752 = torch_c.to_f64 %3751
%3753 = torch.aten.item %3750 : !torch.vtensor<[],si8> -> !torch.int
%3754 = torch_c.to_i64 %3753
%cast_3934 = tensor.cast %cast_3933 : tensor<256xi8> to tensor<256xi8>
%c1_3935 = arith.constant 1 : index
%c0_3936 = arith.constant 0 : index
%c256_3937 = arith.constant 256 : index
%3755 = tensor.empty() : tensor<256xf32>
%3756 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_3934 : tensor<256xi8>) outs(%3755 : tensor<256xf32>) {
^bb0(%in: i8, %out: f32):
// Zero-point subtraction is performed in i32.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %3753
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %3751
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<256xf32>
%cast_3938 = tensor.cast %3756 : tensor<256xf32> to tensor<256xf32>
// Scalar parameters for the next convolution (input %cast_3912, 1024 channels).
%int0_3939 = torch.constant.int 0
%int0_3940 = torch.constant.int 0
%int1_3941 = torch.constant.int 1
%int1_3942 = torch.constant.int 1
%int1_3943 = torch.constant.int 1
%int1_3944 = torch.constant.int 1
%int0_3945 = torch.constant.int 0
// The four lists are [0, 0], [1, 1], [1, 1] and [0, 0]. None is referenced in
// the visible lines; presumably they are the padding / stride-or-dilation /
// output-padding lists of the original torch convolution op -- TODO confirm.
%3757 = torch.prim.ListConstruct %int0_3939, %int0_3940 : (!torch.int, !torch.int) -> !torch.list<int>
%3758 = torch.prim.ListConstruct %int1_3941, %int1_3942 : (!torch.int, !torch.int) -> !torch.list<int>
%3759 = torch.prim.ListConstruct %int1_3943, %int1_3944 : (!torch.int, !torch.int) -> !torch.list<int>
%3760 = torch.prim.ListConstruct %int0_3945, %int0_3945 : (!torch.int, !torch.int) -> !torch.list<int>
%false_3946 = torch.constant.bool false
%int1_3947 = torch.constant.int 1
// %3761 (= 1) is used below as the group count.
%3761 = torch_c.to_i64 %int1_3947
// %3762 / %3763 (= 0) are used below as the pad amounts on dims 2 and 3.
%3762 = torch_c.to_i64 %int0_3939
%3763 = torch_c.to_i64 %int0_3940
%3764 = torch_c.to_i64 %int0_3945
%3765 = torch_c.to_i64 %int0_3945
%c0_3948 = arith.constant 0 : index
%c1_3949 = arith.constant 1 : index
%c1_3950 = arith.constant 1 : index
%c1024_3951 = arith.constant 1024 : index
%c2_3952 = arith.constant 2 : index
%c14_3953 = arith.constant 14 : index
%c3_3954 = arith.constant 3 : index
%c14_3955 = arith.constant 14 : index
%c0_3956 = arith.constant 0 : index
%c256_3957 = arith.constant 256 : index
%c1_3958 = arith.constant 1 : index
%c1024_3959 = arith.constant 1024 : index
%c2_3960 = arith.constant 2 : index
%c1_3961 = arith.constant 1 : index
%c3_3962 = arith.constant 3 : index
%c1_3963 = arith.constant 1 : index
// Runtime sanity checks for the group count of the next convolution.
// %3766 is the group count (%3761 = 1) as an index.
%3766 = arith.index_cast %3761 : i64 to index
%c0_3964 = arith.constant 0 : index
// 1024 (input channels) must be an exact multiple of the group count.
%3767 = arith.remsi %c1024_3951, %3766 : index
%3768 = arith.cmpi eq, %c0_3964, %3767 : index
cf.assert %3768, "invalid: groups must divide input channel size evenly."
%c0_3965 = arith.constant 0 : index
// 256 (leading filter dim) must be an exact multiple of the group count.
%3769 = arith.remsi %c256_3957, %3766 : index
%3770 = arith.cmpi eq, %c0_3965, %3769 : index
cf.assert %3770, "invalid: groups must divide weight batch size evenly."
%c1_i64_3966 = arith.constant 1 : i64
%c1_i64_3967 = arith.constant 1 : i64
%c1_i64_3968 = arith.constant 1 : i64
%c1_i64_3969 = arith.constant 1 : i64
%cst_3970 = arith.constant 0.000000e+00 : f32
%c0_3971 = arith.constant 0 : index
%c1_3972 = arith.constant 1 : index
%c1_3973 = arith.constant 1 : index
%c1024_3974 = arith.constant 1024 : index
%c2_3975 = arith.constant 2 : index
%c14_3976 = arith.constant 14 : index
%c3_3977 = arith.constant 3 : index
%c14_3978 = arith.constant 14 : index
%c0_i64_3979 = arith.constant 0 : i64
%3771 = arith.index_cast %c0_i64_3979 : i64 to index
%3772 = arith.index_cast %c0_i64_3979 : i64 to index
%3773 = arith.index_cast %3762 : i64 to index
%3774 = arith.index_cast %3763 : i64 to index
%padded_3980 = tensor.pad %cast_3912 low[%3771, %3772, %3773, %3774] high[%3771, %3772, %3773, %3774] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_3970 : f32
} : tensor<1x1024x14x14xf32> to tensor<?x?x?x?xf32>
%3775 = arith.index_cast %c1_3961 : index to i64
%c1_i64_3981 = arith.constant 1 : i64
%c2_i64_3982 = arith.constant 2 : i64
%3776 = arith.muli %3762, %c2_i64_3982 : i64
%3777 = arith.index_cast %c14_3953 : index to i64
%3778 = arith.addi %3777, %3776 : i64
%3779 = arith.subi %3775, %c1_i64_3981 : i64
%3780 = arith.muli %c1_i64_3966, %3779 : i64
%3781 = arith.subi %3778, %3780 : i64
%3782 = arith.subi %3781, %c1_i64_3981 : i64
%3783 = arith.floordivsi %3782, %c1_i64_3968 : i64
%3784 = arith.addi %3783, %c1_i64_3981 : i64
%3785 = arith.index_cast %3784 : i64 to index
%3786 = arith.index_cast %c1_3963 : index to i64
%c1_i64_3983 = arith.constant 1 : i64
%c2_i64_3984 = arith.constant 2 : i64
%3787 = arith.muli %3763, %c2_i64_3984 : i64
%3788 = arith.index_cast %c14_3955 : index to i64
%3789 = arith.addi %3788, %3787 : i64
%3790 = arith.subi %3786, %c1_i64_3983 : i64
%3791 = arith.muli %c1_i64_3967, %3790 : i64
%3792 = arith.subi %3789, %3791 : i64
%3793 = arith.subi %3792, %c1_i64_3983 : i64
%3794 = arith.floordivsi %3793, %c1_i64_3969 : i64
%3795 = arith.addi %3794, %c1_i64_3983 : i64
%3796 = arith.index_cast %3795 : i64 to index
%3797 = tensor.empty(%3785, %3796) : tensor<1x256x?x?xf32>
%3798 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3938 : tensor<256xf32>) outs(%3797 : tensor<1x256x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x256x?x?xf32>
%3799 = arith.floordivsi %c1024_3951, %3766 : index
%3800 = arith.floordivsi %c256_3957, %3766 : index
%c0_3985 = arith.constant 0 : index
%c1_3986 = arith.constant 1 : index
%3801 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_3980, %cast_3927 : tensor<?x?x?x?xf32>, tensor<256x1024x1x1xf32>) outs(%3798 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
%cast_3987 = tensor.cast %3801 : tensor<1x256x?x?xf32> to tensor<1x256x14x14xf32>
// Elementwise max-with-zero (ReLU-style) over %cast_3987, tensor<1x256x14x14xf32>.
// The index constants below are not referenced within this excerpt.
%c1_3988 = arith.constant 1 : index
%c1_3989 = arith.constant 1 : index
%c256_3990 = arith.constant 256 : index
%c2_3991 = arith.constant 2 : index
%c14_3992 = arith.constant 14 : index
%c3_3993 = arith.constant 3 : index
%c14_3994 = arith.constant 14 : index
%3802 = tensor.empty() : tensor<1x256x14x14xf32>
// #map indexes the input's dim 0 with the constant 0; the output uses the identity map.
%3803 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3987 : tensor<1x256x14x14xf32>) outs(%3802 : tensor<1x256x14x14xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// "ugt" is the unordered-or-greater predicate, so %in is also selected when it is NaN.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x256x14x14xf32>
// Same-type cast (source and result types are identical).
%cast_3995 = tensor.cast %3803 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
// Quantize %cast_3995 (tensor<1x256x14x14xf32>) to i8, elementwise:
//   out = i8(clamp(round(in / scale) + zero_point, -128, 127))
// with scale = 7.8125e-03 (%3804) and zero_point = 0 (%3805).
%3804 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3805 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): 12 is presumably a torch dtype code — confirm; it is not used within this excerpt.
%int12_3996 = torch.constant.int 12
%3806 = torch.aten.item %3804 : !torch.vtensor<[],f32> -> !torch.float
%3807 = torch_c.to_f64 %3806
%3808 = torch.aten.item %3805 : !torch.vtensor<[],si8> -> !torch.int
%3809 = torch_c.to_i64 %3808
%c1_3997 = arith.constant 1 : index
%c1_3998 = arith.constant 1 : index
%c256_3999 = arith.constant 256 : index
%c2_4000 = arith.constant 2 : index
%c14_4001 = arith.constant 14 : index
%c3_4002 = arith.constant 3 : index
%c14_4003 = arith.constant 14 : index
%3810 = tensor.empty() : tensor<1x256x14x14xi8>
%3811 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3995 : tensor<1x256x14x14xf32>) outs(%3810 : tensor<1x256x14x14xi8>) {
^bb0(%in: f32, %out: i8):
// The scalar conversions of zero point and scale are repeated inside the body.
%5774 = torch_c.to_i64 %3808
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %3806
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to [-128, 127]. Both predicates are the unordered forms, so they are also true for NaN;
// the upper-bound select is applied last.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x256x14x14xi8>
// Two consecutive same-type casts.
%cast_4004 = tensor.cast %3811 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
%cast_4005 = tensor.cast %cast_4004 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
// Dequantize %cast_4005 (tensor<1x256x14x14xi8>) back to f32, elementwise:
//   out = f32(i32(in) - zero_point) * scale
// with scale = 7.8125e-03 (%3812) and zero_point = 0 (%3813).
%3812 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3813 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3814 = torch.aten.item %3812 : !torch.vtensor<[],f32> -> !torch.float
%3815 = torch_c.to_f64 %3814
%3816 = torch.aten.item %3813 : !torch.vtensor<[],si8> -> !torch.int
%3817 = torch_c.to_i64 %3816
%cast_4006 = tensor.cast %cast_4005 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
// The index constants below are not referenced within this excerpt.
%c1_4007 = arith.constant 1 : index
%c1_4008 = arith.constant 1 : index
%c256_4009 = arith.constant 256 : index
%c2_4010 = arith.constant 2 : index
%c14_4011 = arith.constant 14 : index
%c3_4012 = arith.constant 3 : index
%c14_4013 = arith.constant 14 : index
%3818 = tensor.empty() : tensor<1x256x14x14xf32>
%3819 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4006 : tensor<1x256x14x14xi8>) outs(%3818 : tensor<1x256x14x14xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend to i32 so the zero-point subtraction is done in 32 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %3816
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %3814
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x256x14x14xf32>
// Same-type cast (source and result types are identical).
%cast_4014 = tensor.cast %3819 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
// Quantize %142 (tensor<256x256x3x3xf32>, defined before this excerpt) to i8, elementwise:
//   out = i8(clamp(round(in / scale) + zero_point, -128, 127))
// with scale = 3.90625e-03 (%3820) and zero_point = 0 (%3821).
%3820 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3821 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): 12 is presumably a torch dtype code — confirm; it is not used within this excerpt.
%int12_4015 = torch.constant.int 12
%3822 = torch.aten.item %3820 : !torch.vtensor<[],f32> -> !torch.float
%3823 = torch_c.to_f64 %3822
%3824 = torch.aten.item %3821 : !torch.vtensor<[],si8> -> !torch.int
%3825 = torch_c.to_i64 %3824
%c1_4016 = arith.constant 1 : index
%c0_4017 = arith.constant 0 : index
%c256_4018 = arith.constant 256 : index
%c1_4019 = arith.constant 1 : index
%c256_4020 = arith.constant 256 : index
%c2_4021 = arith.constant 2 : index
%c3_4022 = arith.constant 3 : index
%c3_4023 = arith.constant 3 : index
%c3_4024 = arith.constant 3 : index
%3826 = tensor.empty() : tensor<256x256x3x3xi8>
// Identity indexing maps on both the input and the output.
%3827 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%142 : tensor<256x256x3x3xf32>) outs(%3826 : tensor<256x256x3x3xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %3824
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %3822
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to [-128, 127]. Both predicates are the unordered forms, so they are also true for NaN;
// the upper-bound select is applied last.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256x256x3x3xi8>
// Two consecutive same-type casts.
%cast_4025 = tensor.cast %3827 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
%cast_4026 = tensor.cast %cast_4025 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
// Dequantize %cast_4026 (tensor<256x256x3x3xi8>) back to f32, elementwise:
//   out = f32(i32(in) - zero_point) * scale
// with scale = 3.90625e-03 (%3828) and zero_point = 0 (%3829).
%3828 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3829 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3830 = torch.aten.item %3828 : !torch.vtensor<[],f32> -> !torch.float
%3831 = torch_c.to_f64 %3830
%3832 = torch.aten.item %3829 : !torch.vtensor<[],si8> -> !torch.int
%3833 = torch_c.to_i64 %3832
%cast_4027 = tensor.cast %cast_4026 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
// The index constants below are not referenced within this excerpt.
%c1_4028 = arith.constant 1 : index
%c0_4029 = arith.constant 0 : index
%c256_4030 = arith.constant 256 : index
%c1_4031 = arith.constant 1 : index
%c256_4032 = arith.constant 256 : index
%c2_4033 = arith.constant 2 : index
%c3_4034 = arith.constant 3 : index
%c3_4035 = arith.constant 3 : index
%c3_4036 = arith.constant 3 : index
%3834 = tensor.empty() : tensor<256x256x3x3xf32>
%3835 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4027 : tensor<256x256x3x3xi8>) outs(%3834 : tensor<256x256x3x3xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend to i32 so the zero-point subtraction is done in 32 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %3832
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %3830
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<256x256x3x3xf32>
// Same-type cast (source and result types are identical).
%cast_4037 = tensor.cast %3835 : tensor<256x256x3x3xf32> to tensor<256x256x3x3xf32>
// Quantize %144 (tensor<256xf32>, defined before this excerpt) to i8, elementwise:
//   out = i8(clamp(round(in / scale) + zero_point, -128, 127))
// with scale = 7.8125e-03 (%3836) and zero_point = 0 (%3837).
%3836 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3837 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): 12 is presumably a torch dtype code — confirm; it is not used within this excerpt.
%int12_4038 = torch.constant.int 12
%3838 = torch.aten.item %3836 : !torch.vtensor<[],f32> -> !torch.float
%3839 = torch_c.to_f64 %3838
%3840 = torch.aten.item %3837 : !torch.vtensor<[],si8> -> !torch.int
%3841 = torch_c.to_i64 %3840
%c1_4039 = arith.constant 1 : index
%c0_4040 = arith.constant 0 : index
%c256_4041 = arith.constant 256 : index
%3842 = tensor.empty() : tensor<256xi8>
%3843 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%144 : tensor<256xf32>) outs(%3842 : tensor<256xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %3840
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %3838
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to [-128, 127]. Both predicates are the unordered forms, so they are also true for NaN;
// the upper-bound select is applied last.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256xi8>
// Two consecutive same-type casts.
%cast_4042 = tensor.cast %3843 : tensor<256xi8> to tensor<256xi8>
%cast_4043 = tensor.cast %cast_4042 : tensor<256xi8> to tensor<256xi8>
// Dequantize %cast_4043 (tensor<256xi8>) back to f32, elementwise:
//   out = f32(i32(in) - zero_point) * scale
// with scale = 7.8125e-03 (%3844) and zero_point = 0 (%3845).
%3844 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3845 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3846 = torch.aten.item %3844 : !torch.vtensor<[],f32> -> !torch.float
%3847 = torch_c.to_f64 %3846
%3848 = torch.aten.item %3845 : !torch.vtensor<[],si8> -> !torch.int
%3849 = torch_c.to_i64 %3848
%cast_4044 = tensor.cast %cast_4043 : tensor<256xi8> to tensor<256xi8>
%c1_4045 = arith.constant 1 : index
%c0_4046 = arith.constant 0 : index
%c256_4047 = arith.constant 256 : index
%3850 = tensor.empty() : tensor<256xf32>
%3851 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_4044 : tensor<256xi8>) outs(%3850 : tensor<256xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend to i32 so the zero-point subtraction is done in 32 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %3848
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %3846
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<256xf32>
// Same-type cast (source and result types are identical).
%cast_4048 = tensor.cast %3851 : tensor<256xf32> to tensor<256xf32>
// 2-D convolution (NCHW input, FCHW weight) with a 3x3 kernel and a pad of 1 on dims 2 and 3:
//   input  : %cast_4014, tensor<1x256x14x14xf32>
//   weight : %cast_4037, tensor<256x256x3x3xf32>
//   bias   : %cast_4048, tensor<256xf32>, broadcast into the accumulator
//   result : %cast_4097, tensor<1x256x14x14xf32>
%int1_4049 = torch.constant.int 1
%int1_4050 = torch.constant.int 1
%int1_4051 = torch.constant.int 1
%int1_4052 = torch.constant.int 1
%int1_4053 = torch.constant.int 1
%int1_4054 = torch.constant.int 1
%int0_4055 = torch.constant.int 0
// Integer lists built from the constants above; none of them is consumed within this excerpt.
// NOTE(review): presumably the padding / stride / dilation / output-padding operands of the
// original torch convolution — confirm against the pre-lowering IR.
%3852 = torch.prim.ListConstruct %int1_4049, %int1_4050 : (!torch.int, !torch.int) -> !torch.list<int>
%3853 = torch.prim.ListConstruct %int1_4051, %int1_4052 : (!torch.int, !torch.int) -> !torch.list<int>
%3854 = torch.prim.ListConstruct %int1_4053, %int1_4054 : (!torch.int, !torch.int) -> !torch.list<int>
%3855 = torch.prim.ListConstruct %int0_4055, %int0_4055 : (!torch.int, !torch.int) -> !torch.list<int>
%false_4056 = torch.constant.bool false
%int1_4057 = torch.constant.int 1
// %3856 (constant 1) is the value checked as "groups" by the asserts below;
// %3857 / %3858 (constant 1) are the pad amounts used for dims 2 and 3.
%3856 = torch_c.to_i64 %int1_4057
%3857 = torch_c.to_i64 %int1_4049
%3858 = torch_c.to_i64 %int1_4050
%3859 = torch_c.to_i64 %int0_4055
%3860 = torch_c.to_i64 %int0_4055
%c0_4058 = arith.constant 0 : index
%c1_4059 = arith.constant 1 : index
%c1_4060 = arith.constant 1 : index
%c256_4061 = arith.constant 256 : index
%c2_4062 = arith.constant 2 : index
%c14_4063 = arith.constant 14 : index
%c3_4064 = arith.constant 3 : index
%c14_4065 = arith.constant 14 : index
%c0_4066 = arith.constant 0 : index
%c256_4067 = arith.constant 256 : index
%c1_4068 = arith.constant 1 : index
%c256_4069 = arith.constant 256 : index
%c2_4070 = arith.constant 2 : index
%c3_4071 = arith.constant 3 : index
%c3_4072 = arith.constant 3 : index
%c3_4073 = arith.constant 3 : index
// Runtime checks: both 256 values must be divisible by the groups value.
%3861 = arith.index_cast %3856 : i64 to index
%c0_4074 = arith.constant 0 : index
%3862 = arith.remsi %c256_4061, %3861 : index
%3863 = arith.cmpi eq, %c0_4074, %3862 : index
cf.assert %3863, "invalid: groups must divide input channel size evenly."
%c0_4075 = arith.constant 0 : index
%3864 = arith.remsi %c256_4067, %3861 : index
%3865 = arith.cmpi eq, %c0_4075, %3864 : index
cf.assert %3865, "invalid: groups must divide weight batch size evenly."
%c1_i64_4076 = arith.constant 1 : i64
%c1_i64_4077 = arith.constant 1 : i64
%c1_i64_4078 = arith.constant 1 : i64
%c1_i64_4079 = arith.constant 1 : i64
%cst_4080 = arith.constant 0.000000e+00 : f32
%c0_4081 = arith.constant 0 : index
%c1_4082 = arith.constant 1 : index
%c1_4083 = arith.constant 1 : index
%c256_4084 = arith.constant 256 : index
%c2_4085 = arith.constant 2 : index
%c14_4086 = arith.constant 14 : index
%c3_4087 = arith.constant 3 : index
%c14_4088 = arith.constant 14 : index
%c0_i64_4089 = arith.constant 0 : i64
%3866 = arith.index_cast %c0_i64_4089 : i64 to index
%3867 = arith.index_cast %c0_i64_4089 : i64 to index
%3868 = arith.index_cast %3857 : i64 to index
%3869 = arith.index_cast %3858 : i64 to index
// Pad the input with 0.0: dims 0 and 1 by zero, dims 2 and 3 by %3868 / %3869 (low and high).
// The result type is fully dynamic.
%padded_4090 = tensor.pad %cast_4014 low[%3866, %3867, %3868, %3869] high[%3866, %3867, %3868, %3869] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_4080 : f32
} : tensor<1x256x14x14xf32> to tensor<?x?x?x?xf32>
// Output extent for dim 2:
//   floordiv(14 + 2*%3857 - %c1_i64_4076*(3 - 1) - 1, %c1_i64_4078) + 1
// NOTE(review): %c1_i64_4076 and %c1_i64_4078 presumably stand for dilation and stride
// (both 1, matching the conv attributes below) — confirm.
%3870 = arith.index_cast %c3_4071 : index to i64
%c1_i64_4091 = arith.constant 1 : i64
%c2_i64_4092 = arith.constant 2 : i64
%3871 = arith.muli %3857, %c2_i64_4092 : i64
%3872 = arith.index_cast %c14_4063 : index to i64
%3873 = arith.addi %3872, %3871 : i64
%3874 = arith.subi %3870, %c1_i64_4091 : i64
%3875 = arith.muli %c1_i64_4076, %3874 : i64
%3876 = arith.subi %3873, %3875 : i64
%3877 = arith.subi %3876, %c1_i64_4091 : i64
%3878 = arith.floordivsi %3877, %c1_i64_4078 : i64
%3879 = arith.addi %3878, %c1_i64_4091 : i64
%3880 = arith.index_cast %3879 : i64 to index
// Same computation for dim 3, using %3858, %c1_i64_4077 and %c1_i64_4079.
%3881 = arith.index_cast %c3_4073 : index to i64
%c1_i64_4093 = arith.constant 1 : i64
%c2_i64_4094 = arith.constant 2 : i64
%3882 = arith.muli %3858, %c2_i64_4094 : i64
%3883 = arith.index_cast %c14_4065 : index to i64
%3884 = arith.addi %3883, %3882 : i64
%3885 = arith.subi %3881, %c1_i64_4093 : i64
%3886 = arith.muli %c1_i64_4077, %3885 : i64
%3887 = arith.subi %3884, %3886 : i64
%3888 = arith.subi %3887, %c1_i64_4093 : i64
%3889 = arith.floordivsi %3888, %c1_i64_4079 : i64
%3890 = arith.addi %3889, %c1_i64_4093 : i64
%3891 = arith.index_cast %3890 : i64 to index
// Build the conv accumulator: #map3 reads the 256-element bias by dim 1 only,
// so every (d0, d2, d3) position of the output receives the bias of its channel d1.
%3892 = tensor.empty(%3880, %3891) : tensor<1x256x?x?xf32>
%3893 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4048 : tensor<256xf32>) outs(%3892 : tensor<1x256x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x256x?x?xf32>
// %3894 / %3895 are not referenced again within this excerpt.
%3894 = arith.floordivsi %c256_4061, %3861 : index
%3895 = arith.floordivsi %c256_4067, %3861 : index
%c0_4095 = arith.constant 0 : index
%c1_4096 = arith.constant 1 : index
// Convolution with stride 1 and dilation 1, using the bias-filled tensor %3893 as outs.
%3896 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_4090, %cast_4037 : tensor<?x?x?x?xf32>, tensor<256x256x3x3xf32>) outs(%3893 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
// Cast the dynamically shaped result to the static 1x256x14x14 type.
%cast_4097 = tensor.cast %3896 : tensor<1x256x?x?xf32> to tensor<1x256x14x14xf32>
// Elementwise max-with-zero (ReLU-style) over %cast_4097, tensor<1x256x14x14xf32>.
// The index constants below are not referenced within this excerpt.
%c1_4098 = arith.constant 1 : index
%c1_4099 = arith.constant 1 : index
%c256_4100 = arith.constant 256 : index
%c2_4101 = arith.constant 2 : index
%c14_4102 = arith.constant 14 : index
%c3_4103 = arith.constant 3 : index
%c14_4104 = arith.constant 14 : index
%3897 = tensor.empty() : tensor<1x256x14x14xf32>
// #map indexes the input's dim 0 with the constant 0; the output uses the identity map.
%3898 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4097 : tensor<1x256x14x14xf32>) outs(%3897 : tensor<1x256x14x14xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// "ugt" is the unordered-or-greater predicate, so %in is also selected when it is NaN.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x256x14x14xf32>
// Same-type cast (source and result types are identical).
%cast_4105 = tensor.cast %3898 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
// Quantize %cast_4105 (tensor<1x256x14x14xf32>) to i8, elementwise:
//   out = i8(clamp(round(in / scale) + zero_point, -128, 127))
// with scale = 7.8125e-03 (%3899) and zero_point = 0 (%3900).
%3899 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3900 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): 12 is presumably a torch dtype code — confirm; it is not used within this excerpt.
%int12_4106 = torch.constant.int 12
%3901 = torch.aten.item %3899 : !torch.vtensor<[],f32> -> !torch.float
%3902 = torch_c.to_f64 %3901
%3903 = torch.aten.item %3900 : !torch.vtensor<[],si8> -> !torch.int
%3904 = torch_c.to_i64 %3903
%c1_4107 = arith.constant 1 : index
%c1_4108 = arith.constant 1 : index
%c256_4109 = arith.constant 256 : index
%c2_4110 = arith.constant 2 : index
%c14_4111 = arith.constant 14 : index
%c3_4112 = arith.constant 3 : index
%c14_4113 = arith.constant 14 : index
%3905 = tensor.empty() : tensor<1x256x14x14xi8>
%3906 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4105 : tensor<1x256x14x14xf32>) outs(%3905 : tensor<1x256x14x14xi8>) {
^bb0(%in: f32, %out: i8):
// The scalar conversions of zero point and scale are repeated inside the body.
%5774 = torch_c.to_i64 %3903
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %3901
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to [-128, 127]. Both predicates are the unordered forms, so they are also true for NaN;
// the upper-bound select is applied last.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x256x14x14xi8>
// Two consecutive same-type casts.
%cast_4114 = tensor.cast %3906 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
%cast_4115 = tensor.cast %cast_4114 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
// Dequantize %cast_4115 (tensor<1x256x14x14xi8>) back to f32, elementwise:
//   out = f32(i32(in) - zero_point) * scale
// with scale = 7.8125e-03 (%3907) and zero_point = 0 (%3908).
%3907 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3908 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3909 = torch.aten.item %3907 : !torch.vtensor<[],f32> -> !torch.float
%3910 = torch_c.to_f64 %3909
%3911 = torch.aten.item %3908 : !torch.vtensor<[],si8> -> !torch.int
%3912 = torch_c.to_i64 %3911
%cast_4116 = tensor.cast %cast_4115 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
// The index constants below are not referenced within this excerpt.
%c1_4117 = arith.constant 1 : index
%c1_4118 = arith.constant 1 : index
%c256_4119 = arith.constant 256 : index
%c2_4120 = arith.constant 2 : index
%c14_4121 = arith.constant 14 : index
%c3_4122 = arith.constant 3 : index
%c14_4123 = arith.constant 14 : index
%3913 = tensor.empty() : tensor<1x256x14x14xf32>
%3914 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4116 : tensor<1x256x14x14xi8>) outs(%3913 : tensor<1x256x14x14xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend to i32 so the zero-point subtraction is done in 32 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %3911
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %3909
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x256x14x14xf32>
// Same-type cast (source and result types are identical).
%cast_4124 = tensor.cast %3914 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
// Quantize %146 (tensor<1024x256x1x1xf32>, defined before this excerpt) to i8, elementwise:
//   out = i8(clamp(round(in / scale) + zero_point, -128, 127))
// with scale = 3.90625e-03 (%3915) and zero_point = 0 (%3916).
%3915 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3916 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): 12 is presumably a torch dtype code — confirm; it is not used within this excerpt.
%int12_4125 = torch.constant.int 12
%3917 = torch.aten.item %3915 : !torch.vtensor<[],f32> -> !torch.float
%3918 = torch_c.to_f64 %3917
%3919 = torch.aten.item %3916 : !torch.vtensor<[],si8> -> !torch.int
%3920 = torch_c.to_i64 %3919
%c1_4126 = arith.constant 1 : index
%c0_4127 = arith.constant 0 : index
%c1024_4128 = arith.constant 1024 : index
%c1_4129 = arith.constant 1 : index
%c256_4130 = arith.constant 256 : index
%3921 = tensor.empty() : tensor<1024x256x1x1xi8>
// #map4 indexes the input's two size-1 trailing dims with the constant 0; the output uses the identity map.
%3922 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%146 : tensor<1024x256x1x1xf32>) outs(%3921 : tensor<1024x256x1x1xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %3919
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %3917
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to [-128, 127]. Both predicates are the unordered forms, so they are also true for NaN;
// the upper-bound select is applied last.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1024x256x1x1xi8>
// Two consecutive same-type casts.
%cast_4131 = tensor.cast %3922 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
%cast_4132 = tensor.cast %cast_4131 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
// Dequantize %cast_4132 (tensor<1024x256x1x1xi8>) back to f32, elementwise:
//   out = f32(i32(in) - zero_point) * scale
// with scale = 3.90625e-03 (%3923) and zero_point = 0 (%3924).
%3923 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3924 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3925 = torch.aten.item %3923 : !torch.vtensor<[],f32> -> !torch.float
%3926 = torch_c.to_f64 %3925
%3927 = torch.aten.item %3924 : !torch.vtensor<[],si8> -> !torch.int
%3928 = torch_c.to_i64 %3927
%cast_4133 = tensor.cast %cast_4132 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
// The index constants below are not referenced within this excerpt.
%c1_4134 = arith.constant 1 : index
%c0_4135 = arith.constant 0 : index
%c1024_4136 = arith.constant 1024 : index
%c1_4137 = arith.constant 1 : index
%c256_4138 = arith.constant 256 : index
%3929 = tensor.empty() : tensor<1024x256x1x1xf32>
// #map4 indexes the input's two size-1 trailing dims with the constant 0; the output uses the identity map.
%3930 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4133 : tensor<1024x256x1x1xi8>) outs(%3929 : tensor<1024x256x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend to i32 so the zero-point subtraction is done in 32 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %3927
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %3925
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1024x256x1x1xf32>
// Same-type cast (source and result types are identical).
%cast_4139 = tensor.cast %3930 : tensor<1024x256x1x1xf32> to tensor<1024x256x1x1xf32>
// Quantize %148 (tensor<1024xf32>, defined before this excerpt) to i8, elementwise:
//   out = i8(clamp(round(in / scale) + zero_point, -128, 127))
// with scale = 3.90625e-03 (%3931) and zero_point = 0 (%3932).
%3931 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3932 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): 12 is presumably a torch dtype code — confirm; it is not used within this excerpt.
%int12_4140 = torch.constant.int 12
%3933 = torch.aten.item %3931 : !torch.vtensor<[],f32> -> !torch.float
%3934 = torch_c.to_f64 %3933
%3935 = torch.aten.item %3932 : !torch.vtensor<[],si8> -> !torch.int
%3936 = torch_c.to_i64 %3935
%c1_4141 = arith.constant 1 : index
%c0_4142 = arith.constant 0 : index
%c1024_4143 = arith.constant 1024 : index
%3937 = tensor.empty() : tensor<1024xi8>
%3938 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%148 : tensor<1024xf32>) outs(%3937 : tensor<1024xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %3935
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %3933
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to [-128, 127]. Both predicates are the unordered forms, so they are also true for NaN;
// the upper-bound select is applied last.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1024xi8>
// Two consecutive same-type casts.
%cast_4144 = tensor.cast %3938 : tensor<1024xi8> to tensor<1024xi8>
%cast_4145 = tensor.cast %cast_4144 : tensor<1024xi8> to tensor<1024xi8>
// Dequantize %cast_4145 (tensor<1024xi8>) back to f32, elementwise:
//   out = f32(i32(in) - zero_point) * scale
// with scale = 3.90625e-03 (%3939) and zero_point = 0 (%3940).
%3939 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3940 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%3941 = torch.aten.item %3939 : !torch.vtensor<[],f32> -> !torch.float
%3942 = torch_c.to_f64 %3941
%3943 = torch.aten.item %3940 : !torch.vtensor<[],si8> -> !torch.int
%3944 = torch_c.to_i64 %3943
%cast_4146 = tensor.cast %cast_4145 : tensor<1024xi8> to tensor<1024xi8>
%c1_4147 = arith.constant 1 : index
%c0_4148 = arith.constant 0 : index
%c1024_4149 = arith.constant 1024 : index
%3945 = tensor.empty() : tensor<1024xf32>
%3946 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_4146 : tensor<1024xi8>) outs(%3945 : tensor<1024xf32>) {
^bb0(%in: i8, %out: f32):
// Sign-extend to i32 so the zero-point subtraction is done in 32 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %3943
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %3941
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1024xf32>
// Same-type cast (source and result types are identical).
%cast_4150 = tensor.cast %3946 : tensor<1024xf32> to tensor<1024xf32>
// 2-D convolution (NCHW input, FCHW weight) with a 1x1 kernel:
//   input  : %cast_4124, tensor<1x256x14x14xf32>
//   weight : %cast_4139, tensor<1024x256x1x1xf32>
//   bias   : %cast_4150, tensor<1024xf32>, broadcast into the accumulator
//   result : %cast_4199, tensor<1x1024x14x14xf32>
%int0_4151 = torch.constant.int 0
%int0_4152 = torch.constant.int 0
%int1_4153 = torch.constant.int 1
%int1_4154 = torch.constant.int 1
%int1_4155 = torch.constant.int 1
%int1_4156 = torch.constant.int 1
%int0_4157 = torch.constant.int 0
// Integer lists built from the constants above; none of them is consumed within this excerpt.
// NOTE(review): presumably the padding / stride / dilation / output-padding operands of the
// original torch convolution — confirm against the pre-lowering IR.
%3947 = torch.prim.ListConstruct %int0_4151, %int0_4152 : (!torch.int, !torch.int) -> !torch.list<int>
%3948 = torch.prim.ListConstruct %int1_4153, %int1_4154 : (!torch.int, !torch.int) -> !torch.list<int>
%3949 = torch.prim.ListConstruct %int1_4155, %int1_4156 : (!torch.int, !torch.int) -> !torch.list<int>
%3950 = torch.prim.ListConstruct %int0_4157, %int0_4157 : (!torch.int, !torch.int) -> !torch.list<int>
%false_4158 = torch.constant.bool false
%int1_4159 = torch.constant.int 1
// %3951 (constant 1) is the value checked as "groups" by the asserts below;
// %3952 / %3953 (constant 0) are the pad amounts used for dims 2 and 3.
%3951 = torch_c.to_i64 %int1_4159
%3952 = torch_c.to_i64 %int0_4151
%3953 = torch_c.to_i64 %int0_4152
%3954 = torch_c.to_i64 %int0_4157
%3955 = torch_c.to_i64 %int0_4157
%c0_4160 = arith.constant 0 : index
%c1_4161 = arith.constant 1 : index
%c1_4162 = arith.constant 1 : index
%c256_4163 = arith.constant 256 : index
%c2_4164 = arith.constant 2 : index
%c14_4165 = arith.constant 14 : index
%c3_4166 = arith.constant 3 : index
%c14_4167 = arith.constant 14 : index
%c0_4168 = arith.constant 0 : index
%c1024_4169 = arith.constant 1024 : index
%c1_4170 = arith.constant 1 : index
%c256_4171 = arith.constant 256 : index
%c2_4172 = arith.constant 2 : index
%c1_4173 = arith.constant 1 : index
%c3_4174 = arith.constant 3 : index
%c1_4175 = arith.constant 1 : index
// Runtime checks: 256 and 1024 must both be divisible by the groups value.
%3956 = arith.index_cast %3951 : i64 to index
%c0_4176 = arith.constant 0 : index
%3957 = arith.remsi %c256_4163, %3956 : index
%3958 = arith.cmpi eq, %c0_4176, %3957 : index
cf.assert %3958, "invalid: groups must divide input channel size evenly."
%c0_4177 = arith.constant 0 : index
%3959 = arith.remsi %c1024_4169, %3956 : index
%3960 = arith.cmpi eq, %c0_4177, %3959 : index
cf.assert %3960, "invalid: groups must divide weight batch size evenly."
%c1_i64_4178 = arith.constant 1 : i64
%c1_i64_4179 = arith.constant 1 : i64
%c1_i64_4180 = arith.constant 1 : i64
%c1_i64_4181 = arith.constant 1 : i64
%cst_4182 = arith.constant 0.000000e+00 : f32
%c0_4183 = arith.constant 0 : index
%c1_4184 = arith.constant 1 : index
%c1_4185 = arith.constant 1 : index
%c256_4186 = arith.constant 256 : index
%c2_4187 = arith.constant 2 : index
%c14_4188 = arith.constant 14 : index
%c3_4189 = arith.constant 3 : index
%c14_4190 = arith.constant 14 : index
%c0_i64_4191 = arith.constant 0 : i64
%3961 = arith.index_cast %c0_i64_4191 : i64 to index
%3962 = arith.index_cast %c0_i64_4191 : i64 to index
%3963 = arith.index_cast %3952 : i64 to index
%3964 = arith.index_cast %3953 : i64 to index
// Pad the input with 0.0: dims 0 and 1 by zero, dims 2 and 3 by %3963 / %3964 (low and high).
// The result type is fully dynamic.
%padded_4192 = tensor.pad %cast_4124 low[%3961, %3962, %3963, %3964] high[%3961, %3962, %3963, %3964] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_4182 : f32
} : tensor<1x256x14x14xf32> to tensor<?x?x?x?xf32>
// Output extent for dim 2:
//   floordiv(14 + 2*%3952 - %c1_i64_4178*(1 - 1) - 1, %c1_i64_4180) + 1
// NOTE(review): %c1_i64_4178 and %c1_i64_4180 presumably stand for dilation and stride
// (both 1, matching the conv attributes below) — confirm.
%3965 = arith.index_cast %c1_4173 : index to i64
%c1_i64_4193 = arith.constant 1 : i64
%c2_i64_4194 = arith.constant 2 : i64
%3966 = arith.muli %3952, %c2_i64_4194 : i64
%3967 = arith.index_cast %c14_4165 : index to i64
%3968 = arith.addi %3967, %3966 : i64
%3969 = arith.subi %3965, %c1_i64_4193 : i64
%3970 = arith.muli %c1_i64_4178, %3969 : i64
%3971 = arith.subi %3968, %3970 : i64
%3972 = arith.subi %3971, %c1_i64_4193 : i64
%3973 = arith.floordivsi %3972, %c1_i64_4180 : i64
%3974 = arith.addi %3973, %c1_i64_4193 : i64
%3975 = arith.index_cast %3974 : i64 to index
// Same computation for dim 3, using %3953, %c1_i64_4179 and %c1_i64_4181.
%3976 = arith.index_cast %c1_4175 : index to i64
%c1_i64_4195 = arith.constant 1 : i64
%c2_i64_4196 = arith.constant 2 : i64
%3977 = arith.muli %3953, %c2_i64_4196 : i64
%3978 = arith.index_cast %c14_4167 : index to i64
%3979 = arith.addi %3978, %3977 : i64
%3980 = arith.subi %3976, %c1_i64_4195 : i64
%3981 = arith.muli %c1_i64_4179, %3980 : i64
%3982 = arith.subi %3979, %3981 : i64
%3983 = arith.subi %3982, %c1_i64_4195 : i64
%3984 = arith.floordivsi %3983, %c1_i64_4181 : i64
%3985 = arith.addi %3984, %c1_i64_4195 : i64
%3986 = arith.index_cast %3985 : i64 to index
// Build the conv accumulator: #map3 reads the 1024-element bias by dim 1 only,
// so every (d0, d2, d3) position of the output receives the bias of its channel d1.
%3987 = tensor.empty(%3975, %3986) : tensor<1x1024x?x?xf32>
%3988 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4150 : tensor<1024xf32>) outs(%3987 : tensor<1x1024x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x1024x?x?xf32>
// %3989 / %3990 are not referenced again within this excerpt.
%3989 = arith.floordivsi %c256_4163, %3956 : index
%3990 = arith.floordivsi %c1024_4169, %3956 : index
%c0_4197 = arith.constant 0 : index
%c1_4198 = arith.constant 1 : index
// Convolution with stride 1 and dilation 1, using the bias-filled tensor %3988 as outs.
%3991 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_4192, %cast_4139 : tensor<?x?x?x?xf32>, tensor<1024x256x1x1xf32>) outs(%3988 : tensor<1x1024x?x?xf32>) -> tensor<1x1024x?x?xf32>
// Cast the dynamically shaped result to the static 1x1024x14x14 type.
%cast_4199 = tensor.cast %3991 : tensor<1x1024x?x?xf32> to tensor<1x1024x14x14xf32>
// Quantize the f32 tensor %cast_4199 (1x1024x14x14) to i8:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 7.8125e-03 (%3992) and zero_point = 0 (%3993).
%3992 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%3993 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): 12 is presumably the torch dtype code of the quantized result; not referenced in this block.
%int12_4200 = torch.constant.int 12
// Extract scale and zero point as scalars.
%3994 = torch.aten.item %3992 : !torch.vtensor<[],f32> -> !torch.float
%3995 = torch_c.to_f64 %3994
%3996 = torch.aten.item %3993 : !torch.vtensor<[],si8> -> !torch.int
%3997 = torch_c.to_i64 %3996
// Index constants emitted by the lowering; none is referenced in this block.
%c1_4201 = arith.constant 1 : index
%c1_4202 = arith.constant 1 : index
%c1024_4203 = arith.constant 1024 : index
%c2_4204 = arith.constant 2 : index
%c14_4205 = arith.constant 14 : index
%c3_4206 = arith.constant 3 : index
%c14_4207 = arith.constant 14 : index
%3998 = tensor.empty() : tensor<1x1024x14x14xi8>
%3999 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4199 : tensor<1x1024x14x14xf32>) outs(%3998 : tensor<1x1024x14x14xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point converted to f32.
%5774 = torch_c.to_i64 %3996
%5775 = arith.sitofp %5774 : i64 to f32
// Scale narrowed from f64 to f32.
%5776 = torch_c.to_f64 %3994
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties must round to even (ONNX QuantizeLinear / quantize_per_tensor rule);
// math.round would round half-way cases away from zero instead.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before converting to an integer.
// ult/ugt are unordered predicates, so a NaN satisfies both and yields 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x1024x14x14xi8>
// Same-type casts; they leave the tensor type unchanged.
%cast_4208 = tensor.cast %3999 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
%cast_4209 = tensor.cast %cast_4208 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
// Dequantize the i8 tensor %cast_4209 back to f32:
//   x = (q - zero_point) * scale
// with scale = 7.8125e-03 (%4000) and zero_point = 0 (%4001).
%4000 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4001 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Extract scale and zero point as scalars.
%4002 = torch.aten.item %4000 : !torch.vtensor<[],f32> -> !torch.float
%4003 = torch_c.to_f64 %4002
%4004 = torch.aten.item %4001 : !torch.vtensor<[],si8> -> !torch.int
%4005 = torch_c.to_i64 %4004
// Same-type cast; it leaves the tensor type unchanged.
%cast_4210 = tensor.cast %cast_4209 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
// Index constants emitted by the lowering; none is referenced in this block.
%c1_4211 = arith.constant 1 : index
%c1_4212 = arith.constant 1 : index
%c1024_4213 = arith.constant 1024 : index
%c2_4214 = arith.constant 2 : index
%c14_4215 = arith.constant 14 : index
%c3_4216 = arith.constant 3 : index
%c14_4217 = arith.constant 14 : index
%4006 = tensor.empty() : tensor<1x1024x14x14xf32>
%4007 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4210 : tensor<1x1024x14x14xi8>) outs(%4006 : tensor<1x1024x14x14xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4004
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Multiply by the scale, narrowed from f64 to f32.
%5779 = torch_c.to_f64 %4002
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x1024x14x14xf32>
// Same-type cast; it leaves the tensor type unchanged.
%cast_4218 = tensor.cast %4007 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
// Elementwise add of two 1x1024x14x14 f32 tensors with a scalar multiplier:
//   out = %cast_4218 + %cast_3912 * alpha, where alpha = 1 (%int1_4219).
// %cast_3912 is defined before the visible part of the file.
%int1_4219 = torch.constant.int 1
%4008 = torch_c.to_i64 %int1_4219
// Dimension constants used only by the broadcast-compatibility asserts below.
%c1_4220 = arith.constant 1 : index
%c1_4221 = arith.constant 1 : index
%c1024_4222 = arith.constant 1024 : index
%c2_4223 = arith.constant 2 : index
%c14_4224 = arith.constant 14 : index
%c3_4225 = arith.constant 3 : index
%c14_4226 = arith.constant 14 : index
%c1_4227 = arith.constant 1 : index
%c1024_4228 = arith.constant 1024 : index
// Each assert compares two constants of equal value (1024, 14, 14).
%4009 = arith.cmpi eq, %c1024_4222, %c1024_4228 : index
cf.assert %4009, "mismatched size for broadcast"
%c2_4229 = arith.constant 2 : index
%c14_4230 = arith.constant 14 : index
%4010 = arith.cmpi eq, %c14_4224, %c14_4230 : index
cf.assert %4010, "mismatched size for broadcast"
%c3_4231 = arith.constant 3 : index
%c14_4232 = arith.constant 14 : index
%4011 = arith.cmpi eq, %c14_4226, %c14_4232 : index
cf.assert %4011, "mismatched size for broadcast"
%4012 = tensor.empty() : tensor<1x1024x14x14xf32>
%4013 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4218, %cast_3912 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%4012 : tensor<1x1024x14x14xf32>) {
^bb0(%in: f32, %in_6197: f32, %out: f32):
// alpha as f32, then in + in_6197 * alpha.
%5774 = arith.sitofp %4008 : i64 to f32
%5775 = arith.mulf %in_6197, %5774 : f32
%5776 = arith.addf %in, %5775 : f32
linalg.yield %5776 : f32
} -> tensor<1x1024x14x14xf32>
// Same-type cast; it leaves the tensor type unchanged.
%cast_4233 = tensor.cast %4013 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
// Elementwise ReLU over %cast_4233: out = (x > 0) ? x : 0.
// Index constants emitted by the lowering; none is referenced in this block.
%c1_4234 = arith.constant 1 : index
%c1_4235 = arith.constant 1 : index
%c1024_4236 = arith.constant 1024 : index
%c2_4237 = arith.constant 2 : index
%c14_4238 = arith.constant 14 : index
%c3_4239 = arith.constant 3 : index
%c14_4240 = arith.constant 14 : index
%4014 = tensor.empty() : tensor<1x1024x14x14xf32>
%4015 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4233 : tensor<1x1024x14x14xf32>) outs(%4014 : tensor<1x1024x14x14xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// ugt is an unordered predicate, so a NaN input is selected and passed through.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x1024x14x14xf32>
// Same-type cast; it leaves the tensor type unchanged.
%cast_4241 = tensor.cast %4015 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
// Quantize the f32 tensor %cast_4241 (1x1024x14x14) to i8:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 1.5625e-02 (%4016) and zero_point = 0 (%4017).
%4016 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%4017 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): 12 is presumably the torch dtype code of the quantized result; not referenced in this block.
%int12_4242 = torch.constant.int 12
// Extract scale and zero point as scalars.
%4018 = torch.aten.item %4016 : !torch.vtensor<[],f32> -> !torch.float
%4019 = torch_c.to_f64 %4018
%4020 = torch.aten.item %4017 : !torch.vtensor<[],si8> -> !torch.int
%4021 = torch_c.to_i64 %4020
// Index constants emitted by the lowering; none is referenced in this block.
%c1_4243 = arith.constant 1 : index
%c1_4244 = arith.constant 1 : index
%c1024_4245 = arith.constant 1024 : index
%c2_4246 = arith.constant 2 : index
%c14_4247 = arith.constant 14 : index
%c3_4248 = arith.constant 3 : index
%c14_4249 = arith.constant 14 : index
%4022 = tensor.empty() : tensor<1x1024x14x14xi8>
%4023 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4241 : tensor<1x1024x14x14xf32>) outs(%4022 : tensor<1x1024x14x14xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point converted to f32.
%5774 = torch_c.to_i64 %4020
%5775 = arith.sitofp %5774 : i64 to f32
// Scale narrowed from f64 to f32.
%5776 = torch_c.to_f64 %4018
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties must round to even (ONNX QuantizeLinear / quantize_per_tensor rule);
// math.round would round half-way cases away from zero instead.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before converting to an integer.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x1024x14x14xi8>
// Same-type casts; they leave the tensor type unchanged.
%cast_4250 = tensor.cast %4023 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
%cast_4251 = tensor.cast %cast_4250 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
// Dequantize the i8 tensor %cast_4251 back to f32:
//   x = (q - zero_point) * scale
// with scale = 1.5625e-02 (%4024) and zero_point = 0 (%4025).
%4024 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%4025 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Extract scale and zero point as scalars.
%4026 = torch.aten.item %4024 : !torch.vtensor<[],f32> -> !torch.float
%4027 = torch_c.to_f64 %4026
%4028 = torch.aten.item %4025 : !torch.vtensor<[],si8> -> !torch.int
%4029 = torch_c.to_i64 %4028
// Same-type cast; it leaves the tensor type unchanged.
%cast_4252 = tensor.cast %cast_4251 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
// Index constants emitted by the lowering; none is referenced in this block.
%c1_4253 = arith.constant 1 : index
%c1_4254 = arith.constant 1 : index
%c1024_4255 = arith.constant 1024 : index
%c2_4256 = arith.constant 2 : index
%c14_4257 = arith.constant 14 : index
%c3_4258 = arith.constant 3 : index
%c14_4259 = arith.constant 14 : index
%4030 = tensor.empty() : tensor<1x1024x14x14xf32>
%4031 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4252 : tensor<1x1024x14x14xi8>) outs(%4030 : tensor<1x1024x14x14xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4028
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Multiply by the scale, narrowed from f64 to f32.
%5779 = torch_c.to_f64 %4026
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x1024x14x14xf32>
// Same-type cast; it leaves the tensor type unchanged.
%cast_4260 = tensor.cast %4031 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
// Quantize the f32 tensor %150 (256x1024x1x1, defined outside this block) to i8:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 0.001953125 (%4032) and zero_point = 0 (%4033).
%4032 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
%4033 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): 12 is presumably the torch dtype code of the quantized result; not referenced in this block.
%int12_4261 = torch.constant.int 12
// Extract scale and zero point as scalars.
%4034 = torch.aten.item %4032 : !torch.vtensor<[],f32> -> !torch.float
%4035 = torch_c.to_f64 %4034
%4036 = torch.aten.item %4033 : !torch.vtensor<[],si8> -> !torch.int
%4037 = torch_c.to_i64 %4036
// Index constants emitted by the lowering; none is referenced in this block.
%c1_4262 = arith.constant 1 : index
%c0_4263 = arith.constant 0 : index
%c256_4264 = arith.constant 256 : index
%c1_4265 = arith.constant 1 : index
%c1024_4266 = arith.constant 1024 : index
%4038 = tensor.empty() : tensor<256x1024x1x1xi8>
// #map4 reads the input at (d0, d1, 0, 0) because its two trailing dims have size 1.
%4039 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%150 : tensor<256x1024x1x1xf32>) outs(%4038 : tensor<256x1024x1x1xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point converted to f32.
%5774 = torch_c.to_i64 %4036
%5775 = arith.sitofp %5774 : i64 to f32
// Scale narrowed from f64 to f32.
%5776 = torch_c.to_f64 %4034
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties must round to even (ONNX QuantizeLinear / quantize_per_tensor rule);
// math.round would round half-way cases away from zero instead.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before converting to an integer.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256x1024x1x1xi8>
// Same-type casts; they leave the tensor type unchanged.
%cast_4267 = tensor.cast %4039 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
%cast_4268 = tensor.cast %cast_4267 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
// Dequantize the i8 tensor %cast_4268 (256x1024x1x1) back to f32:
//   x = (q - zero_point) * scale
// with scale = 0.001953125 (%4040) and zero_point = 0 (%4041).
// The result %cast_4275 is the filter operand of the conv at %4108.
%4040 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
%4041 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Extract scale and zero point as scalars.
%4042 = torch.aten.item %4040 : !torch.vtensor<[],f32> -> !torch.float
%4043 = torch_c.to_f64 %4042
%4044 = torch.aten.item %4041 : !torch.vtensor<[],si8> -> !torch.int
%4045 = torch_c.to_i64 %4044
// Same-type cast; it leaves the tensor type unchanged.
%cast_4269 = tensor.cast %cast_4268 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
// Index constants emitted by the lowering; none is referenced in this block.
%c1_4270 = arith.constant 1 : index
%c0_4271 = arith.constant 0 : index
%c256_4272 = arith.constant 256 : index
%c1_4273 = arith.constant 1 : index
%c1024_4274 = arith.constant 1024 : index
%4046 = tensor.empty() : tensor<256x1024x1x1xf32>
%4047 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4269 : tensor<256x1024x1x1xi8>) outs(%4046 : tensor<256x1024x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4044
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Multiply by the scale, narrowed from f64 to f32.
%5779 = torch_c.to_f64 %4042
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<256x1024x1x1xf32>
// Same-type cast; it leaves the tensor type unchanged.
%cast_4275 = tensor.cast %4047 : tensor<256x1024x1x1xf32> to tensor<256x1024x1x1xf32>
// Quantize the f32 vector %152 (256 elements, defined outside this block) to i8:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 3.90625e-03 (%4048) and zero_point = 0 (%4049).
%4048 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4049 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): 12 is presumably the torch dtype code of the quantized result; not referenced in this block.
%int12_4276 = torch.constant.int 12
// Extract scale and zero point as scalars.
%4050 = torch.aten.item %4048 : !torch.vtensor<[],f32> -> !torch.float
%4051 = torch_c.to_f64 %4050
%4052 = torch.aten.item %4049 : !torch.vtensor<[],si8> -> !torch.int
%4053 = torch_c.to_i64 %4052
// Index constants emitted by the lowering; none is referenced in this block.
%c1_4277 = arith.constant 1 : index
%c0_4278 = arith.constant 0 : index
%c256_4279 = arith.constant 256 : index
%4054 = tensor.empty() : tensor<256xi8>
%4055 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%152 : tensor<256xf32>) outs(%4054 : tensor<256xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point converted to f32.
%5774 = torch_c.to_i64 %4052
%5775 = arith.sitofp %5774 : i64 to f32
// Scale narrowed from f64 to f32.
%5776 = torch_c.to_f64 %4050
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties must round to even (ONNX QuantizeLinear / quantize_per_tensor rule);
// math.round would round half-way cases away from zero instead.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before converting to an integer.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256xi8>
// Same-type casts; they leave the tensor type unchanged.
%cast_4280 = tensor.cast %4055 : tensor<256xi8> to tensor<256xi8>
%cast_4281 = tensor.cast %cast_4280 : tensor<256xi8> to tensor<256xi8>
// Dequantize the i8 vector %cast_4281 (256 elements) back to f32:
//   x = (q - zero_point) * scale
// with scale = 3.90625e-03 (%4056) and zero_point = 0 (%4057).
// The result %cast_4286 is broadcast along the channel dim to seed the conv output at %4105.
%4056 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4057 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Extract scale and zero point as scalars.
%4058 = torch.aten.item %4056 : !torch.vtensor<[],f32> -> !torch.float
%4059 = torch_c.to_f64 %4058
%4060 = torch.aten.item %4057 : !torch.vtensor<[],si8> -> !torch.int
%4061 = torch_c.to_i64 %4060
// Same-type cast; it leaves the tensor type unchanged.
%cast_4282 = tensor.cast %cast_4281 : tensor<256xi8> to tensor<256xi8>
// Index constants emitted by the lowering; none is referenced in this block.
%c1_4283 = arith.constant 1 : index
%c0_4284 = arith.constant 0 : index
%c256_4285 = arith.constant 256 : index
%4062 = tensor.empty() : tensor<256xf32>
%4063 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_4282 : tensor<256xi8>) outs(%4062 : tensor<256xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4060
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Multiply by the scale, narrowed from f64 to f32.
%5779 = torch_c.to_f64 %4058
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<256xf32>
// Same-type cast; it leaves the tensor type unchanged.
%cast_4286 = tensor.cast %4063 : tensor<256xf32> to tensor<256xf32>
// 2-D convolution (NCHW input, FCHW filter):
//   input  = %cast_4260 (1x1024x14x14), filter = %cast_4275 (256x1024x1x1),
//   bias   = %cast_4286 (256), result = %cast_4335 (1x256x14x14).
// Steps: validate groups, zero-pad the input, compute the output spatial size,
// seed the output with the broadcast bias, then accumulate the convolution.
%int0_4287 = torch.constant.int 0
%int0_4288 = torch.constant.int 0
%int1_4289 = torch.constant.int 1
%int1_4290 = torch.constant.int 1
%int1_4291 = torch.constant.int 1
%int1_4292 = torch.constant.int 1
%int0_4293 = torch.constant.int 0
// [0, 0]: its elements become the spatial padding amounts %4069 / %4070 below.
%4064 = torch.prim.ListConstruct %int0_4287, %int0_4288 : (!torch.int, !torch.int) -> !torch.list<int>
// NOTE(review): the two [1, 1] lists are presumably stride and dilation; the
// visible code does not show which is which.
%4065 = torch.prim.ListConstruct %int1_4289, %int1_4290 : (!torch.int, !torch.int) -> !torch.list<int>
%4066 = torch.prim.ListConstruct %int1_4291, %int1_4292 : (!torch.int, !torch.int) -> !torch.list<int>
// NOTE(review): [0, 0], presumably output padding; not consumed by the visible ops.
%4067 = torch.prim.ListConstruct %int0_4293, %int0_4293 : (!torch.int, !torch.int) -> !torch.list<int>
%false_4294 = torch.constant.bool false
// %4068 (= 1) is the value checked as "groups" by the asserts below.
%int1_4295 = torch.constant.int 1
%4068 = torch_c.to_i64 %int1_4295
%4069 = torch_c.to_i64 %int0_4287
%4070 = torch_c.to_i64 %int0_4288
%4071 = torch_c.to_i64 %int0_4293
%4072 = torch_c.to_i64 %int0_4293
// Dimension constants: 1x1024x14x14 for the input, 256x1024x1x1 for the filter.
%c0_4296 = arith.constant 0 : index
%c1_4297 = arith.constant 1 : index
%c1_4298 = arith.constant 1 : index
%c1024_4299 = arith.constant 1024 : index
%c2_4300 = arith.constant 2 : index
%c14_4301 = arith.constant 14 : index
%c3_4302 = arith.constant 3 : index
%c14_4303 = arith.constant 14 : index
%c0_4304 = arith.constant 0 : index
%c256_4305 = arith.constant 256 : index
%c1_4306 = arith.constant 1 : index
%c1024_4307 = arith.constant 1024 : index
%c2_4308 = arith.constant 2 : index
%c1_4309 = arith.constant 1 : index
%c3_4310 = arith.constant 3 : index
%c1_4311 = arith.constant 1 : index
// Runtime checks: 1024 % groups == 0 and 256 % groups == 0.
%4073 = arith.index_cast %4068 : i64 to index
%c0_4312 = arith.constant 0 : index
%4074 = arith.remsi %c1024_4299, %4073 : index
%4075 = arith.cmpi eq, %c0_4312, %4074 : index
cf.assert %4075, "invalid: groups must divide input channel size evenly."
%c0_4313 = arith.constant 0 : index
%4076 = arith.remsi %c256_4305, %4073 : index
%4077 = arith.cmpi eq, %c0_4313, %4076 : index
cf.assert %4077, "invalid: groups must divide weight batch size evenly."
// NOTE(review): %c1_i64_4314/4315 multiply (kernel - 1) and %c1_i64_4316/4317
// are the floordiv divisors below — presumably dilation and stride per spatial dim.
%c1_i64_4314 = arith.constant 1 : i64
%c1_i64_4315 = arith.constant 1 : i64
%c1_i64_4316 = arith.constant 1 : i64
%c1_i64_4317 = arith.constant 1 : i64
// Padding fill value.
%cst_4318 = arith.constant 0.000000e+00 : f32
%c0_4319 = arith.constant 0 : index
%c1_4320 = arith.constant 1 : index
%c1_4321 = arith.constant 1 : index
%c1024_4322 = arith.constant 1024 : index
%c2_4323 = arith.constant 2 : index
%c14_4324 = arith.constant 14 : index
%c3_4325 = arith.constant 3 : index
%c14_4326 = arith.constant 14 : index
// Pad amounts: 0 for dims 0 and 1, %4069 / %4070 (both 0 here) for dims 2 and 3,
// applied symmetrically (same low and high). The result type is fully dynamic.
%c0_i64_4327 = arith.constant 0 : i64
%4078 = arith.index_cast %c0_i64_4327 : i64 to index
%4079 = arith.index_cast %c0_i64_4327 : i64 to index
%4080 = arith.index_cast %4069 : i64 to index
%4081 = arith.index_cast %4070 : i64 to index
%padded_4328 = tensor.pad %cast_4260 low[%4078, %4079, %4080, %4081] high[%4078, %4079, %4080, %4081] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_4318 : f32
} : tensor<1x1024x14x14xf32> to tensor<?x?x?x?xf32>
// Output size for dim 2:
//   floordiv(in + 2*pad - %c1_i64_4314*(k - 1) - 1, %c1_i64_4316) + 1
// with in = 14 (%c14_4301), pad = %4069, k = 1 (%c1_4309).
%4082 = arith.index_cast %c1_4309 : index to i64
%c1_i64_4329 = arith.constant 1 : i64
%c2_i64_4330 = arith.constant 2 : i64
%4083 = arith.muli %4069, %c2_i64_4330 : i64
%4084 = arith.index_cast %c14_4301 : index to i64
%4085 = arith.addi %4084, %4083 : i64
%4086 = arith.subi %4082, %c1_i64_4329 : i64
%4087 = arith.muli %c1_i64_4314, %4086 : i64
%4088 = arith.subi %4085, %4087 : i64
%4089 = arith.subi %4088, %c1_i64_4329 : i64
%4090 = arith.floordivsi %4089, %c1_i64_4316 : i64
%4091 = arith.addi %4090, %c1_i64_4329 : i64
%4092 = arith.index_cast %4091 : i64 to index
// Same formula for dim 3, with in = 14 (%c14_4303), pad = %4070, k = 1 (%c1_4311).
%4093 = arith.index_cast %c1_4311 : index to i64
%c1_i64_4331 = arith.constant 1 : i64
%c2_i64_4332 = arith.constant 2 : i64
%4094 = arith.muli %4070, %c2_i64_4332 : i64
%4095 = arith.index_cast %c14_4303 : index to i64
%4096 = arith.addi %4095, %4094 : i64
%4097 = arith.subi %4093, %c1_i64_4331 : i64
%4098 = arith.muli %c1_i64_4315, %4097 : i64
%4099 = arith.subi %4096, %4098 : i64
%4100 = arith.subi %4099, %c1_i64_4331 : i64
%4101 = arith.floordivsi %4100, %c1_i64_4317 : i64
%4102 = arith.addi %4101, %c1_i64_4331 : i64
%4103 = arith.index_cast %4102 : i64 to index
// Seed the output: #map3 maps (d0, d1, d2, d3) -> (d1), so each bias element
// is copied across the whole spatial plane of its channel.
%4104 = tensor.empty(%4092, %4103) : tensor<1x256x?x?xf32>
%4105 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4286 : tensor<256xf32>) outs(%4104 : tensor<1x256x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x256x?x?xf32>
// Channels-per-group quotients; not consumed by the ops visible below.
%4106 = arith.floordivsi %c1024_4299, %4073 : index
%4107 = arith.floordivsi %c256_4305, %4073 : index
%c0_4333 = arith.constant 0 : index
%c1_4334 = arith.constant 1 : index
// Convolution with unit strides and dilations, accumulating into the bias-seeded tensor.
%4108 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_4328, %cast_4275 : tensor<?x?x?x?xf32>, tensor<256x1024x1x1xf32>) outs(%4105 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
// Refine the dynamic spatial dims to the static 14x14 shape.
%cast_4335 = tensor.cast %4108 : tensor<1x256x?x?xf32> to tensor<1x256x14x14xf32>
// Elementwise ReLU over the conv result %cast_4335: out = (x > 0) ? x : 0.
// Index constants emitted by the lowering; none is referenced in this block.
%c1_4336 = arith.constant 1 : index
%c1_4337 = arith.constant 1 : index
%c256_4338 = arith.constant 256 : index
%c2_4339 = arith.constant 2 : index
%c14_4340 = arith.constant 14 : index
%c3_4341 = arith.constant 3 : index
%c14_4342 = arith.constant 14 : index
%4109 = tensor.empty() : tensor<1x256x14x14xf32>
%4110 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4335 : tensor<1x256x14x14xf32>) outs(%4109 : tensor<1x256x14x14xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// ugt is an unordered predicate, so a NaN input is selected and passed through.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x256x14x14xf32>
// Same-type cast; it leaves the tensor type unchanged.
%cast_4343 = tensor.cast %4110 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
// Quantize the f32 tensor %cast_4343 (1x256x14x14) to i8:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 7.8125e-03 (%4111) and zero_point = 0 (%4112).
%4111 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4112 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): 12 is presumably the torch dtype code of the quantized result; not referenced in this block.
%int12_4344 = torch.constant.int 12
// Extract scale and zero point as scalars.
%4113 = torch.aten.item %4111 : !torch.vtensor<[],f32> -> !torch.float
%4114 = torch_c.to_f64 %4113
%4115 = torch.aten.item %4112 : !torch.vtensor<[],si8> -> !torch.int
%4116 = torch_c.to_i64 %4115
// Index constants emitted by the lowering; none is referenced in this block.
%c1_4345 = arith.constant 1 : index
%c1_4346 = arith.constant 1 : index
%c256_4347 = arith.constant 256 : index
%c2_4348 = arith.constant 2 : index
%c14_4349 = arith.constant 14 : index
%c3_4350 = arith.constant 3 : index
%c14_4351 = arith.constant 14 : index
%4117 = tensor.empty() : tensor<1x256x14x14xi8>
%4118 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4343 : tensor<1x256x14x14xf32>) outs(%4117 : tensor<1x256x14x14xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point converted to f32.
%5774 = torch_c.to_i64 %4115
%5775 = arith.sitofp %5774 : i64 to f32
// Scale narrowed from f64 to f32.
%5776 = torch_c.to_f64 %4113
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties must round to even (ONNX QuantizeLinear / quantize_per_tensor rule);
// math.round would round half-way cases away from zero instead.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before converting to an integer.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x256x14x14xi8>
// Same-type casts; they leave the tensor type unchanged.
%cast_4352 = tensor.cast %4118 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
%cast_4353 = tensor.cast %cast_4352 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
// Dequantize the i8 tensor %cast_4353 (1x256x14x14) back to f32:
//   x = (q - zero_point) * scale
// with scale = 7.8125e-03 (%4119) and zero_point = 0 (%4120).
%4119 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4120 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Extract scale and zero point as scalars.
%4121 = torch.aten.item %4119 : !torch.vtensor<[],f32> -> !torch.float
%4122 = torch_c.to_f64 %4121
%4123 = torch.aten.item %4120 : !torch.vtensor<[],si8> -> !torch.int
%4124 = torch_c.to_i64 %4123
// Same-type cast; it leaves the tensor type unchanged.
%cast_4354 = tensor.cast %cast_4353 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
// Index constants emitted by the lowering; none is referenced in this block.
%c1_4355 = arith.constant 1 : index
%c1_4356 = arith.constant 1 : index
%c256_4357 = arith.constant 256 : index
%c2_4358 = arith.constant 2 : index
%c14_4359 = arith.constant 14 : index
%c3_4360 = arith.constant 3 : index
%c14_4361 = arith.constant 14 : index
%4125 = tensor.empty() : tensor<1x256x14x14xf32>
%4126 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4354 : tensor<1x256x14x14xi8>) outs(%4125 : tensor<1x256x14x14xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4123
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Multiply by the scale, narrowed from f64 to f32.
%5779 = torch_c.to_f64 %4121
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x256x14x14xf32>
// Same-type cast; it leaves the tensor type unchanged.
%cast_4362 = tensor.cast %4126 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
// Quantize the f32 tensor %154 (256x256x3x3, defined outside this block) to i8:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 3.90625e-03 (%4127) and zero_point = 0 (%4128).
%4127 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4128 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): 12 is presumably the torch dtype code of the quantized result; not referenced in this block.
%int12_4363 = torch.constant.int 12
// Extract scale and zero point as scalars.
%4129 = torch.aten.item %4127 : !torch.vtensor<[],f32> -> !torch.float
%4130 = torch_c.to_f64 %4129
%4131 = torch.aten.item %4128 : !torch.vtensor<[],si8> -> !torch.int
%4132 = torch_c.to_i64 %4131
// Index constants emitted by the lowering; none is referenced in this block.
%c1_4364 = arith.constant 1 : index
%c0_4365 = arith.constant 0 : index
%c256_4366 = arith.constant 256 : index
%c1_4367 = arith.constant 1 : index
%c256_4368 = arith.constant 256 : index
%c2_4369 = arith.constant 2 : index
%c3_4370 = arith.constant 3 : index
%c3_4371 = arith.constant 3 : index
%c3_4372 = arith.constant 3 : index
%4133 = tensor.empty() : tensor<256x256x3x3xi8>
%4134 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%154 : tensor<256x256x3x3xf32>) outs(%4133 : tensor<256x256x3x3xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point converted to f32.
%5774 = torch_c.to_i64 %4131
%5775 = arith.sitofp %5774 : i64 to f32
// Scale narrowed from f64 to f32.
%5776 = torch_c.to_f64 %4129
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties must round to even (ONNX QuantizeLinear / quantize_per_tensor rule);
// math.round would round half-way cases away from zero instead.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before converting to an integer.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256x256x3x3xi8>
// Same-type casts; they leave the tensor type unchanged.
%cast_4373 = tensor.cast %4134 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
%cast_4374 = tensor.cast %cast_4373 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
// Dequantize the i8 tensor %cast_4374 (256x256x3x3) back to f32:
//   x = (q - zero_point) * scale
// with scale = 3.90625e-03 (%4135) and zero_point = 0 (%4136).
// NOTE(review): the result %cast_4385 is presumably a conv filter; its consumer is not in view.
%4135 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4136 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Extract scale and zero point as scalars.
%4137 = torch.aten.item %4135 : !torch.vtensor<[],f32> -> !torch.float
%4138 = torch_c.to_f64 %4137
%4139 = torch.aten.item %4136 : !torch.vtensor<[],si8> -> !torch.int
%4140 = torch_c.to_i64 %4139
// Same-type cast; it leaves the tensor type unchanged.
%cast_4375 = tensor.cast %cast_4374 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
// Index constants emitted by the lowering; none is referenced in this block.
%c1_4376 = arith.constant 1 : index
%c0_4377 = arith.constant 0 : index
%c256_4378 = arith.constant 256 : index
%c1_4379 = arith.constant 1 : index
%c256_4380 = arith.constant 256 : index
%c2_4381 = arith.constant 2 : index
%c3_4382 = arith.constant 3 : index
%c3_4383 = arith.constant 3 : index
%c3_4384 = arith.constant 3 : index
%4141 = tensor.empty() : tensor<256x256x3x3xf32>
%4142 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4375 : tensor<256x256x3x3xi8>) outs(%4141 : tensor<256x256x3x3xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4139
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Multiply by the scale, narrowed from f64 to f32.
%5779 = torch_c.to_f64 %4137
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<256x256x3x3xf32>
// Same-type cast; it leaves the tensor type unchanged.
%cast_4385 = tensor.cast %4142 : tensor<256x256x3x3xf32> to tensor<256x256x3x3xf32>
// Quantize the f32 vector %156 (256 elements, defined outside this block) to i8:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 7.8125e-03 (%4143) and zero_point = 0 (%4144).
%4143 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4144 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): 12 is presumably the torch dtype code of the quantized result; not referenced in this block.
%int12_4386 = torch.constant.int 12
// Extract scale and zero point as scalars.
%4145 = torch.aten.item %4143 : !torch.vtensor<[],f32> -> !torch.float
%4146 = torch_c.to_f64 %4145
%4147 = torch.aten.item %4144 : !torch.vtensor<[],si8> -> !torch.int
%4148 = torch_c.to_i64 %4147
// Index constants emitted by the lowering; none is referenced in this block.
%c1_4387 = arith.constant 1 : index
%c0_4388 = arith.constant 0 : index
%c256_4389 = arith.constant 256 : index
%4149 = tensor.empty() : tensor<256xi8>
%4150 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%156 : tensor<256xf32>) outs(%4149 : tensor<256xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point converted to f32.
%5774 = torch_c.to_i64 %4147
%5775 = arith.sitofp %5774 : i64 to f32
// Scale narrowed from f64 to f32.
%5776 = torch_c.to_f64 %4145
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties must round to even (ONNX QuantizeLinear / quantize_per_tensor rule);
// math.round would round half-way cases away from zero instead.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before converting to an integer.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256xi8>
// Same-type casts; they leave the tensor type unchanged.
%cast_4390 = tensor.cast %4150 : tensor<256xi8> to tensor<256xi8>
%cast_4391 = tensor.cast %cast_4390 : tensor<256xi8> to tensor<256xi8>
// Dequantize the i8 vector %cast_4391 (256 elements) back to f32:
//   x = (q - zero_point) * scale
// with scale = 7.8125e-03 (%4151) and zero_point = 0 (%4152).
%4151 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4152 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Extract scale and zero point as scalars.
%4153 = torch.aten.item %4151 : !torch.vtensor<[],f32> -> !torch.float
%4154 = torch_c.to_f64 %4153
%4155 = torch.aten.item %4152 : !torch.vtensor<[],si8> -> !torch.int
%4156 = torch_c.to_i64 %4155
// Same-type cast; it leaves the tensor type unchanged.
%cast_4392 = tensor.cast %cast_4391 : tensor<256xi8> to tensor<256xi8>
// Index constants emitted by the lowering; none is referenced in this block.
%c1_4393 = arith.constant 1 : index
%c0_4394 = arith.constant 0 : index
%c256_4395 = arith.constant 256 : index
%4157 = tensor.empty() : tensor<256xf32>
%4158 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_4392 : tensor<256xi8>) outs(%4157 : tensor<256xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4155
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Multiply by the scale, narrowed from f64 to f32.
%5779 = torch_c.to_f64 %4153
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<256xf32>
// Same-type cast; it leaves the tensor type unchanged.
%cast_4396 = tensor.cast %4158 : tensor<256xf32> to tensor<256xf32>
%int1_4397 = torch.constant.int 1
%int1_4398 = torch.constant.int 1
%int1_4399 = torch.constant.int 1
%int1_4400 = torch.constant.int 1
%int1_4401 = torch.constant.int 1
%int1_4402 = torch.constant.int 1
%int0_4403 = torch.constant.int 0
// Tail of a 2-D convolution lowering. Input %cast_4362 (1x256x14x14), filter
// %cast_4385 (256x256x3x3) and bias %cast_4396 (256) are defined before this
// point, as are the !torch.int operands of the lists below.
// NOTE(review): the four lists are presumably the padding / stride / dilation /
// output-padding arguments of the original torch convolution op — confirm
// against the op that was lowered. The lists themselves are not referenced
// again in the visible code.
%4159 = torch.prim.ListConstruct %int1_4397, %int1_4398 : (!torch.int, !torch.int) -> !torch.list<int>
%4160 = torch.prim.ListConstruct %int1_4399, %int1_4400 : (!torch.int, !torch.int) -> !torch.list<int>
%4161 = torch.prim.ListConstruct %int1_4401, %int1_4402 : (!torch.int, !torch.int) -> !torch.list<int>
%4162 = torch.prim.ListConstruct %int0_4403, %int0_4403 : (!torch.int, !torch.int) -> !torch.list<int>
%false_4404 = torch.constant.bool false
%int1_4405 = torch.constant.int 1
// %4163 is the group count (checked by the "groups must divide" assertions below).
%4163 = torch_c.to_i64 %int1_4405
// %4164 / %4165 are the pad amounts applied to dims 2 and 3 by tensor.pad below.
%4164 = torch_c.to_i64 %int1_4397
%4165 = torch_c.to_i64 %int1_4398
%4166 = torch_c.to_i64 %int0_4403
%4167 = torch_c.to_i64 %int0_4403
// Index constants emitted by the lowering; only a subset is used in the visible code.
%c0_4406 = arith.constant 0 : index
%c1_4407 = arith.constant 1 : index
%c1_4408 = arith.constant 1 : index
%c256_4409 = arith.constant 256 : index
%c2_4410 = arith.constant 2 : index
%c14_4411 = arith.constant 14 : index
%c3_4412 = arith.constant 3 : index
%c14_4413 = arith.constant 14 : index
%c0_4414 = arith.constant 0 : index
%c256_4415 = arith.constant 256 : index
%c1_4416 = arith.constant 1 : index
%c256_4417 = arith.constant 256 : index
%c2_4418 = arith.constant 2 : index
%c3_4419 = arith.constant 3 : index
%c3_4420 = arith.constant 3 : index
%c3_4421 = arith.constant 3 : index
// Runtime checks: 256 % groups == 0 for both the input channels and the weight batch.
%4168 = arith.index_cast %4163 : i64 to index
%c0_4422 = arith.constant 0 : index
%4169 = arith.remsi %c256_4409, %4168 : index
%4170 = arith.cmpi eq, %c0_4422, %4169 : index
cf.assert %4170, "invalid: groups must divide input channel size evenly."
%c0_4423 = arith.constant 0 : index
%4171 = arith.remsi %c256_4415, %4168 : index
%4172 = arith.cmpi eq, %c0_4423, %4171 : index
cf.assert %4172, "invalid: groups must divide weight batch size evenly."
// i64 ones used as multiplier (%c1_i64_4424/4425) and divisor (%c1_i64_4426/4427)
// in the output-size arithmetic below; they match the dilations/strides of 1 on the conv op.
%c1_i64_4424 = arith.constant 1 : i64
%c1_i64_4425 = arith.constant 1 : i64
%c1_i64_4426 = arith.constant 1 : i64
%c1_i64_4427 = arith.constant 1 : i64
// Padding fill value.
%cst_4428 = arith.constant 0.000000e+00 : f32
%c0_4429 = arith.constant 0 : index
%c1_4430 = arith.constant 1 : index
%c1_4431 = arith.constant 1 : index
%c256_4432 = arith.constant 256 : index
%c2_4433 = arith.constant 2 : index
%c14_4434 = arith.constant 14 : index
%c3_4435 = arith.constant 3 : index
%c14_4436 = arith.constant 14 : index
%c0_i64_4437 = arith.constant 0 : i64
// Pad amounts per dimension: 0 for dims 0 and 1, %4164 / %4165 for dims 2 and 3.
%4173 = arith.index_cast %c0_i64_4437 : i64 to index
%4174 = arith.index_cast %c0_i64_4437 : i64 to index
%4175 = arith.index_cast %4164 : i64 to index
%4176 = arith.index_cast %4165 : i64 to index
// Zero-pad the input symmetrically (same low and high amounts); result type is fully dynamic.
%padded_4438 = tensor.pad %cast_4362 low[%4173, %4174, %4175, %4176] high[%4173, %4174, %4175, %4176] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_4428 : f32
} : tensor<1x256x14x14xf32> to tensor<?x?x?x?xf32>
// Output extent along dim 2:
//   floordiv(14 + 2*pad - 1*(3 - 1) - 1, 1) + 1
%4177 = arith.index_cast %c3_4419 : index to i64
%c1_i64_4439 = arith.constant 1 : i64
%c2_i64_4440 = arith.constant 2 : i64
%4178 = arith.muli %4164, %c2_i64_4440 : i64
%4179 = arith.index_cast %c14_4411 : index to i64
%4180 = arith.addi %4179, %4178 : i64
%4181 = arith.subi %4177, %c1_i64_4439 : i64
%4182 = arith.muli %c1_i64_4424, %4181 : i64
%4183 = arith.subi %4180, %4182 : i64
%4184 = arith.subi %4183, %c1_i64_4439 : i64
%4185 = arith.floordivsi %4184, %c1_i64_4426 : i64
%4186 = arith.addi %4185, %c1_i64_4439 : i64
%4187 = arith.index_cast %4186 : i64 to index
// Same computation for dim 3.
%4188 = arith.index_cast %c3_4421 : index to i64
%c1_i64_4441 = arith.constant 1 : i64
%c2_i64_4442 = arith.constant 2 : i64
%4189 = arith.muli %4165, %c2_i64_4442 : i64
%4190 = arith.index_cast %c14_4413 : index to i64
%4191 = arith.addi %4190, %4189 : i64
%4192 = arith.subi %4188, %c1_i64_4441 : i64
%4193 = arith.muli %c1_i64_4425, %4192 : i64
%4194 = arith.subi %4191, %4193 : i64
%4195 = arith.subi %4194, %c1_i64_4441 : i64
%4196 = arith.floordivsi %4195, %c1_i64_4427 : i64
%4197 = arith.addi %4196, %c1_i64_4441 : i64
%4198 = arith.index_cast %4197 : i64 to index
// Broadcast the bias along dim 1 (#map3 selects d1) into the conv accumulator,
// so the convolution below accumulates on top of the bias.
%4199 = tensor.empty(%4187, %4198) : tensor<1x256x?x?xf32>
%4200 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4396 : tensor<256xf32>) outs(%4199 : tensor<1x256x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x256x?x?xf32>
// Per-group channel counts; not referenced again in the visible code.
%4201 = arith.floordivsi %c256_4409, %4168 : index
%4202 = arith.floordivsi %c256_4415, %4168 : index
%c0_4443 = arith.constant 0 : index
%c1_4444 = arith.constant 1 : index
// The convolution itself (stride 1, dilation 1) on the padded input, initialised with the broadcast bias.
%4203 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_4438, %cast_4385 : tensor<?x?x?x?xf32>, tensor<256x256x3x3xf32>) outs(%4200 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
// Refine the dynamic spatial dims back to the static 14x14 shape.
%cast_4445 = tensor.cast %4203 : tensor<1x256x?x?xf32> to tensor<1x256x14x14xf32>
// ReLU-style clamp of %cast_4445: each element is kept when it compares
// "ugt" against 0.0 and replaced by 0.0 otherwise.
// The index constants below are not referenced in the visible code.
%c1_4446 = arith.constant 1 : index
%c1_4447 = arith.constant 1 : index
%c256_4448 = arith.constant 256 : index
%c2_4449 = arith.constant 2 : index
%c14_4450 = arith.constant 14 : index
%c3_4451 = arith.constant 3 : index
%c14_4452 = arith.constant 14 : index
%4204 = tensor.empty() : tensor<1x256x14x14xf32>
%4205 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4445 : tensor<1x256x14x14xf32>) outs(%4204 : tensor<1x256x14x14xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// "ugt" is an unordered predicate, so a NaN input selects %in (the NaN passes through).
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x256x14x14xf32>
// Cast between identical types (no-op).
%cast_4453 = tensor.cast %4205 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
// Quantize %cast_4453 to i8 with scale 7.8125e-03 (= 1/128) and zero point 0:
//   q = clamp(round(x / scale) + zero_point, -128, 127)
%4206 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4207 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): presumably a torch dtype code for the quantized type; unused in the visible code.
%int12_4454 = torch.constant.int 12
// Extract scale and zero point as scalars (%4209 and %4211 are not used below;
// the region re-converts %4208 / %4210 itself).
%4208 = torch.aten.item %4206 : !torch.vtensor<[],f32> -> !torch.float
%4209 = torch_c.to_f64 %4208
%4210 = torch.aten.item %4207 : !torch.vtensor<[],si8> -> !torch.int
%4211 = torch_c.to_i64 %4210
%c1_4455 = arith.constant 1 : index
%c1_4456 = arith.constant 1 : index
%c256_4457 = arith.constant 256 : index
%c2_4458 = arith.constant 2 : index
%c14_4459 = arith.constant 14 : index
%c3_4460 = arith.constant 3 : index
%c14_4461 = arith.constant 14 : index
%4212 = tensor.empty() : tensor<1x256x14x14xi8>
%4213 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4453 : tensor<1x256x14x14xf32>) outs(%4212 : tensor<1x256x14x14xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %4210
%5775 = arith.sitofp %5774 : i64 to f32
// Scale as f32.
%5776 = torch_c.to_f64 %4208
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round is documented as rounding halfway cases away from
// zero; confirm this matches the rounding mode expected of the source quantize op.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x256x14x14xi8>
// Casts between identical types (no-ops).
%cast_4462 = tensor.cast %4213 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
%cast_4463 = tensor.cast %cast_4462 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
// Dequantize %cast_4463 back to f32 with scale 7.8125e-03 (= 1/128) and zero point 0:
//   x = (q - zero_point) * scale
%4214 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4215 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %4217 and %4219 are not used below; the region re-converts %4216 / %4218 itself.
%4216 = torch.aten.item %4214 : !torch.vtensor<[],f32> -> !torch.float
%4217 = torch_c.to_f64 %4216
%4218 = torch.aten.item %4215 : !torch.vtensor<[],si8> -> !torch.int
%4219 = torch_c.to_i64 %4218
// Cast between identical types (no-op).
%cast_4464 = tensor.cast %cast_4463 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
%c1_4465 = arith.constant 1 : index
%c1_4466 = arith.constant 1 : index
%c256_4467 = arith.constant 256 : index
%c2_4468 = arith.constant 2 : index
%c14_4469 = arith.constant 14 : index
%c3_4470 = arith.constant 3 : index
%c14_4471 = arith.constant 14 : index
%4220 = tensor.empty() : tensor<1x256x14x14xf32>
%4221 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4464 : tensor<1x256x14x14xi8>) outs(%4220 : tensor<1x256x14x14xf32>) {
^bb0(%in: i8, %out: f32):
// Subtract the zero point in i32, after sign-extending the i8 value.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4218
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Multiply by the scale (narrowed from f64 to f32).
%5779 = torch_c.to_f64 %4216
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x256x14x14xf32>
// Cast between identical types (no-op).
%cast_4472 = tensor.cast %4221 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
// Quantize %158 (1024x256x1x1, defined earlier; its dequantized form is the
// filter of the convolution further down) to i8 with scale 3.90625e-03 (= 1/256)
// and zero point 0:
//   q = clamp(round(x / scale) + zero_point, -128, 127)
%4222 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4223 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): presumably a torch dtype code for the quantized type; unused in the visible code.
%int12_4473 = torch.constant.int 12
// %4225 and %4227 are not used below; the region re-converts %4224 / %4226 itself.
%4224 = torch.aten.item %4222 : !torch.vtensor<[],f32> -> !torch.float
%4225 = torch_c.to_f64 %4224
%4226 = torch.aten.item %4223 : !torch.vtensor<[],si8> -> !torch.int
%4227 = torch_c.to_i64 %4226
%c1_4474 = arith.constant 1 : index
%c0_4475 = arith.constant 0 : index
%c1024_4476 = arith.constant 1024 : index
%c1_4477 = arith.constant 1 : index
%c256_4478 = arith.constant 256 : index
%4228 = tensor.empty() : tensor<1024x256x1x1xi8>
// #map4 pins the two trailing (size-1) input dims to index 0.
%4229 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%158 : tensor<1024x256x1x1xf32>) outs(%4228 : tensor<1024x256x1x1xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %4226
%5775 = arith.sitofp %5774 : i64 to f32
// Scale as f32.
%5776 = torch_c.to_f64 %4224
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round is documented as rounding halfway cases away from
// zero; confirm this matches the rounding mode expected of the source quantize op.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1024x256x1x1xi8>
// Casts between identical types (no-ops).
%cast_4479 = tensor.cast %4229 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
%cast_4480 = tensor.cast %cast_4479 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
// Dequantize %cast_4480 back to f32 with scale 3.90625e-03 (= 1/256) and zero point 0:
//   x = (q - zero_point) * scale
// The result %cast_4487 is the filter operand of the convolution further down.
%4230 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4231 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %4233 and %4235 are not used below; the region re-converts %4232 / %4234 itself.
%4232 = torch.aten.item %4230 : !torch.vtensor<[],f32> -> !torch.float
%4233 = torch_c.to_f64 %4232
%4234 = torch.aten.item %4231 : !torch.vtensor<[],si8> -> !torch.int
%4235 = torch_c.to_i64 %4234
// Cast between identical types (no-op).
%cast_4481 = tensor.cast %cast_4480 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
%c1_4482 = arith.constant 1 : index
%c0_4483 = arith.constant 0 : index
%c1024_4484 = arith.constant 1024 : index
%c1_4485 = arith.constant 1 : index
%c256_4486 = arith.constant 256 : index
%4236 = tensor.empty() : tensor<1024x256x1x1xf32>
%4237 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4481 : tensor<1024x256x1x1xi8>) outs(%4236 : tensor<1024x256x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// Subtract the zero point in i32, after sign-extending the i8 value.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4234
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Multiply by the scale (narrowed from f64 to f32).
%5779 = torch_c.to_f64 %4232
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1024x256x1x1xf32>
// Cast between identical types (no-op).
%cast_4487 = tensor.cast %4237 : tensor<1024x256x1x1xf32> to tensor<1024x256x1x1xf32>
// Quantize %160 (1024 elements, defined earlier; its dequantized form is
// broadcast as the bias of the convolution further down) to i8 with scale
// 3.90625e-03 (= 1/256) and zero point 0:
//   q = clamp(round(x / scale) + zero_point, -128, 127)
%4238 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4239 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): presumably a torch dtype code for the quantized type; unused in the visible code.
%int12_4488 = torch.constant.int 12
// %4241 and %4243 are not used below; the region re-converts %4240 / %4242 itself.
%4240 = torch.aten.item %4238 : !torch.vtensor<[],f32> -> !torch.float
%4241 = torch_c.to_f64 %4240
%4242 = torch.aten.item %4239 : !torch.vtensor<[],si8> -> !torch.int
%4243 = torch_c.to_i64 %4242
%c1_4489 = arith.constant 1 : index
%c0_4490 = arith.constant 0 : index
%c1024_4491 = arith.constant 1024 : index
%4244 = tensor.empty() : tensor<1024xi8>
%4245 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%160 : tensor<1024xf32>) outs(%4244 : tensor<1024xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %4242
%5775 = arith.sitofp %5774 : i64 to f32
// Scale as f32.
%5776 = torch_c.to_f64 %4240
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round is documented as rounding halfway cases away from
// zero; confirm this matches the rounding mode expected of the source quantize op.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1024xi8>
// Casts between identical types (no-ops).
%cast_4492 = tensor.cast %4245 : tensor<1024xi8> to tensor<1024xi8>
%cast_4493 = tensor.cast %cast_4492 : tensor<1024xi8> to tensor<1024xi8>
// Dequantize %cast_4493 back to f32 with scale 3.90625e-03 (= 1/256) and zero point 0:
//   x = (q - zero_point) * scale
// The result %cast_4498 is broadcast as the bias of the convolution further down.
%4246 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4247 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %4249 and %4251 are not used below; the region re-converts %4248 / %4250 itself.
%4248 = torch.aten.item %4246 : !torch.vtensor<[],f32> -> !torch.float
%4249 = torch_c.to_f64 %4248
%4250 = torch.aten.item %4247 : !torch.vtensor<[],si8> -> !torch.int
%4251 = torch_c.to_i64 %4250
// Cast between identical types (no-op).
%cast_4494 = tensor.cast %cast_4493 : tensor<1024xi8> to tensor<1024xi8>
%c1_4495 = arith.constant 1 : index
%c0_4496 = arith.constant 0 : index
%c1024_4497 = arith.constant 1024 : index
%4252 = tensor.empty() : tensor<1024xf32>
%4253 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_4494 : tensor<1024xi8>) outs(%4252 : tensor<1024xf32>) {
^bb0(%in: i8, %out: f32):
// Subtract the zero point in i32, after sign-extending the i8 value.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4250
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Multiply by the scale (narrowed from f64 to f32).
%5779 = torch_c.to_f64 %4248
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1024xf32>
// Cast between identical types (no-op).
%cast_4498 = tensor.cast %4253 : tensor<1024xf32> to tensor<1024xf32>
// Lowering of a 2-D convolution with a 1x1 filter: input %cast_4472 (1x256x14x14),
// filter %cast_4487 (1024x256x1x1), bias %cast_4498 (1024); output 1x1024x14x14.
// NOTE(review): the four lists are presumably the padding / stride / dilation /
// output-padding arguments of the original torch convolution op — confirm
// against the op that was lowered. The lists themselves are not referenced
// again in the visible code.
%int0_4499 = torch.constant.int 0
%int0_4500 = torch.constant.int 0
%int1_4501 = torch.constant.int 1
%int1_4502 = torch.constant.int 1
%int1_4503 = torch.constant.int 1
%int1_4504 = torch.constant.int 1
%int0_4505 = torch.constant.int 0
%4254 = torch.prim.ListConstruct %int0_4499, %int0_4500 : (!torch.int, !torch.int) -> !torch.list<int>
%4255 = torch.prim.ListConstruct %int1_4501, %int1_4502 : (!torch.int, !torch.int) -> !torch.list<int>
%4256 = torch.prim.ListConstruct %int1_4503, %int1_4504 : (!torch.int, !torch.int) -> !torch.list<int>
%4257 = torch.prim.ListConstruct %int0_4505, %int0_4505 : (!torch.int, !torch.int) -> !torch.list<int>
%false_4506 = torch.constant.bool false
%int1_4507 = torch.constant.int 1
// %4258 is the group count (checked by the "groups must divide" assertions below).
%4258 = torch_c.to_i64 %int1_4507
// %4259 / %4260 are the pad amounts (both 0) applied to dims 2 and 3 by tensor.pad below.
%4259 = torch_c.to_i64 %int0_4499
%4260 = torch_c.to_i64 %int0_4500
%4261 = torch_c.to_i64 %int0_4505
%4262 = torch_c.to_i64 %int0_4505
// Index constants emitted by the lowering; only a subset is used in the visible code.
%c0_4508 = arith.constant 0 : index
%c1_4509 = arith.constant 1 : index
%c1_4510 = arith.constant 1 : index
%c256_4511 = arith.constant 256 : index
%c2_4512 = arith.constant 2 : index
%c14_4513 = arith.constant 14 : index
%c3_4514 = arith.constant 3 : index
%c14_4515 = arith.constant 14 : index
%c0_4516 = arith.constant 0 : index
%c1024_4517 = arith.constant 1024 : index
%c1_4518 = arith.constant 1 : index
%c256_4519 = arith.constant 256 : index
%c2_4520 = arith.constant 2 : index
%c1_4521 = arith.constant 1 : index
%c3_4522 = arith.constant 3 : index
%c1_4523 = arith.constant 1 : index
// Runtime checks: 256 % groups == 0 (input channels) and 1024 % groups == 0 (weight batch).
%4263 = arith.index_cast %4258 : i64 to index
%c0_4524 = arith.constant 0 : index
%4264 = arith.remsi %c256_4511, %4263 : index
%4265 = arith.cmpi eq, %c0_4524, %4264 : index
cf.assert %4265, "invalid: groups must divide input channel size evenly."
%c0_4525 = arith.constant 0 : index
%4266 = arith.remsi %c1024_4517, %4263 : index
%4267 = arith.cmpi eq, %c0_4525, %4266 : index
cf.assert %4267, "invalid: groups must divide weight batch size evenly."
// i64 ones used as multiplier (%c1_i64_4526/4527) and divisor (%c1_i64_4528/4529)
// in the output-size arithmetic below; they match the dilations/strides of 1 on the conv op.
%c1_i64_4526 = arith.constant 1 : i64
%c1_i64_4527 = arith.constant 1 : i64
%c1_i64_4528 = arith.constant 1 : i64
%c1_i64_4529 = arith.constant 1 : i64
// Padding fill value.
%cst_4530 = arith.constant 0.000000e+00 : f32
%c0_4531 = arith.constant 0 : index
%c1_4532 = arith.constant 1 : index
%c1_4533 = arith.constant 1 : index
%c256_4534 = arith.constant 256 : index
%c2_4535 = arith.constant 2 : index
%c14_4536 = arith.constant 14 : index
%c3_4537 = arith.constant 3 : index
%c14_4538 = arith.constant 14 : index
%c0_i64_4539 = arith.constant 0 : i64
// Pad amounts per dimension: 0 for dims 0 and 1, %4259 / %4260 for dims 2 and 3.
%4268 = arith.index_cast %c0_i64_4539 : i64 to index
%4269 = arith.index_cast %c0_i64_4539 : i64 to index
%4270 = arith.index_cast %4259 : i64 to index
%4271 = arith.index_cast %4260 : i64 to index
// tensor.pad is still emitted even though every pad amount here is 0; the
// result type is fully dynamic.
%padded_4540 = tensor.pad %cast_4472 low[%4268, %4269, %4270, %4271] high[%4268, %4269, %4270, %4271] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_4530 : f32
} : tensor<1x256x14x14xf32> to tensor<?x?x?x?xf32>
// Output extent along dim 2:
//   floordiv(14 + 2*pad - 1*(1 - 1) - 1, 1) + 1
%4272 = arith.index_cast %c1_4521 : index to i64
%c1_i64_4541 = arith.constant 1 : i64
%c2_i64_4542 = arith.constant 2 : i64
%4273 = arith.muli %4259, %c2_i64_4542 : i64
%4274 = arith.index_cast %c14_4513 : index to i64
%4275 = arith.addi %4274, %4273 : i64
%4276 = arith.subi %4272, %c1_i64_4541 : i64
%4277 = arith.muli %c1_i64_4526, %4276 : i64
%4278 = arith.subi %4275, %4277 : i64
%4279 = arith.subi %4278, %c1_i64_4541 : i64
%4280 = arith.floordivsi %4279, %c1_i64_4528 : i64
%4281 = arith.addi %4280, %c1_i64_4541 : i64
%4282 = arith.index_cast %4281 : i64 to index
// Same computation for dim 3.
%4283 = arith.index_cast %c1_4523 : index to i64
%c1_i64_4543 = arith.constant 1 : i64
%c2_i64_4544 = arith.constant 2 : i64
%4284 = arith.muli %4260, %c2_i64_4544 : i64
%4285 = arith.index_cast %c14_4515 : index to i64
%4286 = arith.addi %4285, %4284 : i64
%4287 = arith.subi %4283, %c1_i64_4543 : i64
%4288 = arith.muli %c1_i64_4527, %4287 : i64
%4289 = arith.subi %4286, %4288 : i64
%4290 = arith.subi %4289, %c1_i64_4543 : i64
%4291 = arith.floordivsi %4290, %c1_i64_4529 : i64
%4292 = arith.addi %4291, %c1_i64_4543 : i64
%4293 = arith.index_cast %4292 : i64 to index
// Broadcast the bias along dim 1 (#map3 selects d1) into the conv accumulator,
// so the convolution below accumulates on top of the bias.
%4294 = tensor.empty(%4282, %4293) : tensor<1x1024x?x?xf32>
%4295 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4498 : tensor<1024xf32>) outs(%4294 : tensor<1x1024x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x1024x?x?xf32>
// Per-group channel counts; not referenced again in the visible code.
%4296 = arith.floordivsi %c256_4511, %4263 : index
%4297 = arith.floordivsi %c1024_4517, %4263 : index
%c0_4545 = arith.constant 0 : index
%c1_4546 = arith.constant 1 : index
// The convolution itself (stride 1, dilation 1), initialised with the broadcast bias.
%4298 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_4540, %cast_4487 : tensor<?x?x?x?xf32>, tensor<1024x256x1x1xf32>) outs(%4295 : tensor<1x1024x?x?xf32>) -> tensor<1x1024x?x?xf32>
// Refine the dynamic spatial dims back to the static 14x14 shape.
%cast_4547 = tensor.cast %4298 : tensor<1x1024x?x?xf32> to tensor<1x1024x14x14xf32>
// Quantize the convolution result %cast_4547 to i8 with scale 3.90625e-03 (= 1/256)
// and zero point 0:
//   q = clamp(round(x / scale) + zero_point, -128, 127)
%4299 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4300 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): presumably a torch dtype code for the quantized type; unused in the visible code.
%int12_4548 = torch.constant.int 12
// %4302 and %4304 are not used below; the region re-converts %4301 / %4303 itself.
%4301 = torch.aten.item %4299 : !torch.vtensor<[],f32> -> !torch.float
%4302 = torch_c.to_f64 %4301
%4303 = torch.aten.item %4300 : !torch.vtensor<[],si8> -> !torch.int
%4304 = torch_c.to_i64 %4303
%c1_4549 = arith.constant 1 : index
%c1_4550 = arith.constant 1 : index
%c1024_4551 = arith.constant 1024 : index
%c2_4552 = arith.constant 2 : index
%c14_4553 = arith.constant 14 : index
%c3_4554 = arith.constant 3 : index
%c14_4555 = arith.constant 14 : index
%4305 = tensor.empty() : tensor<1x1024x14x14xi8>
%4306 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4547 : tensor<1x1024x14x14xf32>) outs(%4305 : tensor<1x1024x14x14xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %4303
%5775 = arith.sitofp %5774 : i64 to f32
// Scale as f32.
%5776 = torch_c.to_f64 %4301
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round is documented as rounding halfway cases away from
// zero; confirm this matches the rounding mode expected of the source quantize op.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x1024x14x14xi8>
// Casts between identical types (no-ops).
%cast_4556 = tensor.cast %4306 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
%cast_4557 = tensor.cast %cast_4556 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
// Dequantize %cast_4557 back to f32 with scale 3.90625e-03 (= 1/256) and zero point 0:
//   x = (q - zero_point) * scale
%4307 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4308 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %4310 and %4312 are not used below; the region re-converts %4309 / %4311 itself.
%4309 = torch.aten.item %4307 : !torch.vtensor<[],f32> -> !torch.float
%4310 = torch_c.to_f64 %4309
%4311 = torch.aten.item %4308 : !torch.vtensor<[],si8> -> !torch.int
%4312 = torch_c.to_i64 %4311
// Cast between identical types (no-op).
%cast_4558 = tensor.cast %cast_4557 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
%c1_4559 = arith.constant 1 : index
%c1_4560 = arith.constant 1 : index
%c1024_4561 = arith.constant 1024 : index
%c2_4562 = arith.constant 2 : index
%c14_4563 = arith.constant 14 : index
%c3_4564 = arith.constant 3 : index
%c14_4565 = arith.constant 14 : index
%4313 = tensor.empty() : tensor<1x1024x14x14xf32>
%4314 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4558 : tensor<1x1024x14x14xi8>) outs(%4313 : tensor<1x1024x14x14xf32>) {
^bb0(%in: i8, %out: f32):
// Subtract the zero point in i32, after sign-extending the i8 value.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4311
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Multiply by the scale (narrowed from f64 to f32).
%5779 = torch_c.to_f64 %4309
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x1024x14x14xf32>
// Cast between identical types (no-op).
%cast_4566 = tensor.cast %4314 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
// Elementwise add of %cast_4566 and %cast_4260 (defined earlier), with the
// second operand scaled by an integer multiplier of 1:
//   out = a + b * 1
// NOTE(review): this looks like the skip/residual connection of a bottleneck
// block — confirm against the source model.
%int1_4567 = torch.constant.int 1
// Multiplier applied to the second operand inside the region below.
%4315 = torch_c.to_i64 %int1_4567
%c1_4568 = arith.constant 1 : index
%c1_4569 = arith.constant 1 : index
%c1024_4570 = arith.constant 1024 : index
%c2_4571 = arith.constant 2 : index
%c14_4572 = arith.constant 14 : index
%c3_4573 = arith.constant 3 : index
%c14_4574 = arith.constant 14 : index
%c1_4575 = arith.constant 1 : index
%c1024_4576 = arith.constant 1024 : index
// Broadcast-compatibility checks for dims 1, 2 and 3. Each compares two
// constants of equal value, so with these static shapes they always pass.
%4316 = arith.cmpi eq, %c1024_4570, %c1024_4576 : index
cf.assert %4316, "mismatched size for broadcast"
%c2_4577 = arith.constant 2 : index
%c14_4578 = arith.constant 14 : index
%4317 = arith.cmpi eq, %c14_4572, %c14_4578 : index
cf.assert %4317, "mismatched size for broadcast"
%c3_4579 = arith.constant 3 : index
%c14_4580 = arith.constant 14 : index
%4318 = arith.cmpi eq, %c14_4574, %c14_4580 : index
cf.assert %4318, "mismatched size for broadcast"
%4319 = tensor.empty() : tensor<1x1024x14x14xf32>
%4320 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4566, %cast_4260 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%4319 : tensor<1x1024x14x14xf32>) {
^bb0(%in: f32, %in_6197: f32, %out: f32):
// Convert the integer multiplier to f32, scale the second input, then add.
%5774 = arith.sitofp %4315 : i64 to f32
%5775 = arith.mulf %in_6197, %5774 : f32
%5776 = arith.addf %in, %5775 : f32
linalg.yield %5776 : f32
} -> tensor<1x1024x14x14xf32>
// Cast between identical types (no-op).
%cast_4581 = tensor.cast %4320 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
// ReLU-style clamp of %cast_4581: each element is kept when it compares
// "ugt" against 0.0 and replaced by 0.0 otherwise.
// The index constants below are not referenced in the visible code.
%c1_4582 = arith.constant 1 : index
%c1_4583 = arith.constant 1 : index
%c1024_4584 = arith.constant 1024 : index
%c2_4585 = arith.constant 2 : index
%c14_4586 = arith.constant 14 : index
%c3_4587 = arith.constant 3 : index
%c14_4588 = arith.constant 14 : index
%4321 = tensor.empty() : tensor<1x1024x14x14xf32>
%4322 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4581 : tensor<1x1024x14x14xf32>) outs(%4321 : tensor<1x1024x14x14xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// "ugt" is an unordered predicate, so a NaN input selects %in (the NaN passes through).
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x1024x14x14xf32>
// Cast between identical types (no-op).
%cast_4589 = tensor.cast %4322 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
// Quantize %cast_4589 to i8 with scale 1.5625e-02 (= 1/64) and zero point 0:
//   q = clamp(round(x / scale) + zero_point, -128, 127)
%4323 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%4324 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): presumably a torch dtype code for the quantized type; unused in the visible code.
%int12_4590 = torch.constant.int 12
// %4326 and %4328 are not used below; the region re-converts %4325 / %4327 itself.
%4325 = torch.aten.item %4323 : !torch.vtensor<[],f32> -> !torch.float
%4326 = torch_c.to_f64 %4325
%4327 = torch.aten.item %4324 : !torch.vtensor<[],si8> -> !torch.int
%4328 = torch_c.to_i64 %4327
%c1_4591 = arith.constant 1 : index
%c1_4592 = arith.constant 1 : index
%c1024_4593 = arith.constant 1024 : index
%c2_4594 = arith.constant 2 : index
%c14_4595 = arith.constant 14 : index
%c3_4596 = arith.constant 3 : index
%c14_4597 = arith.constant 14 : index
%4329 = tensor.empty() : tensor<1x1024x14x14xi8>
%4330 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4589 : tensor<1x1024x14x14xf32>) outs(%4329 : tensor<1x1024x14x14xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %4327
%5775 = arith.sitofp %5774 : i64 to f32
// Scale as f32.
%5776 = torch_c.to_f64 %4325
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round is documented as rounding halfway cases away from
// zero; confirm this matches the rounding mode expected of the source quantize op.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x1024x14x14xi8>
// Casts between identical types (no-ops).
%cast_4598 = tensor.cast %4330 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
%cast_4599 = tensor.cast %cast_4598 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
// Dequantize %cast_4599 back to f32 with scale 1.5625e-02 (= 1/64) and zero point 0:
//   x = (q - zero_point) * scale
%4331 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%4332 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %4334 and %4336 are not used below; the region re-converts %4333 / %4335 itself.
%4333 = torch.aten.item %4331 : !torch.vtensor<[],f32> -> !torch.float
%4334 = torch_c.to_f64 %4333
%4335 = torch.aten.item %4332 : !torch.vtensor<[],si8> -> !torch.int
%4336 = torch_c.to_i64 %4335
// Cast between identical types (no-op).
%cast_4600 = tensor.cast %cast_4599 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
%c1_4601 = arith.constant 1 : index
%c1_4602 = arith.constant 1 : index
%c1024_4603 = arith.constant 1024 : index
%c2_4604 = arith.constant 2 : index
%c14_4605 = arith.constant 14 : index
%c3_4606 = arith.constant 3 : index
%c14_4607 = arith.constant 14 : index
%4337 = tensor.empty() : tensor<1x1024x14x14xf32>
%4338 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4600 : tensor<1x1024x14x14xi8>) outs(%4337 : tensor<1x1024x14x14xf32>) {
^bb0(%in: i8, %out: f32):
// Subtract the zero point in i32, after sign-extending the i8 value.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4335
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Multiply by the scale (narrowed from f64 to f32).
%5779 = torch_c.to_f64 %4333
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x1024x14x14xf32>
// Cast between identical types (no-op).
%cast_4608 = tensor.cast %4338 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
// Quantize %162 (256x1024x1x1, defined earlier) to i8 with scale 0.001953125
// (= 1/512) and zero point 0:
//   q = clamp(round(x / scale) + zero_point, -128, 127)
// NOTE(review): %162 is presumably the filter of the next convolution — its
// consumer lies beyond the visible code; confirm.
%4339 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
%4340 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): presumably a torch dtype code for the quantized type; unused in the visible code.
%int12_4609 = torch.constant.int 12
// %4342 and %4344 are not used below; the region re-converts %4341 / %4343 itself.
%4341 = torch.aten.item %4339 : !torch.vtensor<[],f32> -> !torch.float
%4342 = torch_c.to_f64 %4341
%4343 = torch.aten.item %4340 : !torch.vtensor<[],si8> -> !torch.int
%4344 = torch_c.to_i64 %4343
%c1_4610 = arith.constant 1 : index
%c0_4611 = arith.constant 0 : index
%c256_4612 = arith.constant 256 : index
%c1_4613 = arith.constant 1 : index
%c1024_4614 = arith.constant 1024 : index
%4345 = tensor.empty() : tensor<256x1024x1x1xi8>
// #map4 pins the two trailing (size-1) input dims to index 0.
%4346 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%162 : tensor<256x1024x1x1xf32>) outs(%4345 : tensor<256x1024x1x1xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %4343
%5775 = arith.sitofp %5774 : i64 to f32
// Scale as f32.
%5776 = torch_c.to_f64 %4341
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round is documented as rounding halfway cases away from
// zero; confirm this matches the rounding mode expected of the source quantize op.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256x1024x1x1xi8>
// Casts between identical types (no-ops).
%cast_4615 = tensor.cast %4346 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
%cast_4616 = tensor.cast %cast_4615 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
// Dequantize %cast_4616 back to f32 with scale 0.001953125 (= 1/512) and zero point 0:
//   x = (q - zero_point) * scale
%4347 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
%4348 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %4350 and %4352 are not used below; the region re-converts %4349 / %4351 itself.
%4349 = torch.aten.item %4347 : !torch.vtensor<[],f32> -> !torch.float
%4350 = torch_c.to_f64 %4349
%4351 = torch.aten.item %4348 : !torch.vtensor<[],si8> -> !torch.int
%4352 = torch_c.to_i64 %4351
// Cast between identical types (no-op).
%cast_4617 = tensor.cast %cast_4616 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
%c1_4618 = arith.constant 1 : index
%c0_4619 = arith.constant 0 : index
%c256_4620 = arith.constant 256 : index
%c1_4621 = arith.constant 1 : index
%c1024_4622 = arith.constant 1024 : index
%4353 = tensor.empty() : tensor<256x1024x1x1xf32>
%4354 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4617 : tensor<256x1024x1x1xi8>) outs(%4353 : tensor<256x1024x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// Subtract the zero point in i32, after sign-extending the i8 value.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4351
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Multiply by the scale (narrowed from f64 to f32).
%5779 = torch_c.to_f64 %4349
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<256x1024x1x1xf32>
// Cast between identical types (no-op).
%cast_4623 = tensor.cast %4354 : tensor<256x1024x1x1xf32> to tensor<256x1024x1x1xf32>
// Quantize %164 (256 elements, defined earlier) to i8 with scale 0.001953125
// (= 1/512) and zero point 0:
//   q = clamp(round(x / scale) + zero_point, -128, 127)
// NOTE(review): %164 is presumably the bias of the next convolution — its
// consumer lies beyond the visible code; confirm.
%4355 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
%4356 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): presumably a torch dtype code for the quantized type; unused in the visible code.
%int12_4624 = torch.constant.int 12
// %4358 and %4360 are not used below; the region re-converts %4357 / %4359 itself.
%4357 = torch.aten.item %4355 : !torch.vtensor<[],f32> -> !torch.float
%4358 = torch_c.to_f64 %4357
%4359 = torch.aten.item %4356 : !torch.vtensor<[],si8> -> !torch.int
%4360 = torch_c.to_i64 %4359
%c1_4625 = arith.constant 1 : index
%c0_4626 = arith.constant 0 : index
%c256_4627 = arith.constant 256 : index
%4361 = tensor.empty() : tensor<256xi8>
%4362 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%164 : tensor<256xf32>) outs(%4361 : tensor<256xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %4359
%5775 = arith.sitofp %5774 : i64 to f32
// Scale as f32.
%5776 = torch_c.to_f64 %4357
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round is documented as rounding halfway cases away from
// zero; confirm this matches the rounding mode expected of the source quantize op.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to the signed 8-bit range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256xi8>
// Casts between identical types (no-ops).
%cast_4628 = tensor.cast %4362 : tensor<256xi8> to tensor<256xi8>
%cast_4629 = tensor.cast %cast_4628 : tensor<256xi8> to tensor<256xi8>
// Dequantize %cast_4629 back to f32 with scale 0.001953125 (= 1/512) and zero point 0:
//   x = (q - zero_point) * scale
%4363 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
%4364 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// %4366 and %4368 are not used below; the region re-converts %4365 / %4367 itself.
%4365 = torch.aten.item %4363 : !torch.vtensor<[],f32> -> !torch.float
%4366 = torch_c.to_f64 %4365
%4367 = torch.aten.item %4364 : !torch.vtensor<[],si8> -> !torch.int
%4368 = torch_c.to_i64 %4367
// Cast between identical types (no-op).
%cast_4630 = tensor.cast %cast_4629 : tensor<256xi8> to tensor<256xi8>
%c1_4631 = arith.constant 1 : index
%c0_4632 = arith.constant 0 : index
%c256_4633 = arith.constant 256 : index
%4369 = tensor.empty() : tensor<256xf32>
%4370 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_4630 : tensor<256xi8>) outs(%4369 : tensor<256xf32>) {
^bb0(%in: i8, %out: f32):
// Subtract the zero point in i32, after sign-extending the i8 value.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4367
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Multiply by the scale (narrowed from f64 to f32).
%5779 = torch_c.to_f64 %4365
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<256xf32>
// Cast between identical types (no-op).
%cast_4634 = tensor.cast %4370 : tensor<256xf32> to tensor<256xf32>
// 2-D convolution (NCHW input, FCHW filter): input %cast_4608 (1x1024x14x14),
// filter %cast_4623 (256x1024x1x1), bias %cast_4634 (256), result cast to 1x256x14x14.
// %cast_4608 and %cast_4623 are defined outside the visible code.
%int0_4635 = torch.constant.int 0
%int0_4636 = torch.constant.int 0
%int1_4637 = torch.constant.int 1
%int1_4638 = torch.constant.int 1
%int1_4639 = torch.constant.int 1
%int1_4640 = torch.constant.int 1
%int0_4641 = torch.constant.int 0
// NOTE(review): the four lists below are not consumed in the visible code; presumably they are
// the padding/stride/dilation/output-padding operands of the original torch op — confirm.
%4371 = torch.prim.ListConstruct %int0_4635, %int0_4636 : (!torch.int, !torch.int) -> !torch.list<int>
%4372 = torch.prim.ListConstruct %int1_4637, %int1_4638 : (!torch.int, !torch.int) -> !torch.list<int>
%4373 = torch.prim.ListConstruct %int1_4639, %int1_4640 : (!torch.int, !torch.int) -> !torch.list<int>
%4374 = torch.prim.ListConstruct %int0_4641, %int0_4641 : (!torch.int, !torch.int) -> !torch.list<int>
%false_4642 = torch.constant.bool false
%int1_4643 = torch.constant.int 1
// %4375 is the value checked as "groups" by the asserts below (1 here).
%4375 = torch_c.to_i64 %int1_4643
// %4376 / %4377 become the pad amounts for dims 2 and 3 (both 0 here).
%4376 = torch_c.to_i64 %int0_4635
%4377 = torch_c.to_i64 %int0_4636
%4378 = torch_c.to_i64 %int0_4641
%4379 = torch_c.to_i64 %int0_4641
// Index constants; several of these are not referenced in the visible code.
%c0_4644 = arith.constant 0 : index
%c1_4645 = arith.constant 1 : index
%c1_4646 = arith.constant 1 : index
%c1024_4647 = arith.constant 1024 : index
%c2_4648 = arith.constant 2 : index
%c14_4649 = arith.constant 14 : index
%c3_4650 = arith.constant 3 : index
%c14_4651 = arith.constant 14 : index
%c0_4652 = arith.constant 0 : index
%c256_4653 = arith.constant 256 : index
%c1_4654 = arith.constant 1 : index
%c1024_4655 = arith.constant 1024 : index
%c2_4656 = arith.constant 2 : index
%c1_4657 = arith.constant 1 : index
%c3_4658 = arith.constant 3 : index
%c1_4659 = arith.constant 1 : index
// Runtime checks: 1024 % groups == 0 and 256 % groups == 0.
%4380 = arith.index_cast %4375 : i64 to index
%c0_4660 = arith.constant 0 : index
%4381 = arith.remsi %c1024_4647, %4380 : index
%4382 = arith.cmpi eq, %c0_4660, %4381 : index
cf.assert %4382, "invalid: groups must divide input channel size evenly."
%c0_4661 = arith.constant 0 : index
%4383 = arith.remsi %c256_4653, %4380 : index
%4384 = arith.cmpi eq, %c0_4661, %4383 : index
cf.assert %4384, "invalid: groups must divide weight batch size evenly."
%c1_i64_4662 = arith.constant 1 : i64
%c1_i64_4663 = arith.constant 1 : i64
%c1_i64_4664 = arith.constant 1 : i64
%c1_i64_4665 = arith.constant 1 : i64
%cst_4666 = arith.constant 0.000000e+00 : f32
%c0_4667 = arith.constant 0 : index
%c1_4668 = arith.constant 1 : index
%c1_4669 = arith.constant 1 : index
%c1024_4670 = arith.constant 1024 : index
%c2_4671 = arith.constant 2 : index
%c14_4672 = arith.constant 14 : index
%c3_4673 = arith.constant 3 : index
%c14_4674 = arith.constant 14 : index
%c0_i64_4675 = arith.constant 0 : i64
// Pad the input with 0.0: no padding on dims 0/1, %4376 / %4377 on each side of dims 2/3.
// The result type is fully dynamic even though every pad amount is a constant.
%4385 = arith.index_cast %c0_i64_4675 : i64 to index
%4386 = arith.index_cast %c0_i64_4675 : i64 to index
%4387 = arith.index_cast %4376 : i64 to index
%4388 = arith.index_cast %4377 : i64 to index
%padded_4676 = tensor.pad %cast_4608 low[%4385, %4386, %4387, %4388] high[%4385, %4386, %4387, %4388] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_4666 : f32
} : tensor<1x1024x14x14xf32> to tensor<?x?x?x?xf32>
// Output extent for dim 2: floordiv(14 + 2*pad - 1*(k - 1) - 1, 1) + 1 with k = 1, pad = 0 -> 14.
%4389 = arith.index_cast %c1_4657 : index to i64
%c1_i64_4677 = arith.constant 1 : i64
%c2_i64_4678 = arith.constant 2 : i64
%4390 = arith.muli %4376, %c2_i64_4678 : i64
%4391 = arith.index_cast %c14_4649 : index to i64
%4392 = arith.addi %4391, %4390 : i64
%4393 = arith.subi %4389, %c1_i64_4677 : i64
%4394 = arith.muli %c1_i64_4662, %4393 : i64
%4395 = arith.subi %4392, %4394 : i64
%4396 = arith.subi %4395, %c1_i64_4677 : i64
%4397 = arith.floordivsi %4396, %c1_i64_4664 : i64
%4398 = arith.addi %4397, %c1_i64_4677 : i64
%4399 = arith.index_cast %4398 : i64 to index
// Same computation for dim 3.
%4400 = arith.index_cast %c1_4659 : index to i64
%c1_i64_4679 = arith.constant 1 : i64
%c2_i64_4680 = arith.constant 2 : i64
%4401 = arith.muli %4377, %c2_i64_4680 : i64
%4402 = arith.index_cast %c14_4651 : index to i64
%4403 = arith.addi %4402, %4401 : i64
%4404 = arith.subi %4400, %c1_i64_4679 : i64
%4405 = arith.muli %c1_i64_4663, %4404 : i64
%4406 = arith.subi %4403, %4405 : i64
%4407 = arith.subi %4406, %c1_i64_4679 : i64
%4408 = arith.floordivsi %4407, %c1_i64_4665 : i64
%4409 = arith.addi %4408, %c1_i64_4679 : i64
%4410 = arith.index_cast %4409 : i64 to index
// Broadcast the 256-element bias along d1 (#map3) into the accumulator, so the
// convolution below accumulates on top of the bias.
%4411 = tensor.empty(%4399, %4410) : tensor<1x256x?x?xf32>
%4412 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4634 : tensor<256xf32>) outs(%4411 : tensor<1x256x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x256x?x?xf32>
// NOTE(review): %4413, %4414 and the two index constants after them are not used in the visible code.
%4413 = arith.floordivsi %c1024_4647, %4380 : index
%4414 = arith.floordivsi %c256_4653, %4380 : index
%c0_4681 = arith.constant 0 : index
%c1_4682 = arith.constant 1 : index
// Stride 1, dilation 1 convolution; the dynamic result is cast back to the static 1x256x14x14 shape.
%4415 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_4676, %cast_4623 : tensor<?x?x?x?xf32>, tensor<256x1024x1x1xf32>) outs(%4412 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
%cast_4683 = tensor.cast %4415 : tensor<1x256x?x?xf32> to tensor<1x256x14x14xf32>
// Elementwise max(in, 0.0) over %cast_4683 (a ReLU-style clamp).
// NOTE(review): the index constants below are not referenced in the visible code.
%c1_4684 = arith.constant 1 : index
%c1_4685 = arith.constant 1 : index
%c256_4686 = arith.constant 256 : index
%c2_4687 = arith.constant 2 : index
%c14_4688 = arith.constant 14 : index
%c3_4689 = arith.constant 3 : index
%c14_4690 = arith.constant 14 : index
%4416 = tensor.empty() : tensor<1x256x14x14xf32>
%4417 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4683 : tensor<1x256x14x14xf32>) outs(%4416 : tensor<1x256x14x14xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// `ugt` is the unordered predicate, so a NaN input compares true and is passed through unchanged.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x256x14x14xf32>
%cast_4691 = tensor.cast %4417 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
// Quantize the f32 activation %cast_4691 to i8, elementwise:
//   out = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
// with scale = 7.8125e-03 (%4418) and zero_point = 0 (%4419).
%4418 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4419 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): %int12_4692 is not used in the visible code; presumably a dtype code from the torch op — confirm.
%int12_4692 = torch.constant.int 12
%4420 = torch.aten.item %4418 : !torch.vtensor<[],f32> -> !torch.float
%4421 = torch_c.to_f64 %4420
%4422 = torch.aten.item %4419 : !torch.vtensor<[],si8> -> !torch.int
%4423 = torch_c.to_i64 %4422
%c1_4693 = arith.constant 1 : index
%c1_4694 = arith.constant 1 : index
%c256_4695 = arith.constant 256 : index
%c2_4696 = arith.constant 2 : index
%c14_4697 = arith.constant 14 : index
%c3_4698 = arith.constant 3 : index
%c14_4699 = arith.constant 14 : index
%4424 = tensor.empty() : tensor<1x256x14x14xi8>
%4425 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4691 : tensor<1x256x14x14xf32>) outs(%4424 : tensor<1x256x14x14xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point and scale are converted inside the body from values captured from the enclosing scope.
%5774 = torch_c.to_i64 %4422
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %4420
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round rounds ties away from zero. If this is meant to match ONNX
// QuantizeLinear (ties to even), math.roundeven would be the matching op — confirm with the producer.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to the i8 range [-128, 127] before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x256x14x14xi8>
%cast_4700 = tensor.cast %4425 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
%cast_4701 = tensor.cast %cast_4700 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
// Dequantize the i8 activation %cast_4701 back to f32, elementwise:
//   out = (sext(in) - zero_point) * scale
// with scale = 7.8125e-03 (%4426) and zero_point = 0 (%4427).
%4426 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4427 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%4428 = torch.aten.item %4426 : !torch.vtensor<[],f32> -> !torch.float
%4429 = torch_c.to_f64 %4428
%4430 = torch.aten.item %4427 : !torch.vtensor<[],si8> -> !torch.int
%4431 = torch_c.to_i64 %4430
%cast_4702 = tensor.cast %cast_4701 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
// NOTE(review): the index constants below are not referenced in the visible code.
%c1_4703 = arith.constant 1 : index
%c1_4704 = arith.constant 1 : index
%c256_4705 = arith.constant 256 : index
%c2_4706 = arith.constant 2 : index
%c14_4707 = arith.constant 14 : index
%c3_4708 = arith.constant 3 : index
%c14_4709 = arith.constant 14 : index
%4432 = tensor.empty() : tensor<1x256x14x14xf32>
%4433 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4702 : tensor<1x256x14x14xi8>) outs(%4432 : tensor<1x256x14x14xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 before subtracting the zero point.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4430
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %4428
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x256x14x14xf32>
%cast_4710 = tensor.cast %4433 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
// Quantize the f32 tensor %166 (256x256x3x3, defined outside the visible code) to i8:
//   out = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
// with scale = 3.90625e-03 (%4434) and zero_point = 0 (%4435).
%4434 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4435 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): %int12_4711 is not used in the visible code; presumably a dtype code from the torch op — confirm.
%int12_4711 = torch.constant.int 12
%4436 = torch.aten.item %4434 : !torch.vtensor<[],f32> -> !torch.float
%4437 = torch_c.to_f64 %4436
%4438 = torch.aten.item %4435 : !torch.vtensor<[],si8> -> !torch.int
%4439 = torch_c.to_i64 %4438
%c1_4712 = arith.constant 1 : index
%c0_4713 = arith.constant 0 : index
%c256_4714 = arith.constant 256 : index
%c1_4715 = arith.constant 1 : index
%c256_4716 = arith.constant 256 : index
%c2_4717 = arith.constant 2 : index
%c3_4718 = arith.constant 3 : index
%c3_4719 = arith.constant 3 : index
%c3_4720 = arith.constant 3 : index
%4440 = tensor.empty() : tensor<256x256x3x3xi8>
%4441 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%166 : tensor<256x256x3x3xf32>) outs(%4440 : tensor<256x256x3x3xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %4438
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %4436
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round rounds ties away from zero; ONNX QuantizeLinear specifies ties to even
// (math.roundeven) — confirm which is intended.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to the i8 range [-128, 127] before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256x256x3x3xi8>
%cast_4721 = tensor.cast %4441 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
%cast_4722 = tensor.cast %cast_4721 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
// Dequantize the i8 tensor %cast_4722 (256x256x3x3) back to f32, elementwise:
//   out = (sext(in) - zero_point) * scale
// with scale = 3.90625e-03 (%4442) and zero_point = 0 (%4443).
%4442 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4443 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%4444 = torch.aten.item %4442 : !torch.vtensor<[],f32> -> !torch.float
%4445 = torch_c.to_f64 %4444
%4446 = torch.aten.item %4443 : !torch.vtensor<[],si8> -> !torch.int
%4447 = torch_c.to_i64 %4446
%cast_4723 = tensor.cast %cast_4722 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
// NOTE(review): the index constants below are not referenced in the visible code.
%c1_4724 = arith.constant 1 : index
%c0_4725 = arith.constant 0 : index
%c256_4726 = arith.constant 256 : index
%c1_4727 = arith.constant 1 : index
%c256_4728 = arith.constant 256 : index
%c2_4729 = arith.constant 2 : index
%c3_4730 = arith.constant 3 : index
%c3_4731 = arith.constant 3 : index
%c3_4732 = arith.constant 3 : index
%4448 = tensor.empty() : tensor<256x256x3x3xf32>
%4449 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4723 : tensor<256x256x3x3xi8>) outs(%4448 : tensor<256x256x3x3xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 before subtracting the zero point.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4446
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %4444
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<256x256x3x3xf32>
%cast_4733 = tensor.cast %4449 : tensor<256x256x3x3xf32> to tensor<256x256x3x3xf32>
// Quantize the 256-element f32 tensor %168 (defined outside the visible code) to i8:
//   out = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
// with scale = 7.8125e-03 (%4450) and zero_point = 0 (%4451).
%4450 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4451 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): %int12_4734 is not used in the visible code; presumably a dtype code from the torch op — confirm.
%int12_4734 = torch.constant.int 12
%4452 = torch.aten.item %4450 : !torch.vtensor<[],f32> -> !torch.float
%4453 = torch_c.to_f64 %4452
%4454 = torch.aten.item %4451 : !torch.vtensor<[],si8> -> !torch.int
%4455 = torch_c.to_i64 %4454
%c1_4735 = arith.constant 1 : index
%c0_4736 = arith.constant 0 : index
%c256_4737 = arith.constant 256 : index
%4456 = tensor.empty() : tensor<256xi8>
%4457 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%168 : tensor<256xf32>) outs(%4456 : tensor<256xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %4454
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %4452
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round rounds ties away from zero; ONNX QuantizeLinear specifies ties to even
// (math.roundeven) — confirm which is intended.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to the i8 range [-128, 127] before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<256xi8>
%cast_4738 = tensor.cast %4457 : tensor<256xi8> to tensor<256xi8>
%cast_4739 = tensor.cast %cast_4738 : tensor<256xi8> to tensor<256xi8>
// Dequantize the 256-element i8 tensor %cast_4739 back to f32, elementwise:
//   out = (sext(in) - zero_point) * scale
// with scale = 7.8125e-03 (%4458) and zero_point = 0 (%4459).
%4458 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4459 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%4460 = torch.aten.item %4458 : !torch.vtensor<[],f32> -> !torch.float
%4461 = torch_c.to_f64 %4460
%4462 = torch.aten.item %4459 : !torch.vtensor<[],si8> -> !torch.int
%4463 = torch_c.to_i64 %4462
%cast_4740 = tensor.cast %cast_4739 : tensor<256xi8> to tensor<256xi8>
// NOTE(review): the index constants below are not referenced in the visible code.
%c1_4741 = arith.constant 1 : index
%c0_4742 = arith.constant 0 : index
%c256_4743 = arith.constant 256 : index
%4464 = tensor.empty() : tensor<256xf32>
%4465 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_4740 : tensor<256xi8>) outs(%4464 : tensor<256xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 before subtracting the zero point.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4462
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %4460
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<256xf32>
%cast_4744 = tensor.cast %4465 : tensor<256xf32> to tensor<256xf32>
// 2-D convolution (NCHW input, FCHW filter): input %cast_4710 (1x256x14x14),
// filter %cast_4733 (256x256x3x3), bias %cast_4744 (256), result cast to 1x256x14x14.
%int1_4745 = torch.constant.int 1
%int1_4746 = torch.constant.int 1
%int1_4747 = torch.constant.int 1
%int1_4748 = torch.constant.int 1
%int1_4749 = torch.constant.int 1
%int1_4750 = torch.constant.int 1
%int0_4751 = torch.constant.int 0
// NOTE(review): the four lists below are not consumed in the visible code; presumably they are
// the padding/stride/dilation/output-padding operands of the original torch op — confirm.
%4466 = torch.prim.ListConstruct %int1_4745, %int1_4746 : (!torch.int, !torch.int) -> !torch.list<int>
%4467 = torch.prim.ListConstruct %int1_4747, %int1_4748 : (!torch.int, !torch.int) -> !torch.list<int>
%4468 = torch.prim.ListConstruct %int1_4749, %int1_4750 : (!torch.int, !torch.int) -> !torch.list<int>
%4469 = torch.prim.ListConstruct %int0_4751, %int0_4751 : (!torch.int, !torch.int) -> !torch.list<int>
%false_4752 = torch.constant.bool false
%int1_4753 = torch.constant.int 1
// %4470 is the value checked as "groups" by the asserts below (1 here).
%4470 = torch_c.to_i64 %int1_4753
// %4471 / %4472 become the pad amounts for dims 2 and 3 (both 1 here).
%4471 = torch_c.to_i64 %int1_4745
%4472 = torch_c.to_i64 %int1_4746
%4473 = torch_c.to_i64 %int0_4751
%4474 = torch_c.to_i64 %int0_4751
// Index constants; several of these are not referenced in the visible code.
%c0_4754 = arith.constant 0 : index
%c1_4755 = arith.constant 1 : index
%c1_4756 = arith.constant 1 : index
%c256_4757 = arith.constant 256 : index
%c2_4758 = arith.constant 2 : index
%c14_4759 = arith.constant 14 : index
%c3_4760 = arith.constant 3 : index
%c14_4761 = arith.constant 14 : index
%c0_4762 = arith.constant 0 : index
%c256_4763 = arith.constant 256 : index
%c1_4764 = arith.constant 1 : index
%c256_4765 = arith.constant 256 : index
%c2_4766 = arith.constant 2 : index
%c3_4767 = arith.constant 3 : index
%c3_4768 = arith.constant 3 : index
%c3_4769 = arith.constant 3 : index
// Runtime checks: 256 % groups == 0 for both the input channels and the filter count.
%4475 = arith.index_cast %4470 : i64 to index
%c0_4770 = arith.constant 0 : index
%4476 = arith.remsi %c256_4757, %4475 : index
%4477 = arith.cmpi eq, %c0_4770, %4476 : index
cf.assert %4477, "invalid: groups must divide input channel size evenly."
%c0_4771 = arith.constant 0 : index
%4478 = arith.remsi %c256_4763, %4475 : index
%4479 = arith.cmpi eq, %c0_4771, %4478 : index
cf.assert %4479, "invalid: groups must divide weight batch size evenly."
%c1_i64_4772 = arith.constant 1 : i64
%c1_i64_4773 = arith.constant 1 : i64
%c1_i64_4774 = arith.constant 1 : i64
%c1_i64_4775 = arith.constant 1 : i64
%cst_4776 = arith.constant 0.000000e+00 : f32
%c0_4777 = arith.constant 0 : index
%c1_4778 = arith.constant 1 : index
%c1_4779 = arith.constant 1 : index
%c256_4780 = arith.constant 256 : index
%c2_4781 = arith.constant 2 : index
%c14_4782 = arith.constant 14 : index
%c3_4783 = arith.constant 3 : index
%c14_4784 = arith.constant 14 : index
%c0_i64_4785 = arith.constant 0 : i64
// Pad the input with 0.0: no padding on dims 0/1, %4471 / %4472 on each side of dims 2/3.
// The result type is fully dynamic even though every pad amount is a constant.
%4480 = arith.index_cast %c0_i64_4785 : i64 to index
%4481 = arith.index_cast %c0_i64_4785 : i64 to index
%4482 = arith.index_cast %4471 : i64 to index
%4483 = arith.index_cast %4472 : i64 to index
%padded_4786 = tensor.pad %cast_4710 low[%4480, %4481, %4482, %4483] high[%4480, %4481, %4482, %4483] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_4776 : f32
} : tensor<1x256x14x14xf32> to tensor<?x?x?x?xf32>
// Output extent for dim 2: floordiv(14 + 2*pad - 1*(k - 1) - 1, 1) + 1 with k = 3, pad = 1 -> 14.
%4484 = arith.index_cast %c3_4767 : index to i64
%c1_i64_4787 = arith.constant 1 : i64
%c2_i64_4788 = arith.constant 2 : i64
%4485 = arith.muli %4471, %c2_i64_4788 : i64
%4486 = arith.index_cast %c14_4759 : index to i64
%4487 = arith.addi %4486, %4485 : i64
%4488 = arith.subi %4484, %c1_i64_4787 : i64
%4489 = arith.muli %c1_i64_4772, %4488 : i64
%4490 = arith.subi %4487, %4489 : i64
%4491 = arith.subi %4490, %c1_i64_4787 : i64
%4492 = arith.floordivsi %4491, %c1_i64_4774 : i64
%4493 = arith.addi %4492, %c1_i64_4787 : i64
%4494 = arith.index_cast %4493 : i64 to index
// Same computation for dim 3.
%4495 = arith.index_cast %c3_4769 : index to i64
%c1_i64_4789 = arith.constant 1 : i64
%c2_i64_4790 = arith.constant 2 : i64
%4496 = arith.muli %4472, %c2_i64_4790 : i64
%4497 = arith.index_cast %c14_4761 : index to i64
%4498 = arith.addi %4497, %4496 : i64
%4499 = arith.subi %4495, %c1_i64_4789 : i64
%4500 = arith.muli %c1_i64_4773, %4499 : i64
%4501 = arith.subi %4498, %4500 : i64
%4502 = arith.subi %4501, %c1_i64_4789 : i64
%4503 = arith.floordivsi %4502, %c1_i64_4775 : i64
%4504 = arith.addi %4503, %c1_i64_4789 : i64
%4505 = arith.index_cast %4504 : i64 to index
// Broadcast the 256-element bias along d1 (#map3) into the accumulator, so the
// convolution below accumulates on top of the bias.
%4506 = tensor.empty(%4494, %4505) : tensor<1x256x?x?xf32>
%4507 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4744 : tensor<256xf32>) outs(%4506 : tensor<1x256x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x256x?x?xf32>
// NOTE(review): %4508, %4509 and the two index constants after them are not used in the visible code.
%4508 = arith.floordivsi %c256_4757, %4475 : index
%4509 = arith.floordivsi %c256_4763, %4475 : index
%c0_4791 = arith.constant 0 : index
%c1_4792 = arith.constant 1 : index
// Stride 1, dilation 1 convolution; the dynamic result is cast back to the static 1x256x14x14 shape.
%4510 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_4786, %cast_4733 : tensor<?x?x?x?xf32>, tensor<256x256x3x3xf32>) outs(%4507 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
%cast_4793 = tensor.cast %4510 : tensor<1x256x?x?xf32> to tensor<1x256x14x14xf32>
// Elementwise max(in, 0.0) over %cast_4793 (a ReLU-style clamp).
// NOTE(review): the index constants below are not referenced in the visible code.
%c1_4794 = arith.constant 1 : index
%c1_4795 = arith.constant 1 : index
%c256_4796 = arith.constant 256 : index
%c2_4797 = arith.constant 2 : index
%c14_4798 = arith.constant 14 : index
%c3_4799 = arith.constant 3 : index
%c14_4800 = arith.constant 14 : index
%4511 = tensor.empty() : tensor<1x256x14x14xf32>
%4512 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4793 : tensor<1x256x14x14xf32>) outs(%4511 : tensor<1x256x14x14xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// `ugt` is the unordered predicate, so a NaN input compares true and is passed through unchanged.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x256x14x14xf32>
%cast_4801 = tensor.cast %4512 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
// Quantize the f32 activation %cast_4801 to i8, elementwise:
//   out = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
// with scale = 1.5625e-02 (%4513) and zero_point = 0 (%4514).
%4513 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%4514 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): %int12_4802 is not used in the visible code; presumably a dtype code from the torch op — confirm.
%int12_4802 = torch.constant.int 12
%4515 = torch.aten.item %4513 : !torch.vtensor<[],f32> -> !torch.float
%4516 = torch_c.to_f64 %4515
%4517 = torch.aten.item %4514 : !torch.vtensor<[],si8> -> !torch.int
%4518 = torch_c.to_i64 %4517
%c1_4803 = arith.constant 1 : index
%c1_4804 = arith.constant 1 : index
%c256_4805 = arith.constant 256 : index
%c2_4806 = arith.constant 2 : index
%c14_4807 = arith.constant 14 : index
%c3_4808 = arith.constant 3 : index
%c14_4809 = arith.constant 14 : index
%4519 = tensor.empty() : tensor<1x256x14x14xi8>
%4520 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4801 : tensor<1x256x14x14xf32>) outs(%4519 : tensor<1x256x14x14xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %4517
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %4515
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round rounds ties away from zero; ONNX QuantizeLinear specifies ties to even
// (math.roundeven) — confirm which is intended.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to the i8 range [-128, 127] before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x256x14x14xi8>
%cast_4810 = tensor.cast %4520 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
%cast_4811 = tensor.cast %cast_4810 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
// Dequantize the i8 activation %cast_4811 back to f32, elementwise:
//   out = (sext(in) - zero_point) * scale
// with scale = 1.5625e-02 (%4521) and zero_point = 0 (%4522).
%4521 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%4522 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%4523 = torch.aten.item %4521 : !torch.vtensor<[],f32> -> !torch.float
%4524 = torch_c.to_f64 %4523
%4525 = torch.aten.item %4522 : !torch.vtensor<[],si8> -> !torch.int
%4526 = torch_c.to_i64 %4525
%cast_4812 = tensor.cast %cast_4811 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
// NOTE(review): the index constants below are not referenced in the visible code.
%c1_4813 = arith.constant 1 : index
%c1_4814 = arith.constant 1 : index
%c256_4815 = arith.constant 256 : index
%c2_4816 = arith.constant 2 : index
%c14_4817 = arith.constant 14 : index
%c3_4818 = arith.constant 3 : index
%c14_4819 = arith.constant 14 : index
%4527 = tensor.empty() : tensor<1x256x14x14xf32>
%4528 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4812 : tensor<1x256x14x14xi8>) outs(%4527 : tensor<1x256x14x14xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 before subtracting the zero point.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4525
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %4523
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x256x14x14xf32>
%cast_4820 = tensor.cast %4528 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
// Quantize the f32 tensor %170 (1024x256x1x1, defined outside the visible code) to i8:
//   out = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
// with scale = 3.90625e-03 (%4529) and zero_point = 0 (%4530).
%4529 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4530 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): %int12_4821 is not used in the visible code; presumably a dtype code from the torch op — confirm.
%int12_4821 = torch.constant.int 12
%4531 = torch.aten.item %4529 : !torch.vtensor<[],f32> -> !torch.float
%4532 = torch_c.to_f64 %4531
%4533 = torch.aten.item %4530 : !torch.vtensor<[],si8> -> !torch.int
%4534 = torch_c.to_i64 %4533
%c1_4822 = arith.constant 1 : index
%c0_4823 = arith.constant 0 : index
%c1024_4824 = arith.constant 1024 : index
%c1_4825 = arith.constant 1 : index
%c256_4826 = arith.constant 256 : index
%4535 = tensor.empty() : tensor<1024x256x1x1xi8>
// The input is read through #map4, which indexes its two trailing unit dims at constant 0.
%4536 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%170 : tensor<1024x256x1x1xf32>) outs(%4535 : tensor<1024x256x1x1xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %4533
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %4531
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round rounds ties away from zero; ONNX QuantizeLinear specifies ties to even
// (math.roundeven) — confirm which is intended.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to the i8 range [-128, 127] before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1024x256x1x1xi8>
%cast_4827 = tensor.cast %4536 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
%cast_4828 = tensor.cast %cast_4827 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
// Dequantize the i8 tensor %cast_4828 (1024x256x1x1) back to f32, elementwise:
//   out = (sext(in) - zero_point) * scale
// with scale = 3.90625e-03 (%4537) and zero_point = 0 (%4538).
%4537 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4538 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%4539 = torch.aten.item %4537 : !torch.vtensor<[],f32> -> !torch.float
%4540 = torch_c.to_f64 %4539
%4541 = torch.aten.item %4538 : !torch.vtensor<[],si8> -> !torch.int
%4542 = torch_c.to_i64 %4541
%cast_4829 = tensor.cast %cast_4828 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
// NOTE(review): the index constants below are not referenced in the visible code.
%c1_4830 = arith.constant 1 : index
%c0_4831 = arith.constant 0 : index
%c1024_4832 = arith.constant 1024 : index
%c1_4833 = arith.constant 1 : index
%c256_4834 = arith.constant 256 : index
%4543 = tensor.empty() : tensor<1024x256x1x1xf32>
// The input is read through #map4, which indexes its two trailing unit dims at constant 0.
%4544 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4829 : tensor<1024x256x1x1xi8>) outs(%4543 : tensor<1024x256x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 before subtracting the zero point.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4541
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %4539
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1024x256x1x1xf32>
%cast_4835 = tensor.cast %4544 : tensor<1024x256x1x1xf32> to tensor<1024x256x1x1xf32>
// Quantize the 1024-element f32 tensor %172 (defined outside the visible code) to i8:
//   out = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
// with scale = 3.90625e-03 (%4545) and zero_point = 0 (%4546).
%4545 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4546 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): %int12_4836 is not used in the visible code; presumably a dtype code from the torch op — confirm.
%int12_4836 = torch.constant.int 12
%4547 = torch.aten.item %4545 : !torch.vtensor<[],f32> -> !torch.float
%4548 = torch_c.to_f64 %4547
%4549 = torch.aten.item %4546 : !torch.vtensor<[],si8> -> !torch.int
%4550 = torch_c.to_i64 %4549
%c1_4837 = arith.constant 1 : index
%c0_4838 = arith.constant 0 : index
%c1024_4839 = arith.constant 1024 : index
%4551 = tensor.empty() : tensor<1024xi8>
%4552 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%172 : tensor<1024xf32>) outs(%4551 : tensor<1024xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %4549
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %4547
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// NOTE(review): math.round rounds ties away from zero; ONNX QuantizeLinear specifies ties to even
// (math.roundeven) — confirm which is intended.
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Clamp to the i8 range [-128, 127] before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1024xi8>
%cast_4840 = tensor.cast %4552 : tensor<1024xi8> to tensor<1024xi8>
%cast_4841 = tensor.cast %cast_4840 : tensor<1024xi8> to tensor<1024xi8>
// Dequantize the 1024-element i8 tensor %cast_4841 back to f32, elementwise:
//   out = (sext(in) - zero_point) * scale
// with scale = 3.90625e-03 (%4553) and zero_point = 0 (%4554).
%4553 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4554 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%4555 = torch.aten.item %4553 : !torch.vtensor<[],f32> -> !torch.float
%4556 = torch_c.to_f64 %4555
%4557 = torch.aten.item %4554 : !torch.vtensor<[],si8> -> !torch.int
%4558 = torch_c.to_i64 %4557
%cast_4842 = tensor.cast %cast_4841 : tensor<1024xi8> to tensor<1024xi8>
// NOTE(review): the index constants below are not referenced in the visible code.
%c1_4843 = arith.constant 1 : index
%c0_4844 = arith.constant 0 : index
%c1024_4845 = arith.constant 1024 : index
%4559 = tensor.empty() : tensor<1024xf32>
%4560 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_4842 : tensor<1024xi8>) outs(%4559 : tensor<1024xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 before subtracting the zero point.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4557
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %4555
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1024xf32>
%cast_4846 = tensor.cast %4560 : tensor<1024xf32> to tensor<1024xf32>
// 2-D convolution (NCHW input, FCHW filter): input %cast_4820 (1x256x14x14),
// filter %cast_4835 (1024x256x1x1), bias %cast_4846 (1024), result cast to 1x1024x14x14.
%int0_4847 = torch.constant.int 0
%int0_4848 = torch.constant.int 0
%int1_4849 = torch.constant.int 1
%int1_4850 = torch.constant.int 1
%int1_4851 = torch.constant.int 1
%int1_4852 = torch.constant.int 1
%int0_4853 = torch.constant.int 0
// NOTE(review): the four lists below are not consumed in the visible code; presumably they are
// the padding/stride/dilation/output-padding operands of the original torch op — confirm.
%4561 = torch.prim.ListConstruct %int0_4847, %int0_4848 : (!torch.int, !torch.int) -> !torch.list<int>
%4562 = torch.prim.ListConstruct %int1_4849, %int1_4850 : (!torch.int, !torch.int) -> !torch.list<int>
%4563 = torch.prim.ListConstruct %int1_4851, %int1_4852 : (!torch.int, !torch.int) -> !torch.list<int>
%4564 = torch.prim.ListConstruct %int0_4853, %int0_4853 : (!torch.int, !torch.int) -> !torch.list<int>
%false_4854 = torch.constant.bool false
%int1_4855 = torch.constant.int 1
// %4565 is the value checked as "groups" by the asserts below (1 here).
%4565 = torch_c.to_i64 %int1_4855
// %4566 / %4567 become the pad amounts for dims 2 and 3 (both 0 here).
%4566 = torch_c.to_i64 %int0_4847
%4567 = torch_c.to_i64 %int0_4848
%4568 = torch_c.to_i64 %int0_4853
%4569 = torch_c.to_i64 %int0_4853
// Index constants; several of these are not referenced in the visible code.
%c0_4856 = arith.constant 0 : index
%c1_4857 = arith.constant 1 : index
%c1_4858 = arith.constant 1 : index
%c256_4859 = arith.constant 256 : index
%c2_4860 = arith.constant 2 : index
%c14_4861 = arith.constant 14 : index
%c3_4862 = arith.constant 3 : index
%c14_4863 = arith.constant 14 : index
%c0_4864 = arith.constant 0 : index
%c1024_4865 = arith.constant 1024 : index
%c1_4866 = arith.constant 1 : index
%c256_4867 = arith.constant 256 : index
%c2_4868 = arith.constant 2 : index
%c1_4869 = arith.constant 1 : index
%c3_4870 = arith.constant 3 : index
%c1_4871 = arith.constant 1 : index
// Runtime checks: 256 % groups == 0 and 1024 % groups == 0.
%4570 = arith.index_cast %4565 : i64 to index
%c0_4872 = arith.constant 0 : index
%4571 = arith.remsi %c256_4859, %4570 : index
%4572 = arith.cmpi eq, %c0_4872, %4571 : index
cf.assert %4572, "invalid: groups must divide input channel size evenly."
%c0_4873 = arith.constant 0 : index
%4573 = arith.remsi %c1024_4865, %4570 : index
%4574 = arith.cmpi eq, %c0_4873, %4573 : index
cf.assert %4574, "invalid: groups must divide weight batch size evenly."
%c1_i64_4874 = arith.constant 1 : i64
%c1_i64_4875 = arith.constant 1 : i64
%c1_i64_4876 = arith.constant 1 : i64
%c1_i64_4877 = arith.constant 1 : i64
%cst_4878 = arith.constant 0.000000e+00 : f32
%c0_4879 = arith.constant 0 : index
%c1_4880 = arith.constant 1 : index
%c1_4881 = arith.constant 1 : index
%c256_4882 = arith.constant 256 : index
%c2_4883 = arith.constant 2 : index
%c14_4884 = arith.constant 14 : index
%c3_4885 = arith.constant 3 : index
%c14_4886 = arith.constant 14 : index
%c0_i64_4887 = arith.constant 0 : i64
// Pad the input with 0.0: no padding on dims 0/1, %4566 / %4567 on each side of dims 2/3.
// The result type is fully dynamic even though every pad amount is a constant.
%4575 = arith.index_cast %c0_i64_4887 : i64 to index
%4576 = arith.index_cast %c0_i64_4887 : i64 to index
%4577 = arith.index_cast %4566 : i64 to index
%4578 = arith.index_cast %4567 : i64 to index
%padded_4888 = tensor.pad %cast_4820 low[%4575, %4576, %4577, %4578] high[%4575, %4576, %4577, %4578] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_4878 : f32
} : tensor<1x256x14x14xf32> to tensor<?x?x?x?xf32>
// Output extent for dim 2: floordiv(14 + 2*pad - 1*(k - 1) - 1, 1) + 1 with k = 1, pad = 0 -> 14.
%4579 = arith.index_cast %c1_4869 : index to i64
%c1_i64_4889 = arith.constant 1 : i64
%c2_i64_4890 = arith.constant 2 : i64
%4580 = arith.muli %4566, %c2_i64_4890 : i64
%4581 = arith.index_cast %c14_4861 : index to i64
%4582 = arith.addi %4581, %4580 : i64
%4583 = arith.subi %4579, %c1_i64_4889 : i64
%4584 = arith.muli %c1_i64_4874, %4583 : i64
%4585 = arith.subi %4582, %4584 : i64
%4586 = arith.subi %4585, %c1_i64_4889 : i64
%4587 = arith.floordivsi %4586, %c1_i64_4876 : i64
%4588 = arith.addi %4587, %c1_i64_4889 : i64
%4589 = arith.index_cast %4588 : i64 to index
// Same computation for dim 3.
%4590 = arith.index_cast %c1_4871 : index to i64
%c1_i64_4891 = arith.constant 1 : i64
%c2_i64_4892 = arith.constant 2 : i64
%4591 = arith.muli %4567, %c2_i64_4892 : i64
%4592 = arith.index_cast %c14_4863 : index to i64
%4593 = arith.addi %4592, %4591 : i64
%4594 = arith.subi %4590, %c1_i64_4891 : i64
%4595 = arith.muli %c1_i64_4875, %4594 : i64
%4596 = arith.subi %4593, %4595 : i64
%4597 = arith.subi %4596, %c1_i64_4891 : i64
%4598 = arith.floordivsi %4597, %c1_i64_4877 : i64
%4599 = arith.addi %4598, %c1_i64_4891 : i64
%4600 = arith.index_cast %4599 : i64 to index
// Broadcast the 1024-element bias along d1 (#map3) into the accumulator, so the
// convolution below accumulates on top of the bias.
%4601 = tensor.empty(%4589, %4600) : tensor<1x1024x?x?xf32>
%4602 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4846 : tensor<1024xf32>) outs(%4601 : tensor<1x1024x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x1024x?x?xf32>
// NOTE(review): %4603, %4604 and the two index constants after them are not used in the visible code.
%4603 = arith.floordivsi %c256_4859, %4570 : index
%4604 = arith.floordivsi %c1024_4865, %4570 : index
%c0_4893 = arith.constant 0 : index
%c1_4894 = arith.constant 1 : index
// Stride 1, dilation 1 convolution; the dynamic result is cast back to the static 1x1024x14x14 shape.
%4605 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_4888, %cast_4835 : tensor<?x?x?x?xf32>, tensor<1024x256x1x1xf32>) outs(%4602 : tensor<1x1024x?x?xf32>) -> tensor<1x1024x?x?xf32>
%cast_4895 = tensor.cast %4605 : tensor<1x1024x?x?xf32> to tensor<1x1024x14x14xf32>
// Quantize the 1x1024x14x14 f32 convolution result (%cast_4895) to i8:
//   q = clamp(round_half_even(x / scale) + zero_point, -128, 127)
// with scale = 7.8125e-03 and zero_point = 0 taken from the literals below.
%4606 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4607 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): %int12_4896 is not referenced inside this block; presumably a leftover dtype code - confirm.
%int12_4896 = torch.constant.int 12
%4608 = torch.aten.item %4606 : !torch.vtensor<[],f32> -> !torch.float
%4609 = torch_c.to_f64 %4608
%4610 = torch.aten.item %4607 : !torch.vtensor<[],si8> -> !torch.int
%4611 = torch_c.to_i64 %4610
%c1_4897 = arith.constant 1 : index
%c1_4898 = arith.constant 1 : index
%c1024_4899 = arith.constant 1024 : index
%c2_4900 = arith.constant 2 : index
%c14_4901 = arith.constant 14 : index
%c3_4902 = arith.constant 3 : index
%c14_4903 = arith.constant 14 : index
%4612 = tensor.empty() : tensor<1x1024x14x14xi8>
%4613 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4895 : tensor<1x1024x14x14xf32>) outs(%4612 : tensor<1x1024x14x14xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point and scale are re-materialised as f32 inside the body.
%5774 = torch_c.to_i64 %4610
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %4608
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round ties to even (math.roundeven) rather than away from zero (math.round),
// matching the ONNX QuantizeLinear / quantize_per_tensor rounding rule.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float-to-int conversion.
// Both comparisons are unordered, so a NaN value ends up as 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x1024x14x14xi8>
// The two casts below are type-identical (no shape change).
%cast_4904 = tensor.cast %4613 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
%cast_4905 = tensor.cast %cast_4904 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
// Dequantize the i8 tensor %cast_4905 back to f32:
//   x = (q - zero_point) * scale, with scale = 7.8125e-03 and zero_point = 0.
%4614 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4615 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%4616 = torch.aten.item %4614 : !torch.vtensor<[],f32> -> !torch.float
%4617 = torch_c.to_f64 %4616
%4618 = torch.aten.item %4615 : !torch.vtensor<[],si8> -> !torch.int
%4619 = torch_c.to_i64 %4618
%cast_4906 = tensor.cast %cast_4905 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
%c1_4907 = arith.constant 1 : index
%c1_4908 = arith.constant 1 : index
%c1024_4909 = arith.constant 1024 : index
%c2_4910 = arith.constant 2 : index
%c14_4911 = arith.constant 14 : index
%c3_4912 = arith.constant 3 : index
%c14_4913 = arith.constant 14 : index
%4620 = tensor.empty() : tensor<1x1024x14x14xf32>
%4621 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4906 : tensor<1x1024x14x14xi8>) outs(%4620 : tensor<1x1024x14x14xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 before subtracting the zero point so the difference cannot wrap in i8.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4618
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Multiply by the scale (narrowed from f64 to f32).
%5779 = torch_c.to_f64 %4616
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x1024x14x14xf32>
%cast_4914 = tensor.cast %4621 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
// Element-wise add with a multiplier: result = %cast_4914 + alpha * %cast_4608,
// where alpha is the integer constant 1 (%int1_4915).
// %cast_4608 is defined earlier in the function, outside this section.
%int1_4915 = torch.constant.int 1
%4622 = torch_c.to_i64 %int1_4915
%c1_4916 = arith.constant 1 : index
%c1_4917 = arith.constant 1 : index
%c1024_4918 = arith.constant 1024 : index
%c2_4919 = arith.constant 2 : index
%c14_4920 = arith.constant 14 : index
%c3_4921 = arith.constant 3 : index
%c14_4922 = arith.constant 14 : index
%c1_4923 = arith.constant 1 : index
%c1024_4924 = arith.constant 1024 : index
// Broadcast-compatibility checks; each compares two equal constants (1024, 14, 14).
%4623 = arith.cmpi eq, %c1024_4918, %c1024_4924 : index
cf.assert %4623, "mismatched size for broadcast"
%c2_4925 = arith.constant 2 : index
%c14_4926 = arith.constant 14 : index
%4624 = arith.cmpi eq, %c14_4920, %c14_4926 : index
cf.assert %4624, "mismatched size for broadcast"
%c3_4927 = arith.constant 3 : index
%c14_4928 = arith.constant 14 : index
%4625 = arith.cmpi eq, %c14_4922, %c14_4928 : index
cf.assert %4625, "mismatched size for broadcast"
%4626 = tensor.empty() : tensor<1x1024x14x14xf32>
%4627 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4914, %cast_4608 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%4626 : tensor<1x1024x14x14xf32>) {
^bb0(%in: f32, %in_6197: f32, %out: f32):
// alpha (i64 1) is converted to f32 and scales the second operand before the add.
%5774 = arith.sitofp %4622 : i64 to f32
%5775 = arith.mulf %in_6197, %5774 : f32
%5776 = arith.addf %in, %5775 : f32
linalg.yield %5776 : f32
} -> tensor<1x1024x14x14xf32>
%cast_4929 = tensor.cast %4627 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
// ReLU over the 1x1024x14x14 sum: yields the input where it compares greater than 0.0, else 0.0.
%c1_4930 = arith.constant 1 : index
%c1_4931 = arith.constant 1 : index
%c1024_4932 = arith.constant 1024 : index
%c2_4933 = arith.constant 2 : index
%c14_4934 = arith.constant 14 : index
%c3_4935 = arith.constant 3 : index
%c14_4936 = arith.constant 14 : index
%4628 = tensor.empty() : tensor<1x1024x14x14xf32>
%4629 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4929 : tensor<1x1024x14x14xf32>) outs(%4628 : tensor<1x1024x14x14xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// `ugt` is an unordered comparison: it is also true when %in is NaN, so NaN is passed through.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x1024x14x14xf32>
%cast_4937 = tensor.cast %4629 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
// Quantize the ReLU output (%cast_4937, 1x1024x14x14 f32) to i8:
//   q = clamp(round_half_even(x / scale) + zero_point, -128, 127)
// with scale = 7.8125e-03 and zero_point = 0 taken from the literals below.
%4630 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4631 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): %int12_4938 is not referenced inside this block; presumably a leftover dtype code - confirm.
%int12_4938 = torch.constant.int 12
%4632 = torch.aten.item %4630 : !torch.vtensor<[],f32> -> !torch.float
%4633 = torch_c.to_f64 %4632
%4634 = torch.aten.item %4631 : !torch.vtensor<[],si8> -> !torch.int
%4635 = torch_c.to_i64 %4634
%c1_4939 = arith.constant 1 : index
%c1_4940 = arith.constant 1 : index
%c1024_4941 = arith.constant 1024 : index
%c2_4942 = arith.constant 2 : index
%c14_4943 = arith.constant 14 : index
%c3_4944 = arith.constant 3 : index
%c14_4945 = arith.constant 14 : index
%4636 = tensor.empty() : tensor<1x1024x14x14xi8>
%4637 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4937 : tensor<1x1024x14x14xf32>) outs(%4636 : tensor<1x1024x14x14xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point and scale are re-materialised as f32 inside the body.
%5774 = torch_c.to_i64 %4634
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %4632
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round ties to even (math.roundeven) rather than away from zero (math.round),
// matching the ONNX QuantizeLinear / quantize_per_tensor rounding rule.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float-to-int conversion.
// Both comparisons are unordered, so a NaN value ends up as 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x1024x14x14xi8>
// The two casts below are type-identical (no shape change).
%cast_4946 = tensor.cast %4637 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
%cast_4947 = tensor.cast %cast_4946 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
// Dequantize the i8 tensor %cast_4947 back to f32:
//   x = (q - zero_point) * scale, with scale = 7.8125e-03 and zero_point = 0.
%4638 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4639 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%4640 = torch.aten.item %4638 : !torch.vtensor<[],f32> -> !torch.float
%4641 = torch_c.to_f64 %4640
%4642 = torch.aten.item %4639 : !torch.vtensor<[],si8> -> !torch.int
%4643 = torch_c.to_i64 %4642
%cast_4948 = tensor.cast %cast_4947 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
%c1_4949 = arith.constant 1 : index
%c1_4950 = arith.constant 1 : index
%c1024_4951 = arith.constant 1024 : index
%c2_4952 = arith.constant 2 : index
%c14_4953 = arith.constant 14 : index
%c3_4954 = arith.constant 3 : index
%c14_4955 = arith.constant 14 : index
%4644 = tensor.empty() : tensor<1x1024x14x14xf32>
%4645 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4948 : tensor<1x1024x14x14xi8>) outs(%4644 : tensor<1x1024x14x14xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 before subtracting the zero point so the difference cannot wrap in i8.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4642
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Multiply by the scale (narrowed from f64 to f32).
%5779 = torch_c.to_f64 %4640
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x1024x14x14xf32>
%cast_4956 = tensor.cast %4645 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
// Quantize the 512x1024x1x1 f32 tensor %174 (defined earlier in the function) to i8:
//   q = clamp(round_half_even(w / scale) + zero_point, -128, 127)
// with scale = 0.001953125 and zero_point = 0 taken from the literals below.
%4646 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
%4647 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): %int12_4957 is not referenced inside this block; presumably a leftover dtype code - confirm.
%int12_4957 = torch.constant.int 12
%4648 = torch.aten.item %4646 : !torch.vtensor<[],f32> -> !torch.float
%4649 = torch_c.to_f64 %4648
%4650 = torch.aten.item %4647 : !torch.vtensor<[],si8> -> !torch.int
%4651 = torch_c.to_i64 %4650
%c1_4958 = arith.constant 1 : index
%c0_4959 = arith.constant 0 : index
%c512_4960 = arith.constant 512 : index
%c1_4961 = arith.constant 1 : index
%c1024_4962 = arith.constant 1024 : index
%4652 = tensor.empty() : tensor<512x1024x1x1xi8>
// #map4 reads the input at (d0, d1, 0, 0), i.e. the two unit trailing dims are indexed at 0.
%4653 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%174 : tensor<512x1024x1x1xf32>) outs(%4652 : tensor<512x1024x1x1xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point and scale are re-materialised as f32 inside the body.
%5774 = torch_c.to_i64 %4650
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %4648
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round ties to even (math.roundeven) rather than away from zero (math.round),
// matching the ONNX QuantizeLinear / quantize_per_tensor rounding rule.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float-to-int conversion.
// Both comparisons are unordered, so a NaN value ends up as 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<512x1024x1x1xi8>
// The two casts below are type-identical (no shape change).
%cast_4963 = tensor.cast %4653 : tensor<512x1024x1x1xi8> to tensor<512x1024x1x1xi8>
%cast_4964 = tensor.cast %cast_4963 : tensor<512x1024x1x1xi8> to tensor<512x1024x1x1xi8>
// Dequantize the 512x1024x1x1 i8 tensor %cast_4964 back to f32:
//   w = (q - zero_point) * scale, with scale = 0.001953125 and zero_point = 0.
%4654 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
%4655 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%4656 = torch.aten.item %4654 : !torch.vtensor<[],f32> -> !torch.float
%4657 = torch_c.to_f64 %4656
%4658 = torch.aten.item %4655 : !torch.vtensor<[],si8> -> !torch.int
%4659 = torch_c.to_i64 %4658
%cast_4965 = tensor.cast %cast_4964 : tensor<512x1024x1x1xi8> to tensor<512x1024x1x1xi8>
%c1_4966 = arith.constant 1 : index
%c0_4967 = arith.constant 0 : index
%c512_4968 = arith.constant 512 : index
%c1_4969 = arith.constant 1 : index
%c1024_4970 = arith.constant 1024 : index
%4660 = tensor.empty() : tensor<512x1024x1x1xf32>
%4661 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4965 : tensor<512x1024x1x1xi8>) outs(%4660 : tensor<512x1024x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 before subtracting the zero point so the difference cannot wrap in i8.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4658
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Multiply by the scale (narrowed from f64 to f32).
%5779 = torch_c.to_f64 %4656
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<512x1024x1x1xf32>
%cast_4971 = tensor.cast %4661 : tensor<512x1024x1x1xf32> to tensor<512x1024x1x1xf32>
// Quantize the 512-element f32 vector %176 (defined earlier in the function) to i8:
//   q = clamp(round_half_even(b / scale) + zero_point, -128, 127)
// with scale = 3.90625e-03 and zero_point = 0 taken from the literals below.
%4662 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4663 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): %int12_4972 is not referenced inside this block; presumably a leftover dtype code - confirm.
%int12_4972 = torch.constant.int 12
%4664 = torch.aten.item %4662 : !torch.vtensor<[],f32> -> !torch.float
%4665 = torch_c.to_f64 %4664
%4666 = torch.aten.item %4663 : !torch.vtensor<[],si8> -> !torch.int
%4667 = torch_c.to_i64 %4666
%c1_4973 = arith.constant 1 : index
%c0_4974 = arith.constant 0 : index
%c512_4975 = arith.constant 512 : index
%4668 = tensor.empty() : tensor<512xi8>
%4669 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%176 : tensor<512xf32>) outs(%4668 : tensor<512xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point and scale are re-materialised as f32 inside the body.
%5774 = torch_c.to_i64 %4666
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %4664
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round ties to even (math.roundeven) rather than away from zero (math.round),
// matching the ONNX QuantizeLinear / quantize_per_tensor rounding rule.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float-to-int conversion.
// Both comparisons are unordered, so a NaN value ends up as 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<512xi8>
// The two casts below are type-identical (no shape change).
%cast_4976 = tensor.cast %4669 : tensor<512xi8> to tensor<512xi8>
%cast_4977 = tensor.cast %cast_4976 : tensor<512xi8> to tensor<512xi8>
// Dequantize the 512-element i8 vector %cast_4977 back to f32:
//   b = (q - zero_point) * scale, with scale = 3.90625e-03 and zero_point = 0.
%4670 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4671 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%4672 = torch.aten.item %4670 : !torch.vtensor<[],f32> -> !torch.float
%4673 = torch_c.to_f64 %4672
%4674 = torch.aten.item %4671 : !torch.vtensor<[],si8> -> !torch.int
%4675 = torch_c.to_i64 %4674
%cast_4978 = tensor.cast %cast_4977 : tensor<512xi8> to tensor<512xi8>
%c1_4979 = arith.constant 1 : index
%c0_4980 = arith.constant 0 : index
%c512_4981 = arith.constant 512 : index
%4676 = tensor.empty() : tensor<512xf32>
%4677 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_4978 : tensor<512xi8>) outs(%4676 : tensor<512xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 before subtracting the zero point so the difference cannot wrap in i8.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4674
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Multiply by the scale (narrowed from f64 to f32).
%5779 = torch_c.to_f64 %4672
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<512xf32>
%cast_4982 = tensor.cast %4677 : tensor<512xf32> to tensor<512xf32>
// 2-D convolution (NCHW input, FCHW filter): 1x1024x14x14 activations (%cast_4956) with the
// 512x1024x1x1 filter (%cast_4971), accumulated onto the per-channel vector %cast_4982.
// The conv op itself uses strides = 1 and dilations = 1; the spatial padding values are 0.
%int0_4983 = torch.constant.int 0
%int0_4984 = torch.constant.int 0
%int1_4985 = torch.constant.int 1
%int1_4986 = torch.constant.int 1
%int1_4987 = torch.constant.int 1
%int1_4988 = torch.constant.int 1
%int0_4989 = torch.constant.int 0
// %4678 = [0, 0]: its elements are used below as the spatial padding amounts.
// NOTE(review): %4679 / %4680 / %4681 presumably carry dilation, stride and output padding - confirm;
// they are not referenced again inside this block.
%4678 = torch.prim.ListConstruct %int0_4983, %int0_4984 : (!torch.int, !torch.int) -> !torch.list<int>
%4679 = torch.prim.ListConstruct %int1_4985, %int1_4986 : (!torch.int, !torch.int) -> !torch.list<int>
%4680 = torch.prim.ListConstruct %int1_4987, %int1_4988 : (!torch.int, !torch.int) -> !torch.list<int>
%4681 = torch.prim.ListConstruct %int0_4989, %int0_4989 : (!torch.int, !torch.int) -> !torch.list<int>
%false_4990 = torch.constant.bool false
// Group count (1) as i64; checked against the channel counts below.
%int1_4991 = torch.constant.int 1
%4682 = torch_c.to_i64 %int1_4991
%4683 = torch_c.to_i64 %int0_4983
%4684 = torch_c.to_i64 %int0_4984
%4685 = torch_c.to_i64 %int0_4989
%4686 = torch_c.to_i64 %int0_4989
%c0_4992 = arith.constant 0 : index
%c1_4993 = arith.constant 1 : index
%c1_4994 = arith.constant 1 : index
%c1024_4995 = arith.constant 1024 : index
%c2_4996 = arith.constant 2 : index
%c14_4997 = arith.constant 14 : index
%c3_4998 = arith.constant 3 : index
%c14_4999 = arith.constant 14 : index
%c0_5000 = arith.constant 0 : index
%c512_5001 = arith.constant 512 : index
%c1_5002 = arith.constant 1 : index
%c1024_5003 = arith.constant 1024 : index
%c2_5004 = arith.constant 2 : index
%c1_5005 = arith.constant 1 : index
%c3_5006 = arith.constant 3 : index
%c1_5007 = arith.constant 1 : index
// Runtime checks that the group count divides 1024 (input channels) and 512 (filter count).
%4687 = arith.index_cast %4682 : i64 to index
%c0_5008 = arith.constant 0 : index
%4688 = arith.remsi %c1024_4995, %4687 : index
%4689 = arith.cmpi eq, %c0_5008, %4688 : index
cf.assert %4689, "invalid: groups must divide input channel size evenly."
%c0_5009 = arith.constant 0 : index
%4690 = arith.remsi %c512_5001, %4687 : index
%4691 = arith.cmpi eq, %c0_5009, %4690 : index
cf.assert %4691, "invalid: groups must divide weight batch size evenly."
%c1_i64_5010 = arith.constant 1 : i64
%c1_i64_5011 = arith.constant 1 : i64
%c1_i64_5012 = arith.constant 1 : i64
%c1_i64_5013 = arith.constant 1 : i64
%cst_5014 = arith.constant 0.000000e+00 : f32
%c0_5015 = arith.constant 0 : index
%c1_5016 = arith.constant 1 : index
%c1_5017 = arith.constant 1 : index
%c1024_5018 = arith.constant 1024 : index
%c2_5019 = arith.constant 2 : index
%c14_5020 = arith.constant 14 : index
%c3_5021 = arith.constant 3 : index
%c14_5022 = arith.constant 14 : index
%c0_i64_5023 = arith.constant 0 : i64
// Pad the input with 0.0: no padding on dims 0 and 1, %4694 / %4695 (both 0 here) on dims 2 and 3.
// The result type is fully dynamic even though every pad amount is a constant.
%4692 = arith.index_cast %c0_i64_5023 : i64 to index
%4693 = arith.index_cast %c0_i64_5023 : i64 to index
%4694 = arith.index_cast %4683 : i64 to index
%4695 = arith.index_cast %4684 : i64 to index
%padded_5024 = tensor.pad %cast_4956 low[%4692, %4693, %4694, %4695] high[%4692, %4693, %4694, %4695] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_5014 : f32
} : tensor<1x1024x14x14xf32> to tensor<?x?x?x?xf32>
// Output extent for dim 2: floor((14 + 2*pad - d*(k - 1) - 1) / s) + 1 with k = 1.
// NOTE(review): d (%c1_i64_5010) and s (%c1_i64_5012) are presumably dilation and stride - confirm.
%4696 = arith.index_cast %c1_5005 : index to i64
%c1_i64_5025 = arith.constant 1 : i64
%c2_i64_5026 = arith.constant 2 : i64
%4697 = arith.muli %4683, %c2_i64_5026 : i64
%4698 = arith.index_cast %c14_4997 : index to i64
%4699 = arith.addi %4698, %4697 : i64
%4700 = arith.subi %4696, %c1_i64_5025 : i64
%4701 = arith.muli %c1_i64_5010, %4700 : i64
%4702 = arith.subi %4699, %4701 : i64
%4703 = arith.subi %4702, %c1_i64_5025 : i64
%4704 = arith.floordivsi %4703, %c1_i64_5012 : i64
%4705 = arith.addi %4704, %c1_i64_5025 : i64
%4706 = arith.index_cast %4705 : i64 to index
// Same computation for dim 3.
%4707 = arith.index_cast %c1_5007 : index to i64
%c1_i64_5027 = arith.constant 1 : i64
%c2_i64_5028 = arith.constant 2 : i64
%4708 = arith.muli %4684, %c2_i64_5028 : i64
%4709 = arith.index_cast %c14_4999 : index to i64
%4710 = arith.addi %4709, %4708 : i64
%4711 = arith.subi %4707, %c1_i64_5027 : i64
%4712 = arith.muli %c1_i64_5011, %4711 : i64
%4713 = arith.subi %4710, %4712 : i64
%4714 = arith.subi %4713, %c1_i64_5027 : i64
%4715 = arith.floordivsi %4714, %c1_i64_5013 : i64
%4716 = arith.addi %4715, %c1_i64_5027 : i64
%4717 = arith.index_cast %4716 : i64 to index
// Initialise the accumulator by broadcasting the 512-element vector along the channel dim (#map3 reads d1).
%4718 = tensor.empty(%4706, %4717) : tensor<1x512x?x?xf32>
%4719 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4982 : tensor<512xf32>) outs(%4718 : tensor<1x512x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x512x?x?xf32>
// Per-group channel counts; not referenced again inside this block.
%4720 = arith.floordivsi %c1024_4995, %4687 : index
%4721 = arith.floordivsi %c512_5001, %4687 : index
%c0_5029 = arith.constant 0 : index
%c1_5030 = arith.constant 1 : index
%4722 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_5024, %cast_4971 : tensor<?x?x?x?xf32>, tensor<512x1024x1x1xf32>) outs(%4719 : tensor<1x512x?x?xf32>) -> tensor<1x512x?x?xf32>
// Cast the dynamically-shaped result back to the static 1x512x14x14 shape.
%cast_5031 = tensor.cast %4722 : tensor<1x512x?x?xf32> to tensor<1x512x14x14xf32>
// ReLU over the 1x512x14x14 convolution result: yields the input where it compares greater than 0.0, else 0.0.
%c1_5032 = arith.constant 1 : index
%c1_5033 = arith.constant 1 : index
%c512_5034 = arith.constant 512 : index
%c2_5035 = arith.constant 2 : index
%c14_5036 = arith.constant 14 : index
%c3_5037 = arith.constant 3 : index
%c14_5038 = arith.constant 14 : index
%4723 = tensor.empty() : tensor<1x512x14x14xf32>
%4724 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5031 : tensor<1x512x14x14xf32>) outs(%4723 : tensor<1x512x14x14xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// `ugt` is an unordered comparison: it is also true when %in is NaN, so NaN is passed through.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x512x14x14xf32>
%cast_5039 = tensor.cast %4724 : tensor<1x512x14x14xf32> to tensor<1x512x14x14xf32>
// Quantize the ReLU output (%cast_5039, 1x512x14x14 f32) to i8:
//   q = clamp(round_half_even(x / scale) + zero_point, -128, 127)
// with scale = 7.8125e-03 and zero_point = 0 taken from the literals below.
%4725 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4726 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): %int12_5040 is not referenced inside this block; presumably a leftover dtype code - confirm.
%int12_5040 = torch.constant.int 12
%4727 = torch.aten.item %4725 : !torch.vtensor<[],f32> -> !torch.float
%4728 = torch_c.to_f64 %4727
%4729 = torch.aten.item %4726 : !torch.vtensor<[],si8> -> !torch.int
%4730 = torch_c.to_i64 %4729
%c1_5041 = arith.constant 1 : index
%c1_5042 = arith.constant 1 : index
%c512_5043 = arith.constant 512 : index
%c2_5044 = arith.constant 2 : index
%c14_5045 = arith.constant 14 : index
%c3_5046 = arith.constant 3 : index
%c14_5047 = arith.constant 14 : index
%4731 = tensor.empty() : tensor<1x512x14x14xi8>
%4732 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5039 : tensor<1x512x14x14xf32>) outs(%4731 : tensor<1x512x14x14xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point and scale are re-materialised as f32 inside the body.
%5774 = torch_c.to_i64 %4729
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %4727
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round ties to even (math.roundeven) rather than away from zero (math.round),
// matching the ONNX QuantizeLinear / quantize_per_tensor rounding rule.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float-to-int conversion.
// Both comparisons are unordered, so a NaN value ends up as 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x512x14x14xi8>
// The two casts below are type-identical (no shape change).
%cast_5048 = tensor.cast %4732 : tensor<1x512x14x14xi8> to tensor<1x512x14x14xi8>
%cast_5049 = tensor.cast %cast_5048 : tensor<1x512x14x14xi8> to tensor<1x512x14x14xi8>
// Dequantize the i8 tensor %cast_5049 back to f32:
//   x = (q - zero_point) * scale, with scale = 7.8125e-03 and zero_point = 0.
%4733 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4734 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%4735 = torch.aten.item %4733 : !torch.vtensor<[],f32> -> !torch.float
%4736 = torch_c.to_f64 %4735
%4737 = torch.aten.item %4734 : !torch.vtensor<[],si8> -> !torch.int
%4738 = torch_c.to_i64 %4737
%cast_5050 = tensor.cast %cast_5049 : tensor<1x512x14x14xi8> to tensor<1x512x14x14xi8>
%c1_5051 = arith.constant 1 : index
%c1_5052 = arith.constant 1 : index
%c512_5053 = arith.constant 512 : index
%c2_5054 = arith.constant 2 : index
%c14_5055 = arith.constant 14 : index
%c3_5056 = arith.constant 3 : index
%c14_5057 = arith.constant 14 : index
%4739 = tensor.empty() : tensor<1x512x14x14xf32>
%4740 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5050 : tensor<1x512x14x14xi8>) outs(%4739 : tensor<1x512x14x14xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 before subtracting the zero point so the difference cannot wrap in i8.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4737
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Multiply by the scale (narrowed from f64 to f32).
%5779 = torch_c.to_f64 %4735
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x512x14x14xf32>
%cast_5058 = tensor.cast %4740 : tensor<1x512x14x14xf32> to tensor<1x512x14x14xf32>
// Quantize the 512x512x3x3 f32 tensor %178 (defined earlier in the function) to i8:
//   q = clamp(round_half_even(w / scale) + zero_point, -128, 127)
// with scale = 3.90625e-03 and zero_point = 0 taken from the literals below.
%4741 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4742 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): %int12_5059 is not referenced inside this block; presumably a leftover dtype code - confirm.
%int12_5059 = torch.constant.int 12
%4743 = torch.aten.item %4741 : !torch.vtensor<[],f32> -> !torch.float
%4744 = torch_c.to_f64 %4743
%4745 = torch.aten.item %4742 : !torch.vtensor<[],si8> -> !torch.int
%4746 = torch_c.to_i64 %4745
%c1_5060 = arith.constant 1 : index
%c0_5061 = arith.constant 0 : index
%c512_5062 = arith.constant 512 : index
%c1_5063 = arith.constant 1 : index
%c512_5064 = arith.constant 512 : index
%c2_5065 = arith.constant 2 : index
%c3_5066 = arith.constant 3 : index
%c3_5067 = arith.constant 3 : index
%c3_5068 = arith.constant 3 : index
%4747 = tensor.empty() : tensor<512x512x3x3xi8>
%4748 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%178 : tensor<512x512x3x3xf32>) outs(%4747 : tensor<512x512x3x3xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point and scale are re-materialised as f32 inside the body.
%5774 = torch_c.to_i64 %4745
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %4743
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round ties to even (math.roundeven) rather than away from zero (math.round),
// matching the ONNX QuantizeLinear / quantize_per_tensor rounding rule.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float-to-int conversion.
// Both comparisons are unordered, so a NaN value ends up as 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<512x512x3x3xi8>
// The two casts below are type-identical (no shape change).
%cast_5069 = tensor.cast %4748 : tensor<512x512x3x3xi8> to tensor<512x512x3x3xi8>
%cast_5070 = tensor.cast %cast_5069 : tensor<512x512x3x3xi8> to tensor<512x512x3x3xi8>
// Dequantize the 512x512x3x3 i8 tensor %cast_5070 back to f32:
//   w = (q - zero_point) * scale, with scale = 3.90625e-03 and zero_point = 0.
%4749 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4750 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%4751 = torch.aten.item %4749 : !torch.vtensor<[],f32> -> !torch.float
%4752 = torch_c.to_f64 %4751
%4753 = torch.aten.item %4750 : !torch.vtensor<[],si8> -> !torch.int
%4754 = torch_c.to_i64 %4753
%cast_5071 = tensor.cast %cast_5070 : tensor<512x512x3x3xi8> to tensor<512x512x3x3xi8>
%c1_5072 = arith.constant 1 : index
%c0_5073 = arith.constant 0 : index
%c512_5074 = arith.constant 512 : index
%c1_5075 = arith.constant 1 : index
%c512_5076 = arith.constant 512 : index
%c2_5077 = arith.constant 2 : index
%c3_5078 = arith.constant 3 : index
%c3_5079 = arith.constant 3 : index
%c3_5080 = arith.constant 3 : index
%4755 = tensor.empty() : tensor<512x512x3x3xf32>
%4756 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5071 : tensor<512x512x3x3xi8>) outs(%4755 : tensor<512x512x3x3xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 before subtracting the zero point so the difference cannot wrap in i8.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4753
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Multiply by the scale (narrowed from f64 to f32).
%5779 = torch_c.to_f64 %4751
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<512x512x3x3xf32>
%cast_5081 = tensor.cast %4756 : tensor<512x512x3x3xf32> to tensor<512x512x3x3xf32>
// Quantize the 512-element f32 vector %180 (defined earlier in the function) to i8:
//   q = clamp(round_half_even(b / scale) + zero_point, -128, 127)
// with scale = 7.8125e-03 and zero_point = 0 taken from the literals below.
%4757 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4758 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// NOTE(review): %int12_5082 is not referenced inside this block; presumably a leftover dtype code - confirm.
%int12_5082 = torch.constant.int 12
%4759 = torch.aten.item %4757 : !torch.vtensor<[],f32> -> !torch.float
%4760 = torch_c.to_f64 %4759
%4761 = torch.aten.item %4758 : !torch.vtensor<[],si8> -> !torch.int
%4762 = torch_c.to_i64 %4761
%c1_5083 = arith.constant 1 : index
%c0_5084 = arith.constant 0 : index
%c512_5085 = arith.constant 512 : index
%4763 = tensor.empty() : tensor<512xi8>
%4764 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%180 : tensor<512xf32>) outs(%4763 : tensor<512xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point and scale are re-materialised as f32 inside the body.
%5774 = torch_c.to_i64 %4761
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %4759
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Round ties to even (math.roundeven) rather than away from zero (math.round),
// matching the ONNX QuantizeLinear / quantize_per_tensor rounding rule.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the signed 8-bit range before the float-to-int conversion.
// Both comparisons are unordered, so a NaN value ends up as 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<512xi8>
// The two casts below are type-identical (no shape change).
%cast_5086 = tensor.cast %4764 : tensor<512xi8> to tensor<512xi8>
%cast_5087 = tensor.cast %cast_5086 : tensor<512xi8> to tensor<512xi8>
// Dequantize the 512-element i8 vector %cast_5087 back to f32:
//   b = (q - zero_point) * scale, with scale = 7.8125e-03 and zero_point = 0.
%4765 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4766 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%4767 = torch.aten.item %4765 : !torch.vtensor<[],f32> -> !torch.float
%4768 = torch_c.to_f64 %4767
%4769 = torch.aten.item %4766 : !torch.vtensor<[],si8> -> !torch.int
%4770 = torch_c.to_i64 %4769
%cast_5088 = tensor.cast %cast_5087 : tensor<512xi8> to tensor<512xi8>
%c1_5089 = arith.constant 1 : index
%c0_5090 = arith.constant 0 : index
%c512_5091 = arith.constant 512 : index
%4771 = tensor.empty() : tensor<512xf32>
%4772 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_5088 : tensor<512xi8>) outs(%4771 : tensor<512xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 before subtracting the zero point so the difference cannot wrap in i8.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4769
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Multiply by the scale (narrowed from f64 to f32).
%5779 = torch_c.to_f64 %4767
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<512xf32>
%cast_5092 = tensor.cast %4772 : tensor<512xf32> to tensor<512xf32>
// 2-D convolution (NCHW input, FCHW weights):
//   input   %cast_5058 : 1x512x14x14 f32 (defined before this chunk)
//   weights %cast_5081 : 512x512x3x3 f32 (defined before this chunk)
//   init    %cast_5092 : 512 f32, broadcast along the channel dim (#map3)
//   padding 1 on both spatial dims, stride 2, dilation 1, groups 1
// The result is cast to the static shape 1x512x7x7 as %cast_5141.
// Many of the constants below are not referenced in the visible code
// (e.g. the ListConstruct results, %false_5100, %4780, %4781).
%int1_5093 = torch.constant.int 1
%int1_5094 = torch.constant.int 1
%int1_5095 = torch.constant.int 1
%int1_5096 = torch.constant.int 1
%int2_5097 = torch.constant.int 2
%int2_5098 = torch.constant.int 2
%int0_5099 = torch.constant.int 0
%4773 = torch.prim.ListConstruct %int1_5093, %int1_5094 : (!torch.int, !torch.int) -> !torch.list<int>
%4774 = torch.prim.ListConstruct %int1_5095, %int1_5096 : (!torch.int, !torch.int) -> !torch.list<int>
%4775 = torch.prim.ListConstruct %int2_5097, %int2_5098 : (!torch.int, !torch.int) -> !torch.list<int>
%4776 = torch.prim.ListConstruct %int0_5099, %int0_5099 : (!torch.int, !torch.int) -> !torch.list<int>
%false_5100 = torch.constant.bool false
%int1_5101 = torch.constant.int 1
// %4777 is the group count (1); %4778/%4779 are the pad amounts used by
// tensor.pad and by the output-size arithmetic below.
%4777 = torch_c.to_i64 %int1_5101
%4778 = torch_c.to_i64 %int1_5093
%4779 = torch_c.to_i64 %int1_5094
%4780 = torch_c.to_i64 %int0_5099
%4781 = torch_c.to_i64 %int0_5099
// NOTE(review): the index constants below look like (dim index, dim size)
// pairs for the input (1,512,14,14) and the weights (512,512,3,3) - only a
// few of them are used in the visible code.
%c0_5102 = arith.constant 0 : index
%c1_5103 = arith.constant 1 : index
%c1_5104 = arith.constant 1 : index
%c512_5105 = arith.constant 512 : index
%c2_5106 = arith.constant 2 : index
%c14_5107 = arith.constant 14 : index
%c3_5108 = arith.constant 3 : index
%c14_5109 = arith.constant 14 : index
%c0_5110 = arith.constant 0 : index
%c512_5111 = arith.constant 512 : index
%c1_5112 = arith.constant 1 : index
%c512_5113 = arith.constant 512 : index
%c2_5114 = arith.constant 2 : index
%c3_5115 = arith.constant 3 : index
%c3_5116 = arith.constant 3 : index
%c3_5117 = arith.constant 3 : index
// Runtime checks that 512 (input channels) and 512 (weight dim 0) are both
// divisible by the group count.
%4782 = arith.index_cast %4777 : i64 to index
%c0_5118 = arith.constant 0 : index
%4783 = arith.remsi %c512_5105, %4782 : index
%4784 = arith.cmpi eq, %c0_5118, %4783 : index
cf.assert %4784, "invalid: groups must divide input channel size evenly."
%c0_5119 = arith.constant 0 : index
%4785 = arith.remsi %c512_5111, %4782 : index
%4786 = arith.cmpi eq, %c0_5119, %4785 : index
cf.assert %4786, "invalid: groups must divide weight batch size evenly."
// %c1_i64_5120/5121 multiply (kernel - 1) and %c2_i64_5122/5123 are the
// divisors in the output-size arithmetic below (they match the dilation 1 and
// stride 2 attributes on the conv op).
%c1_i64_5120 = arith.constant 1 : i64
%c1_i64_5121 = arith.constant 1 : i64
%c2_i64_5122 = arith.constant 2 : i64
%c2_i64_5123 = arith.constant 2 : i64
%cst_5124 = arith.constant 0.000000e+00 : f32
%c0_5125 = arith.constant 0 : index
%c1_5126 = arith.constant 1 : index
%c1_5127 = arith.constant 1 : index
%c512_5128 = arith.constant 512 : index
%c2_5129 = arith.constant 2 : index
%c14_5130 = arith.constant 14 : index
%c3_5131 = arith.constant 3 : index
%c14_5132 = arith.constant 14 : index
%c0_i64_5133 = arith.constant 0 : i64
// Zero-pad the input: 0 on dims 0 and 1, %4778/%4779 (= 1) on dims 2 and 3,
// applied to both the low and the high side.
%4787 = arith.index_cast %c0_i64_5133 : i64 to index
%4788 = arith.index_cast %c0_i64_5133 : i64 to index
%4789 = arith.index_cast %4778 : i64 to index
%4790 = arith.index_cast %4779 : i64 to index
%padded_5134 = tensor.pad %cast_5058 low[%4787, %4788, %4789, %4790] high[%4787, %4788, %4789, %4790] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_5124 : f32
} : tensor<1x512x14x14xf32> to tensor<?x?x?x?xf32>
// Output extent for dim 2:
//   (in + 2*pad - dilation*(kernel - 1) - 1) floordiv stride + 1
//   = (14 + 2 - 1*(3 - 1) - 1) floordiv 2 + 1 = 7
%4791 = arith.index_cast %c3_5115 : index to i64
%c1_i64_5135 = arith.constant 1 : i64
%c2_i64_5136 = arith.constant 2 : i64
%4792 = arith.muli %4778, %c2_i64_5136 : i64
%4793 = arith.index_cast %c14_5107 : index to i64
%4794 = arith.addi %4793, %4792 : i64
%4795 = arith.subi %4791, %c1_i64_5135 : i64
%4796 = arith.muli %c1_i64_5120, %4795 : i64
%4797 = arith.subi %4794, %4796 : i64
%4798 = arith.subi %4797, %c1_i64_5135 : i64
%4799 = arith.floordivsi %4798, %c2_i64_5122 : i64
%4800 = arith.addi %4799, %c1_i64_5135 : i64
%4801 = arith.index_cast %4800 : i64 to index
// Same computation for dim 3 (also evaluates to 7).
%4802 = arith.index_cast %c3_5117 : index to i64
%c1_i64_5137 = arith.constant 1 : i64
%c2_i64_5138 = arith.constant 2 : i64
%4803 = arith.muli %4779, %c2_i64_5138 : i64
%4804 = arith.index_cast %c14_5109 : index to i64
%4805 = arith.addi %4804, %4803 : i64
%4806 = arith.subi %4802, %c1_i64_5137 : i64
%4807 = arith.muli %c1_i64_5121, %4806 : i64
%4808 = arith.subi %4805, %4807 : i64
%4809 = arith.subi %4808, %c1_i64_5137 : i64
%4810 = arith.floordivsi %4809, %c2_i64_5123 : i64
%4811 = arith.addi %4810, %c1_i64_5137 : i64
%4812 = arith.index_cast %4811 : i64 to index
// Build the conv accumulator: every output position of channel d1 starts at
// %cast_5092[d1] (#map3 selects d1 only).
%4813 = tensor.empty(%4801, %4812) : tensor<1x512x?x?xf32>
%4814 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5092 : tensor<512xf32>) outs(%4813 : tensor<1x512x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x512x?x?xf32>
// Per-group channel counts; not referenced in the visible code.
%4815 = arith.floordivsi %c512_5105, %4782 : index
%4816 = arith.floordivsi %c512_5111, %4782 : index
%c0_5139 = arith.constant 0 : index
%c1_5140 = arith.constant 1 : index
%4817 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%padded_5134, %cast_5081 : tensor<?x?x?x?xf32>, tensor<512x512x3x3xf32>) outs(%4814 : tensor<1x512x?x?xf32>) -> tensor<1x512x?x?xf32>
// Refine the dynamic spatial dims to the static 7x7 computed above.
%cast_5141 = tensor.cast %4817 : tensor<1x512x?x?xf32> to tensor<1x512x7x7xf32>
// Element-wise clamp at zero (ReLU-style) over %cast_5141 (1x512x7x7 f32):
// yields %in when `%in ugt 0.0`, otherwise 0.0.
// `ugt` is the unordered predicate, so it is also true for NaN inputs and a
// NaN %in is passed through unchanged.
// The index constants below are not referenced in the visible code.
%c1_5142 = arith.constant 1 : index
%c1_5143 = arith.constant 1 : index
%c512_5144 = arith.constant 512 : index
%c2_5145 = arith.constant 2 : index
%c7_5146 = arith.constant 7 : index
%c3_5147 = arith.constant 3 : index
%c7_5148 = arith.constant 7 : index
%4818 = tensor.empty() : tensor<1x512x7x7xf32>
%4819 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5141 : tensor<1x512x7x7xf32>) outs(%4818 : tensor<1x512x7x7xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x512x7x7xf32>
// Cast between identical types; only introduces the name %cast_5149.
%cast_5149 = tensor.cast %4819 : tensor<1x512x7x7xf32> to tensor<1x512x7x7xf32>
// Quantize %cast_5149 (1x512x7x7 f32) to i8, element-wise:
//   q = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
// with scale = 1.5625e-02 (%4820) and zero_point = 0 (%4821).
// math.round rounds to nearest with halfway cases away from zero.
// %int12_5150, %4823, %4825 and the index constants are not referenced in the
// visible code; the region converts %4822/%4824 itself.
%4820 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%4821 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_5150 = torch.constant.int 12
%4822 = torch.aten.item %4820 : !torch.vtensor<[],f32> -> !torch.float
%4823 = torch_c.to_f64 %4822
%4824 = torch.aten.item %4821 : !torch.vtensor<[],si8> -> !torch.int
%4825 = torch_c.to_i64 %4824
%c1_5151 = arith.constant 1 : index
%c1_5152 = arith.constant 1 : index
%c512_5153 = arith.constant 512 : index
%c2_5154 = arith.constant 2 : index
%c7_5155 = arith.constant 7 : index
%c3_5156 = arith.constant 3 : index
%c7_5157 = arith.constant 7 : index
%4826 = tensor.empty() : tensor<1x512x7x7xi8>
%4827 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5149 : tensor<1x512x7x7xf32>) outs(%4826 : tensor<1x512x7x7xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point and scale as f32.
%5774 = torch_c.to_i64 %4824
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %4822
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x512x7x7xi8>
// Two casts between identical types; they only introduce new names.
%cast_5158 = tensor.cast %4827 : tensor<1x512x7x7xi8> to tensor<1x512x7x7xi8>
%cast_5159 = tensor.cast %cast_5158 : tensor<1x512x7x7xi8> to tensor<1x512x7x7xi8>
// Dequantize %cast_5159 (1x512x7x7 i8) back to f32, element-wise:
//   out = f32(i32(in) - zero_point) * scale
// with scale = 1.5625e-02 (%4828) and zero_point = 0 (%4829).
// %4831, %4833 and the index constants are not referenced in the visible
// code; the region converts %4830/%4832 itself.
%4828 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%4829 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%4830 = torch.aten.item %4828 : !torch.vtensor<[],f32> -> !torch.float
%4831 = torch_c.to_f64 %4830
%4832 = torch.aten.item %4829 : !torch.vtensor<[],si8> -> !torch.int
%4833 = torch_c.to_i64 %4832
%cast_5160 = tensor.cast %cast_5159 : tensor<1x512x7x7xi8> to tensor<1x512x7x7xi8>
%c1_5161 = arith.constant 1 : index
%c1_5162 = arith.constant 1 : index
%c512_5163 = arith.constant 512 : index
%c2_5164 = arith.constant 2 : index
%c7_5165 = arith.constant 7 : index
%c3_5166 = arith.constant 3 : index
%c7_5167 = arith.constant 7 : index
%4834 = tensor.empty() : tensor<1x512x7x7xf32>
%4835 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5160 : tensor<1x512x7x7xi8>) outs(%4834 : tensor<1x512x7x7xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 before subtracting so the difference cannot wrap in i8.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4832
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %4830
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x512x7x7xf32>
// Cast between identical types; only introduces the name %cast_5168.
%cast_5168 = tensor.cast %4835 : tensor<1x512x7x7xf32> to tensor<1x512x7x7xf32>
// Quantize %182 (2048x512x1x1 f32, defined before this chunk) to i8:
//   q = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
// with scale = 3.90625e-03 (%4836) and zero_point = 0 (%4837).
// The input is read through #map4, which indexes the two trailing dims at 0.
// %int12_5169, %4839, %4841 and the index constants are not referenced in the
// visible code.
%4836 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4837 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_5169 = torch.constant.int 12
%4838 = torch.aten.item %4836 : !torch.vtensor<[],f32> -> !torch.float
%4839 = torch_c.to_f64 %4838
%4840 = torch.aten.item %4837 : !torch.vtensor<[],si8> -> !torch.int
%4841 = torch_c.to_i64 %4840
%c1_5170 = arith.constant 1 : index
%c0_5171 = arith.constant 0 : index
%c2048 = arith.constant 2048 : index
%c1_5172 = arith.constant 1 : index
%c512_5173 = arith.constant 512 : index
%4842 = tensor.empty() : tensor<2048x512x1x1xi8>
%4843 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%182 : tensor<2048x512x1x1xf32>) outs(%4842 : tensor<2048x512x1x1xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point and scale as f32.
%5774 = torch_c.to_i64 %4840
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %4838
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<2048x512x1x1xi8>
// Two casts between identical types; they only introduce new names.
%cast_5174 = tensor.cast %4843 : tensor<2048x512x1x1xi8> to tensor<2048x512x1x1xi8>
%cast_5175 = tensor.cast %cast_5174 : tensor<2048x512x1x1xi8> to tensor<2048x512x1x1xi8>
// Dequantize %cast_5175 (2048x512x1x1 i8) back to f32, element-wise:
//   out = f32(i32(in) - zero_point) * scale
// with scale = 3.90625e-03 (%4844) and zero_point = 0 (%4845).
// The result %cast_5182 is the weight operand of the conv producing %4912.
// %4847, %4849 and the index constants are not referenced in the visible code.
%4844 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4845 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%4846 = torch.aten.item %4844 : !torch.vtensor<[],f32> -> !torch.float
%4847 = torch_c.to_f64 %4846
%4848 = torch.aten.item %4845 : !torch.vtensor<[],si8> -> !torch.int
%4849 = torch_c.to_i64 %4848
%cast_5176 = tensor.cast %cast_5175 : tensor<2048x512x1x1xi8> to tensor<2048x512x1x1xi8>
%c1_5177 = arith.constant 1 : index
%c0_5178 = arith.constant 0 : index
%c2048_5179 = arith.constant 2048 : index
%c1_5180 = arith.constant 1 : index
%c512_5181 = arith.constant 512 : index
%4850 = tensor.empty() : tensor<2048x512x1x1xf32>
%4851 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5176 : tensor<2048x512x1x1xi8>) outs(%4850 : tensor<2048x512x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 before subtracting so the difference cannot wrap in i8.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4848
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %4846
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<2048x512x1x1xf32>
// Cast between identical types; only introduces the name %cast_5182.
%cast_5182 = tensor.cast %4851 : tensor<2048x512x1x1xf32> to tensor<2048x512x1x1xf32>
// Quantize %184 (2048 f32, defined before this chunk) to i8, element-wise:
//   q = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
// with scale = 1.5625e-02 (%4852) and zero_point = 0 (%4853).
// %int12_5183, %4855, %4857 and the index constants are not referenced in the
// visible code.
%4852 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%4853 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_5183 = torch.constant.int 12
%4854 = torch.aten.item %4852 : !torch.vtensor<[],f32> -> !torch.float
%4855 = torch_c.to_f64 %4854
%4856 = torch.aten.item %4853 : !torch.vtensor<[],si8> -> !torch.int
%4857 = torch_c.to_i64 %4856
%c1_5184 = arith.constant 1 : index
%c0_5185 = arith.constant 0 : index
%c2048_5186 = arith.constant 2048 : index
%4858 = tensor.empty() : tensor<2048xi8>
%4859 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%184 : tensor<2048xf32>) outs(%4858 : tensor<2048xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point and scale as f32.
%5774 = torch_c.to_i64 %4856
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %4854
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<2048xi8>
// Two casts between identical types; they only introduce new names.
%cast_5187 = tensor.cast %4859 : tensor<2048xi8> to tensor<2048xi8>
%cast_5188 = tensor.cast %cast_5187 : tensor<2048xi8> to tensor<2048xi8>
// Dequantize %cast_5188 (2048 i8) back to f32, element-wise:
//   out = f32(i32(in) - zero_point) * scale
// with scale = 1.5625e-02 (%4860) and zero_point = 0 (%4861).
// The result %cast_5193 is later broadcast per channel as the initial
// accumulator of the conv producing %4912.
// %4863, %4865 and the index constants are not referenced in the visible code.
%4860 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%4861 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%4862 = torch.aten.item %4860 : !torch.vtensor<[],f32> -> !torch.float
%4863 = torch_c.to_f64 %4862
%4864 = torch.aten.item %4861 : !torch.vtensor<[],si8> -> !torch.int
%4865 = torch_c.to_i64 %4864
%cast_5189 = tensor.cast %cast_5188 : tensor<2048xi8> to tensor<2048xi8>
%c1_5190 = arith.constant 1 : index
%c0_5191 = arith.constant 0 : index
%c2048_5192 = arith.constant 2048 : index
%4866 = tensor.empty() : tensor<2048xf32>
%4867 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_5189 : tensor<2048xi8>) outs(%4866 : tensor<2048xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 before subtracting so the difference cannot wrap in i8.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4864
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %4862
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<2048xf32>
// Cast between identical types; only introduces the name %cast_5193.
%cast_5193 = tensor.cast %4867 : tensor<2048xf32> to tensor<2048xf32>
// 2-D convolution (NCHW input, FCHW weights):
//   input   %cast_5168 : 1x512x7x7 f32
//   weights %cast_5182 : 2048x512x1x1 f32
//   init    %cast_5193 : 2048 f32, broadcast along the channel dim (#map3)
//   padding 0 on both spatial dims, stride 1, dilation 1, groups 1
// The result is cast to the static shape 1x2048x7x7 as %cast_5242.
// Many of the constants below are not referenced in the visible code
// (e.g. the ListConstruct results, %false_5201, %4875, %4876).
%int0_5194 = torch.constant.int 0
%int0_5195 = torch.constant.int 0
%int1_5196 = torch.constant.int 1
%int1_5197 = torch.constant.int 1
%int1_5198 = torch.constant.int 1
%int1_5199 = torch.constant.int 1
%int0_5200 = torch.constant.int 0
%4868 = torch.prim.ListConstruct %int0_5194, %int0_5195 : (!torch.int, !torch.int) -> !torch.list<int>
%4869 = torch.prim.ListConstruct %int1_5196, %int1_5197 : (!torch.int, !torch.int) -> !torch.list<int>
%4870 = torch.prim.ListConstruct %int1_5198, %int1_5199 : (!torch.int, !torch.int) -> !torch.list<int>
%4871 = torch.prim.ListConstruct %int0_5200, %int0_5200 : (!torch.int, !torch.int) -> !torch.list<int>
%false_5201 = torch.constant.bool false
%int1_5202 = torch.constant.int 1
// %4872 is the group count (1); %4873/%4874 are the pad amounts (0) used by
// tensor.pad and by the output-size arithmetic below.
%4872 = torch_c.to_i64 %int1_5202
%4873 = torch_c.to_i64 %int0_5194
%4874 = torch_c.to_i64 %int0_5195
%4875 = torch_c.to_i64 %int0_5200
%4876 = torch_c.to_i64 %int0_5200
// NOTE(review): the index constants below look like (dim index, dim size)
// pairs for the input (1,512,7,7) and the weights (2048,512,1,1) - only a
// few of them are used in the visible code.
%c0_5203 = arith.constant 0 : index
%c1_5204 = arith.constant 1 : index
%c1_5205 = arith.constant 1 : index
%c512_5206 = arith.constant 512 : index
%c2_5207 = arith.constant 2 : index
%c7_5208 = arith.constant 7 : index
%c3_5209 = arith.constant 3 : index
%c7_5210 = arith.constant 7 : index
%c0_5211 = arith.constant 0 : index
%c2048_5212 = arith.constant 2048 : index
%c1_5213 = arith.constant 1 : index
%c512_5214 = arith.constant 512 : index
%c2_5215 = arith.constant 2 : index
%c1_5216 = arith.constant 1 : index
%c3_5217 = arith.constant 3 : index
%c1_5218 = arith.constant 1 : index
// Runtime checks that 512 (input channels) and 2048 (weight dim 0) are both
// divisible by the group count.
%4877 = arith.index_cast %4872 : i64 to index
%c0_5219 = arith.constant 0 : index
%4878 = arith.remsi %c512_5206, %4877 : index
%4879 = arith.cmpi eq, %c0_5219, %4878 : index
cf.assert %4879, "invalid: groups must divide input channel size evenly."
%c0_5220 = arith.constant 0 : index
%4880 = arith.remsi %c2048_5212, %4877 : index
%4881 = arith.cmpi eq, %c0_5220, %4880 : index
cf.assert %4881, "invalid: groups must divide weight batch size evenly."
// %c1_i64_5221/5222 multiply (kernel - 1) and %c1_i64_5223/5224 are the
// divisors in the output-size arithmetic below (they match the dilation 1 and
// stride 1 attributes on the conv op).
%c1_i64_5221 = arith.constant 1 : i64
%c1_i64_5222 = arith.constant 1 : i64
%c1_i64_5223 = arith.constant 1 : i64
%c1_i64_5224 = arith.constant 1 : i64
%cst_5225 = arith.constant 0.000000e+00 : f32
%c0_5226 = arith.constant 0 : index
%c1_5227 = arith.constant 1 : index
%c1_5228 = arith.constant 1 : index
%c512_5229 = arith.constant 512 : index
%c2_5230 = arith.constant 2 : index
%c7_5231 = arith.constant 7 : index
%c3_5232 = arith.constant 3 : index
%c7_5233 = arith.constant 7 : index
%c0_i64_5234 = arith.constant 0 : i64
// tensor.pad with all pad amounts equal to 0: values are unchanged, but the
// result type becomes fully dynamic.
%4882 = arith.index_cast %c0_i64_5234 : i64 to index
%4883 = arith.index_cast %c0_i64_5234 : i64 to index
%4884 = arith.index_cast %4873 : i64 to index
%4885 = arith.index_cast %4874 : i64 to index
%padded_5235 = tensor.pad %cast_5168 low[%4882, %4883, %4884, %4885] high[%4882, %4883, %4884, %4885] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_5225 : f32
} : tensor<1x512x7x7xf32> to tensor<?x?x?x?xf32>
// Output extent for dim 2:
//   (in + 2*pad - dilation*(kernel - 1) - 1) floordiv stride + 1
//   = (7 + 0 - 1*(1 - 1) - 1) floordiv 1 + 1 = 7
%4886 = arith.index_cast %c1_5216 : index to i64
%c1_i64_5236 = arith.constant 1 : i64
%c2_i64_5237 = arith.constant 2 : i64
%4887 = arith.muli %4873, %c2_i64_5237 : i64
%4888 = arith.index_cast %c7_5208 : index to i64
%4889 = arith.addi %4888, %4887 : i64
%4890 = arith.subi %4886, %c1_i64_5236 : i64
%4891 = arith.muli %c1_i64_5221, %4890 : i64
%4892 = arith.subi %4889, %4891 : i64
%4893 = arith.subi %4892, %c1_i64_5236 : i64
%4894 = arith.floordivsi %4893, %c1_i64_5223 : i64
%4895 = arith.addi %4894, %c1_i64_5236 : i64
%4896 = arith.index_cast %4895 : i64 to index
// Same computation for dim 3 (also evaluates to 7).
%4897 = arith.index_cast %c1_5218 : index to i64
%c1_i64_5238 = arith.constant 1 : i64
%c2_i64_5239 = arith.constant 2 : i64
%4898 = arith.muli %4874, %c2_i64_5239 : i64
%4899 = arith.index_cast %c7_5210 : index to i64
%4900 = arith.addi %4899, %4898 : i64
%4901 = arith.subi %4897, %c1_i64_5238 : i64
%4902 = arith.muli %c1_i64_5222, %4901 : i64
%4903 = arith.subi %4900, %4902 : i64
%4904 = arith.subi %4903, %c1_i64_5238 : i64
%4905 = arith.floordivsi %4904, %c1_i64_5224 : i64
%4906 = arith.addi %4905, %c1_i64_5238 : i64
%4907 = arith.index_cast %4906 : i64 to index
// Build the conv accumulator: every output position of channel d1 starts at
// %cast_5193[d1] (#map3 selects d1 only).
%4908 = tensor.empty(%4896, %4907) : tensor<1x2048x?x?xf32>
%4909 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5193 : tensor<2048xf32>) outs(%4908 : tensor<1x2048x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x2048x?x?xf32>
// Per-group channel counts; not referenced in the visible code.
%4910 = arith.floordivsi %c512_5206, %4877 : index
%4911 = arith.floordivsi %c2048_5212, %4877 : index
%c0_5240 = arith.constant 0 : index
%c1_5241 = arith.constant 1 : index
%4912 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_5235, %cast_5182 : tensor<?x?x?x?xf32>, tensor<2048x512x1x1xf32>) outs(%4909 : tensor<1x2048x?x?xf32>) -> tensor<1x2048x?x?xf32>
// Refine the dynamic spatial dims to the static 7x7 computed above.
%cast_5242 = tensor.cast %4912 : tensor<1x2048x?x?xf32> to tensor<1x2048x7x7xf32>
// Quantize %cast_5242 (1x2048x7x7 f32) to i8, element-wise:
//   q = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
// with scale = 1.5625e-02 (%4913) and zero_point = 0 (%4914).
// %int12_5243, %4916, %4918 and the index constants are not referenced in the
// visible code.
%4913 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%4914 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_5243 = torch.constant.int 12
%4915 = torch.aten.item %4913 : !torch.vtensor<[],f32> -> !torch.float
%4916 = torch_c.to_f64 %4915
%4917 = torch.aten.item %4914 : !torch.vtensor<[],si8> -> !torch.int
%4918 = torch_c.to_i64 %4917
%c1_5244 = arith.constant 1 : index
%c1_5245 = arith.constant 1 : index
%c2048_5246 = arith.constant 2048 : index
%c2_5247 = arith.constant 2 : index
%c7_5248 = arith.constant 7 : index
%c3_5249 = arith.constant 3 : index
%c7_5250 = arith.constant 7 : index
%4919 = tensor.empty() : tensor<1x2048x7x7xi8>
%4920 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5242 : tensor<1x2048x7x7xf32>) outs(%4919 : tensor<1x2048x7x7xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point and scale as f32.
%5774 = torch_c.to_i64 %4917
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %4915
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x2048x7x7xi8>
// Two casts between identical types; they only introduce new names.
%cast_5251 = tensor.cast %4920 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
%cast_5252 = tensor.cast %cast_5251 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
// Dequantize %cast_5252 (1x2048x7x7 i8) back to f32, element-wise:
//   out = f32(i32(in) - zero_point) * scale
// with scale = 1.5625e-02 (%4921) and zero_point = 0 (%4922).
// The result %cast_5261 has no consumer in the visible code.
// %4924, %4926 and the index constants are not referenced in the visible code.
%4921 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%4922 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%4923 = torch.aten.item %4921 : !torch.vtensor<[],f32> -> !torch.float
%4924 = torch_c.to_f64 %4923
%4925 = torch.aten.item %4922 : !torch.vtensor<[],si8> -> !torch.int
%4926 = torch_c.to_i64 %4925
%cast_5253 = tensor.cast %cast_5252 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
%c1_5254 = arith.constant 1 : index
%c1_5255 = arith.constant 1 : index
%c2048_5256 = arith.constant 2048 : index
%c2_5257 = arith.constant 2 : index
%c7_5258 = arith.constant 7 : index
%c3_5259 = arith.constant 3 : index
%c7_5260 = arith.constant 7 : index
%4927 = tensor.empty() : tensor<1x2048x7x7xf32>
%4928 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5253 : tensor<1x2048x7x7xi8>) outs(%4927 : tensor<1x2048x7x7xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 before subtracting so the difference cannot wrap in i8.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4925
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %4923
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x2048x7x7xf32>
// Cast between identical types; only introduces the name %cast_5261.
%cast_5261 = tensor.cast %4928 : tensor<1x2048x7x7xf32> to tensor<1x2048x7x7xf32>
// Quantize %186 (2048x1024x1x1 f32, defined before this chunk) to i8:
//   q = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
// with scale = 1.5625e-02 (%4929) and zero_point = 0 (%4930).
// The input is read through #map4, which indexes the two trailing dims at 0.
// %int12_5262, %4932, %4934 and the index constants are not referenced in the
// visible code.
%4929 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%4930 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_5262 = torch.constant.int 12
%4931 = torch.aten.item %4929 : !torch.vtensor<[],f32> -> !torch.float
%4932 = torch_c.to_f64 %4931
%4933 = torch.aten.item %4930 : !torch.vtensor<[],si8> -> !torch.int
%4934 = torch_c.to_i64 %4933
%c1_5263 = arith.constant 1 : index
%c0_5264 = arith.constant 0 : index
%c2048_5265 = arith.constant 2048 : index
%c1_5266 = arith.constant 1 : index
%c1024_5267 = arith.constant 1024 : index
%4935 = tensor.empty() : tensor<2048x1024x1x1xi8>
%4936 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%186 : tensor<2048x1024x1x1xf32>) outs(%4935 : tensor<2048x1024x1x1xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point and scale as f32.
%5774 = torch_c.to_i64 %4933
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %4931
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<2048x1024x1x1xi8>
// Two casts between identical types; they only introduce new names.
%cast_5268 = tensor.cast %4936 : tensor<2048x1024x1x1xi8> to tensor<2048x1024x1x1xi8>
%cast_5269 = tensor.cast %cast_5268 : tensor<2048x1024x1x1xi8> to tensor<2048x1024x1x1xi8>
// Dequantize %cast_5269 (2048x1024x1x1 i8) back to f32, element-wise:
//   out = f32(i32(in) - zero_point) * scale
// with scale = 1.5625e-02 (%4937) and zero_point = 0 (%4938).
// The result %cast_5276 is the weight operand of the conv producing %5005.
// %4940, %4942 and the index constants are not referenced in the visible code.
%4937 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%4938 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%4939 = torch.aten.item %4937 : !torch.vtensor<[],f32> -> !torch.float
%4940 = torch_c.to_f64 %4939
%4941 = torch.aten.item %4938 : !torch.vtensor<[],si8> -> !torch.int
%4942 = torch_c.to_i64 %4941
%cast_5270 = tensor.cast %cast_5269 : tensor<2048x1024x1x1xi8> to tensor<2048x1024x1x1xi8>
%c1_5271 = arith.constant 1 : index
%c0_5272 = arith.constant 0 : index
%c2048_5273 = arith.constant 2048 : index
%c1_5274 = arith.constant 1 : index
%c1024_5275 = arith.constant 1024 : index
%4943 = tensor.empty() : tensor<2048x1024x1x1xf32>
%4944 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5270 : tensor<2048x1024x1x1xi8>) outs(%4943 : tensor<2048x1024x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 before subtracting so the difference cannot wrap in i8.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4941
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %4939
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<2048x1024x1x1xf32>
// Cast between identical types; only introduces the name %cast_5276.
%cast_5276 = tensor.cast %4944 : tensor<2048x1024x1x1xf32> to tensor<2048x1024x1x1xf32>
// Quantize %188 (2048 f32, defined before this chunk) to i8, element-wise:
//   q = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
// with scale = 7.8125e-03 (%4945) and zero_point = 0 (%4946).
// %int12_5277, %4948, %4950 and the index constants are not referenced in the
// visible code.
%4945 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4946 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_5277 = torch.constant.int 12
%4947 = torch.aten.item %4945 : !torch.vtensor<[],f32> -> !torch.float
%4948 = torch_c.to_f64 %4947
%4949 = torch.aten.item %4946 : !torch.vtensor<[],si8> -> !torch.int
%4950 = torch_c.to_i64 %4949
%c1_5278 = arith.constant 1 : index
%c0_5279 = arith.constant 0 : index
%c2048_5280 = arith.constant 2048 : index
%4951 = tensor.empty() : tensor<2048xi8>
%4952 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%188 : tensor<2048xf32>) outs(%4951 : tensor<2048xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point and scale as f32.
%5774 = torch_c.to_i64 %4949
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %4947
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range before the float-to-int conversion.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<2048xi8>
// Two casts between identical types; they only introduce new names.
%cast_5281 = tensor.cast %4952 : tensor<2048xi8> to tensor<2048xi8>
%cast_5282 = tensor.cast %cast_5281 : tensor<2048xi8> to tensor<2048xi8>
// Dequantize %cast_5282 (2048 i8) back to f32, element-wise:
//   out = f32(i32(in) - zero_point) * scale
// with scale = 7.8125e-03 (%4953) and zero_point = 0 (%4954).
// The result %cast_5287 is later broadcast per channel as the initial
// accumulator of the conv producing %5005.
// %4956, %4958 and the index constants are not referenced in the visible code.
%4953 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%4954 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%4955 = torch.aten.item %4953 : !torch.vtensor<[],f32> -> !torch.float
%4956 = torch_c.to_f64 %4955
%4957 = torch.aten.item %4954 : !torch.vtensor<[],si8> -> !torch.int
%4958 = torch_c.to_i64 %4957
%cast_5283 = tensor.cast %cast_5282 : tensor<2048xi8> to tensor<2048xi8>
%c1_5284 = arith.constant 1 : index
%c0_5285 = arith.constant 0 : index
%c2048_5286 = arith.constant 2048 : index
%4959 = tensor.empty() : tensor<2048xf32>
%4960 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_5283 : tensor<2048xi8>) outs(%4959 : tensor<2048xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 before subtracting so the difference cannot wrap in i8.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %4957
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %4955
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<2048xf32>
// Cast between identical types; only introduces the name %cast_5287.
%cast_5287 = tensor.cast %4960 : tensor<2048xf32> to tensor<2048xf32>
// 2-D convolution (NCHW input, FCHW weights):
//   input   %cast_4956 : 1x1024x14x14 f32 (defined before this chunk)
//   weights %cast_5276 : 2048x1024x1x1 f32
//   init    %cast_5287 : 2048 f32, broadcast along the channel dim (#map3)
//   padding 0 on both spatial dims, stride 2, dilation 1, groups 1
// The result is cast to the static shape 1x2048x7x7 as %cast_5336.
// Many of the constants below are not referenced in the visible code
// (e.g. the ListConstruct results, %false_5295, %4968, %4969).
%int0_5288 = torch.constant.int 0
%int0_5289 = torch.constant.int 0
%int1_5290 = torch.constant.int 1
%int1_5291 = torch.constant.int 1
%int2_5292 = torch.constant.int 2
%int2_5293 = torch.constant.int 2
%int0_5294 = torch.constant.int 0
%4961 = torch.prim.ListConstruct %int0_5288, %int0_5289 : (!torch.int, !torch.int) -> !torch.list<int>
%4962 = torch.prim.ListConstruct %int1_5290, %int1_5291 : (!torch.int, !torch.int) -> !torch.list<int>
%4963 = torch.prim.ListConstruct %int2_5292, %int2_5293 : (!torch.int, !torch.int) -> !torch.list<int>
%4964 = torch.prim.ListConstruct %int0_5294, %int0_5294 : (!torch.int, !torch.int) -> !torch.list<int>
%false_5295 = torch.constant.bool false
%int1_5296 = torch.constant.int 1
// %4965 is the group count (1); %4966/%4967 are the pad amounts (0) used by
// tensor.pad and by the output-size arithmetic below.
%4965 = torch_c.to_i64 %int1_5296
%4966 = torch_c.to_i64 %int0_5288
%4967 = torch_c.to_i64 %int0_5289
%4968 = torch_c.to_i64 %int0_5294
%4969 = torch_c.to_i64 %int0_5294
// NOTE(review): the index constants below look like (dim index, dim size)
// pairs for the input (1,1024,14,14) and the weights (2048,1024,1,1) - only a
// few of them are used in the visible code.
%c0_5297 = arith.constant 0 : index
%c1_5298 = arith.constant 1 : index
%c1_5299 = arith.constant 1 : index
%c1024_5300 = arith.constant 1024 : index
%c2_5301 = arith.constant 2 : index
%c14_5302 = arith.constant 14 : index
%c3_5303 = arith.constant 3 : index
%c14_5304 = arith.constant 14 : index
%c0_5305 = arith.constant 0 : index
%c2048_5306 = arith.constant 2048 : index
%c1_5307 = arith.constant 1 : index
%c1024_5308 = arith.constant 1024 : index
%c2_5309 = arith.constant 2 : index
%c1_5310 = arith.constant 1 : index
%c3_5311 = arith.constant 3 : index
%c1_5312 = arith.constant 1 : index
// Runtime checks that 1024 (input channels) and 2048 (weight dim 0) are both
// divisible by the group count.
%4970 = arith.index_cast %4965 : i64 to index
%c0_5313 = arith.constant 0 : index
%4971 = arith.remsi %c1024_5300, %4970 : index
%4972 = arith.cmpi eq, %c0_5313, %4971 : index
cf.assert %4972, "invalid: groups must divide input channel size evenly."
%c0_5314 = arith.constant 0 : index
%4973 = arith.remsi %c2048_5306, %4970 : index
%4974 = arith.cmpi eq, %c0_5314, %4973 : index
cf.assert %4974, "invalid: groups must divide weight batch size evenly."
// %c1_i64_5315/5316 multiply (kernel - 1) and %c2_i64_5317/5318 are the
// divisors in the output-size arithmetic below (they match the dilation 1 and
// stride 2 attributes on the conv op).
%c1_i64_5315 = arith.constant 1 : i64
%c1_i64_5316 = arith.constant 1 : i64
%c2_i64_5317 = arith.constant 2 : i64
%c2_i64_5318 = arith.constant 2 : i64
%cst_5319 = arith.constant 0.000000e+00 : f32
%c0_5320 = arith.constant 0 : index
%c1_5321 = arith.constant 1 : index
%c1_5322 = arith.constant 1 : index
%c1024_5323 = arith.constant 1024 : index
%c2_5324 = arith.constant 2 : index
%c14_5325 = arith.constant 14 : index
%c3_5326 = arith.constant 3 : index
%c14_5327 = arith.constant 14 : index
%c0_i64_5328 = arith.constant 0 : i64
// tensor.pad with all pad amounts equal to 0: values are unchanged, but the
// result type becomes fully dynamic.
%4975 = arith.index_cast %c0_i64_5328 : i64 to index
%4976 = arith.index_cast %c0_i64_5328 : i64 to index
%4977 = arith.index_cast %4966 : i64 to index
%4978 = arith.index_cast %4967 : i64 to index
%padded_5329 = tensor.pad %cast_4956 low[%4975, %4976, %4977, %4978] high[%4975, %4976, %4977, %4978] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_5319 : f32
} : tensor<1x1024x14x14xf32> to tensor<?x?x?x?xf32>
// Output extent for dim 2:
//   (in + 2*pad - dilation*(kernel - 1) - 1) floordiv stride + 1
//   = (14 + 0 - 1*(1 - 1) - 1) floordiv 2 + 1 = 7
%4979 = arith.index_cast %c1_5310 : index to i64
%c1_i64_5330 = arith.constant 1 : i64
%c2_i64_5331 = arith.constant 2 : i64
%4980 = arith.muli %4966, %c2_i64_5331 : i64
%4981 = arith.index_cast %c14_5302 : index to i64
%4982 = arith.addi %4981, %4980 : i64
%4983 = arith.subi %4979, %c1_i64_5330 : i64
%4984 = arith.muli %c1_i64_5315, %4983 : i64
%4985 = arith.subi %4982, %4984 : i64
%4986 = arith.subi %4985, %c1_i64_5330 : i64
%4987 = arith.floordivsi %4986, %c2_i64_5317 : i64
%4988 = arith.addi %4987, %c1_i64_5330 : i64
%4989 = arith.index_cast %4988 : i64 to index
// Same computation for dim 3 (also evaluates to 7).
%4990 = arith.index_cast %c1_5312 : index to i64
%c1_i64_5332 = arith.constant 1 : i64
%c2_i64_5333 = arith.constant 2 : i64
%4991 = arith.muli %4967, %c2_i64_5333 : i64
%4992 = arith.index_cast %c14_5304 : index to i64
%4993 = arith.addi %4992, %4991 : i64
%4994 = arith.subi %4990, %c1_i64_5332 : i64
%4995 = arith.muli %c1_i64_5316, %4994 : i64
%4996 = arith.subi %4993, %4995 : i64
%4997 = arith.subi %4996, %c1_i64_5332 : i64
%4998 = arith.floordivsi %4997, %c2_i64_5318 : i64
%4999 = arith.addi %4998, %c1_i64_5332 : i64
%5000 = arith.index_cast %4999 : i64 to index
// Build the conv accumulator: every output position of channel d1 starts at
// %cast_5287[d1] (#map3 selects d1 only).
%5001 = tensor.empty(%4989, %5000) : tensor<1x2048x?x?xf32>
%5002 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5287 : tensor<2048xf32>) outs(%5001 : tensor<1x2048x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x2048x?x?xf32>
// Per-group channel counts; not referenced in the visible code.
%5003 = arith.floordivsi %c1024_5300, %4970 : index
%5004 = arith.floordivsi %c2048_5306, %4970 : index
%c0_5334 = arith.constant 0 : index
%c1_5335 = arith.constant 1 : index
%5005 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%padded_5329, %cast_5276 : tensor<?x?x?x?xf32>, tensor<2048x1024x1x1xf32>) outs(%5002 : tensor<1x2048x?x?xf32>) -> tensor<1x2048x?x?xf32>
// Refine the dynamic spatial dims to the static 7x7 computed above.
%cast_5336 = tensor.cast %5005 : tensor<1x2048x?x?xf32> to tensor<1x2048x7x7xf32>
%5006 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%5007 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_5337 = torch.constant.int 12
%5008 = torch.aten.item %5006 : !torch.vtensor<[],f32> -> !torch.float
%5009 = torch_c.to_f64 %5008
%5010 = torch.aten.item %5007 : !torch.vtensor<[],si8> -> !torch.int
%5011 = torch_c.to_i64 %5010
%c1_5338 = arith.constant 1 : index
%c1_5339 = arith.constant 1 : index
%c2048_5340 = arith.constant 2048 : index
%c2_5341 = arith.constant 2 : index
%c7_5342 = arith.constant 7 : index
%c3_5343 = arith.constant 3 : index
%c7_5344 = arith.constant 7 : index
%5012 = tensor.empty() : tensor<1x2048x7x7xi8>
// Quantize the f32 tensor %cast_5336 to signed 8-bit, element-wise:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is the scalar %5008 (item of the dense<1.562500e-02> literal above) and
// zero_point is the scalar %5010 (item of the dense<0> si8 literal above).
// Rounding uses math.roundeven (ties to even) instead of math.round (ties away
// from zero) so exact .5 quotients follow the round-half-to-even rule of
// ONNX QuantizeLinear / torch.quantize_per_tensor.
%5013 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5336 : tensor<1x2048x7x7xf32>) outs(%5012 : tensor<1x2048x7x7xi8>) {
^bb0(%in: f32, %out: i8):
  // Zero point converted to f32.
  %5774 = torch_c.to_i64 %5010
  %5775 = arith.sitofp %5774 : i64 to f32
  // Scale narrowed from f64 to f32.
  %5776 = torch_c.to_f64 %5008
  %5777 = arith.truncf %5776 : f64 to f32
  %5778 = arith.divf %in, %5777 : f32
  %5779 = math.roundeven %5778 : f32
  %5780 = arith.addf %5779, %5775 : f32
  // Saturate to [-128, 127] before the float-to-int conversion.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
  %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
  %5783 = arith.select %5781, %cst_6197, %5780 : f32
  %5784 = arith.select %5782, %cst_6198, %5783 : f32
  %5785 = arith.fptosi %5784 : f32 to i8
  linalg.yield %5785 : i8
} -> tensor<1x2048x7x7xi8>
%cast_5345 = tensor.cast %5013 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
%cast_5346 = tensor.cast %cast_5345 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
%5014 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%5015 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%5016 = torch.aten.item %5014 : !torch.vtensor<[],f32> -> !torch.float
%5017 = torch_c.to_f64 %5016
%5018 = torch.aten.item %5015 : !torch.vtensor<[],si8> -> !torch.int
%5019 = torch_c.to_i64 %5018
%cast_5347 = tensor.cast %cast_5346 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
%c1_5348 = arith.constant 1 : index
%c1_5349 = arith.constant 1 : index
%c2048_5350 = arith.constant 2048 : index
%c2_5351 = arith.constant 2 : index
%c7_5352 = arith.constant 7 : index
%c3_5353 = arith.constant 3 : index
%c7_5354 = arith.constant 7 : index
%5020 = tensor.empty() : tensor<1x2048x7x7xf32>
// Dequantize the i8 tensor %cast_5347 back to f32, element-wise:
//   x = (q - zero_point) * scale
// with zero_point = %5018 and scale = %5016 (both scalar items defined above).
%5021 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5347 : tensor<1x2048x7x7xi8>) outs(%5020 : tensor<1x2048x7x7xf32>) {
^bb0(%q: i8, %init: f32):
  // Scale narrowed from f64 to f32.
  %scale_f64 = torch_c.to_f64 %5016
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  // Zero point narrowed from i64 to i32.
  %zp_i64 = torch_c.to_i64 %5018
  %zp_i32 = arith.trunci %zp_i64 : i64 to i32
  // Subtract in i32 so the difference of two i8-range values cannot wrap.
  %q_i32 = arith.extsi %q : i8 to i32
  %shifted = arith.subi %q_i32, %zp_i32 : i32
  %shifted_f32 = arith.sitofp %shifted : i32 to f32
  %deq = arith.mulf %shifted_f32, %scale_f32 : f32
  linalg.yield %deq : f32
} -> tensor<1x2048x7x7xf32>
%cast_5355 = tensor.cast %5021 : tensor<1x2048x7x7xf32> to tensor<1x2048x7x7xf32>
%int1_5356 = torch.constant.int 1
%5022 = torch_c.to_i64 %int1_5356
%c1_5357 = arith.constant 1 : index
%c1_5358 = arith.constant 1 : index
%c2048_5359 = arith.constant 2048 : index
%c2_5360 = arith.constant 2 : index
%c7_5361 = arith.constant 7 : index
%c3_5362 = arith.constant 3 : index
%c7_5363 = arith.constant 7 : index
%c1_5364 = arith.constant 1 : index
%c2048_5365 = arith.constant 2048 : index
%5023 = arith.cmpi eq, %c2048_5359, %c2048_5365 : index
cf.assert %5023, "mismatched size for broadcast"
%c2_5366 = arith.constant 2 : index
%c7_5367 = arith.constant 7 : index
%5024 = arith.cmpi eq, %c7_5361, %c7_5367 : index
cf.assert %5024, "mismatched size for broadcast"
%c3_5368 = arith.constant 3 : index
%c7_5369 = arith.constant 7 : index
%5025 = arith.cmpi eq, %c7_5363, %c7_5369 : index
cf.assert %5025, "mismatched size for broadcast"
%5026 = tensor.empty() : tensor<1x2048x7x7xf32>
// Element-wise add with a scale factor: result = lhs + rhs * alpha, where
// lhs = %cast_5261, rhs = %cast_5355 and alpha is %5022 (the i64 form of
// torch.constant.int 1 defined above) converted to f32.
%5027 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5261, %cast_5355 : tensor<1x2048x7x7xf32>, tensor<1x2048x7x7xf32>) outs(%5026 : tensor<1x2048x7x7xf32>) {
^bb0(%lhs: f32, %rhs: f32, %init: f32):
  %alpha = arith.sitofp %5022 : i64 to f32
  %scaled_rhs = arith.mulf %rhs, %alpha : f32
  %sum = arith.addf %lhs, %scaled_rhs : f32
  linalg.yield %sum : f32
} -> tensor<1x2048x7x7xf32>
%cast_5370 = tensor.cast %5027 : tensor<1x2048x7x7xf32> to tensor<1x2048x7x7xf32>
%c1_5371 = arith.constant 1 : index
%c1_5372 = arith.constant 1 : index
%c2048_5373 = arith.constant 2048 : index
%c2_5374 = arith.constant 2 : index
%c7_5375 = arith.constant 7 : index
%c3_5376 = arith.constant 3 : index
%c7_5377 = arith.constant 7 : index
%5028 = tensor.empty() : tensor<1x2048x7x7xf32>
// Element-wise max(x, 0) over %cast_5370: values that compare greater than
// zero are kept, everything else becomes 0.0.
%5029 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5370 : tensor<1x2048x7x7xf32>) outs(%5028 : tensor<1x2048x7x7xf32>) {
^bb0(%x: f32, %init: f32):
  %zero = arith.constant 0.000000e+00 : f32
  // `ugt` is the unordered-or-greater predicate, so a NaN input is also kept.
  %keep = arith.cmpf ugt, %x, %zero : f32
  %clamped = arith.select %keep, %x, %zero : f32
  linalg.yield %clamped : f32
} -> tensor<1x2048x7x7xf32>
%cast_5378 = tensor.cast %5029 : tensor<1x2048x7x7xf32> to tensor<1x2048x7x7xf32>
%5030 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%5031 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_5379 = torch.constant.int 12
%5032 = torch.aten.item %5030 : !torch.vtensor<[],f32> -> !torch.float
%5033 = torch_c.to_f64 %5032
%5034 = torch.aten.item %5031 : !torch.vtensor<[],si8> -> !torch.int
%5035 = torch_c.to_i64 %5034
%c1_5380 = arith.constant 1 : index
%c1_5381 = arith.constant 1 : index
%c2048_5382 = arith.constant 2048 : index
%c2_5383 = arith.constant 2 : index
%c7_5384 = arith.constant 7 : index
%c3_5385 = arith.constant 3 : index
%c7_5386 = arith.constant 7 : index
%5036 = tensor.empty() : tensor<1x2048x7x7xi8>
// Quantize the f32 tensor %cast_5378 to signed 8-bit, element-wise:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is the scalar %5032 (item of the dense<3.125000e-02> literal above) and
// zero_point is the scalar %5034 (item of the dense<0> si8 literal above).
// Rounding uses math.roundeven (ties to even) instead of math.round (ties away
// from zero) so exact .5 quotients follow the round-half-to-even rule of
// ONNX QuantizeLinear / torch.quantize_per_tensor.
%5037 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5378 : tensor<1x2048x7x7xf32>) outs(%5036 : tensor<1x2048x7x7xi8>) {
^bb0(%in: f32, %out: i8):
  // Zero point converted to f32.
  %5774 = torch_c.to_i64 %5034
  %5775 = arith.sitofp %5774 : i64 to f32
  // Scale narrowed from f64 to f32.
  %5776 = torch_c.to_f64 %5032
  %5777 = arith.truncf %5776 : f64 to f32
  %5778 = arith.divf %in, %5777 : f32
  %5779 = math.roundeven %5778 : f32
  %5780 = arith.addf %5779, %5775 : f32
  // Saturate to [-128, 127] before the float-to-int conversion.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
  %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
  %5783 = arith.select %5781, %cst_6197, %5780 : f32
  %5784 = arith.select %5782, %cst_6198, %5783 : f32
  %5785 = arith.fptosi %5784 : f32 to i8
  linalg.yield %5785 : i8
} -> tensor<1x2048x7x7xi8>
%cast_5387 = tensor.cast %5037 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
%cast_5388 = tensor.cast %cast_5387 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
%5038 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%5039 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%5040 = torch.aten.item %5038 : !torch.vtensor<[],f32> -> !torch.float
%5041 = torch_c.to_f64 %5040
%5042 = torch.aten.item %5039 : !torch.vtensor<[],si8> -> !torch.int
%5043 = torch_c.to_i64 %5042
%cast_5389 = tensor.cast %cast_5388 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
%c1_5390 = arith.constant 1 : index
%c1_5391 = arith.constant 1 : index
%c2048_5392 = arith.constant 2048 : index
%c2_5393 = arith.constant 2 : index
%c7_5394 = arith.constant 7 : index
%c3_5395 = arith.constant 3 : index
%c7_5396 = arith.constant 7 : index
%5044 = tensor.empty() : tensor<1x2048x7x7xf32>
// Dequantize the i8 tensor %cast_5389 back to f32, element-wise:
//   x = (q - zero_point) * scale
// with zero_point = %5042 and scale = %5040 (both scalar items defined above).
%5045 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5389 : tensor<1x2048x7x7xi8>) outs(%5044 : tensor<1x2048x7x7xf32>) {
^bb0(%q: i8, %init: f32):
  // Scale narrowed from f64 to f32.
  %scale_f64 = torch_c.to_f64 %5040
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  // Zero point narrowed from i64 to i32.
  %zp_i64 = torch_c.to_i64 %5042
  %zp_i32 = arith.trunci %zp_i64 : i64 to i32
  // Subtract in i32 so the difference of two i8-range values cannot wrap.
  %q_i32 = arith.extsi %q : i8 to i32
  %shifted = arith.subi %q_i32, %zp_i32 : i32
  %shifted_f32 = arith.sitofp %shifted : i32 to f32
  %deq = arith.mulf %shifted_f32, %scale_f32 : f32
  linalg.yield %deq : f32
} -> tensor<1x2048x7x7xf32>
%cast_5397 = tensor.cast %5045 : tensor<1x2048x7x7xf32> to tensor<1x2048x7x7xf32>
%5046 = torch.vtensor.literal(dense<4.8828125E-4> : tensor<f32>) : !torch.vtensor<[],f32>
%5047 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_5398 = torch.constant.int 12
%5048 = torch.aten.item %5046 : !torch.vtensor<[],f32> -> !torch.float
%5049 = torch_c.to_f64 %5048
%5050 = torch.aten.item %5047 : !torch.vtensor<[],si8> -> !torch.int
%5051 = torch_c.to_i64 %5050
%c1_5399 = arith.constant 1 : index
%c0_5400 = arith.constant 0 : index
%c512_5401 = arith.constant 512 : index
%c1_5402 = arith.constant 1 : index
%c2048_5403 = arith.constant 2048 : index
%5052 = tensor.empty() : tensor<512x2048x1x1xi8>
// Quantize the f32 tensor %190 (512x2048x1x1) to signed 8-bit, element-wise:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is the scalar %5048 (item of the dense<4.8828125E-4> literal above) and
// zero_point is the scalar %5050 (item of the dense<0> si8 literal above).
// The input is read through #map4, which pins the two trailing unit dims to 0.
// Rounding uses math.roundeven (ties to even) instead of math.round (ties away
// from zero) so exact .5 quotients follow the round-half-to-even rule of
// ONNX QuantizeLinear / torch.quantize_per_tensor.
%5053 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%190 : tensor<512x2048x1x1xf32>) outs(%5052 : tensor<512x2048x1x1xi8>) {
^bb0(%in: f32, %out: i8):
  // Zero point converted to f32.
  %5774 = torch_c.to_i64 %5050
  %5775 = arith.sitofp %5774 : i64 to f32
  // Scale narrowed from f64 to f32.
  %5776 = torch_c.to_f64 %5048
  %5777 = arith.truncf %5776 : f64 to f32
  %5778 = arith.divf %in, %5777 : f32
  %5779 = math.roundeven %5778 : f32
  %5780 = arith.addf %5779, %5775 : f32
  // Saturate to [-128, 127] before the float-to-int conversion.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
  %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
  %5783 = arith.select %5781, %cst_6197, %5780 : f32
  %5784 = arith.select %5782, %cst_6198, %5783 : f32
  %5785 = arith.fptosi %5784 : f32 to i8
  linalg.yield %5785 : i8
} -> tensor<512x2048x1x1xi8>
%cast_5404 = tensor.cast %5053 : tensor<512x2048x1x1xi8> to tensor<512x2048x1x1xi8>
%cast_5405 = tensor.cast %cast_5404 : tensor<512x2048x1x1xi8> to tensor<512x2048x1x1xi8>
%5054 = torch.vtensor.literal(dense<4.8828125E-4> : tensor<f32>) : !torch.vtensor<[],f32>
%5055 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%5056 = torch.aten.item %5054 : !torch.vtensor<[],f32> -> !torch.float
%5057 = torch_c.to_f64 %5056
%5058 = torch.aten.item %5055 : !torch.vtensor<[],si8> -> !torch.int
%5059 = torch_c.to_i64 %5058
%cast_5406 = tensor.cast %cast_5405 : tensor<512x2048x1x1xi8> to tensor<512x2048x1x1xi8>
%c1_5407 = arith.constant 1 : index
%c0_5408 = arith.constant 0 : index
%c512_5409 = arith.constant 512 : index
%c1_5410 = arith.constant 1 : index
%c2048_5411 = arith.constant 2048 : index
%5060 = tensor.empty() : tensor<512x2048x1x1xf32>
// Dequantize the i8 tensor %cast_5406 (512x2048x1x1) back to f32, element-wise:
//   x = (q - zero_point) * scale
// with zero_point = %5058 and scale = %5056 (both scalar items defined above).
%5061 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5406 : tensor<512x2048x1x1xi8>) outs(%5060 : tensor<512x2048x1x1xf32>) {
^bb0(%q: i8, %init: f32):
  // Scale narrowed from f64 to f32.
  %scale_f64 = torch_c.to_f64 %5056
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  // Zero point narrowed from i64 to i32.
  %zp_i64 = torch_c.to_i64 %5058
  %zp_i32 = arith.trunci %zp_i64 : i64 to i32
  // Subtract in i32 so the difference of two i8-range values cannot wrap.
  %q_i32 = arith.extsi %q : i8 to i32
  %shifted = arith.subi %q_i32, %zp_i32 : i32
  %shifted_f32 = arith.sitofp %shifted : i32 to f32
  %deq = arith.mulf %shifted_f32, %scale_f32 : f32
  linalg.yield %deq : f32
} -> tensor<512x2048x1x1xf32>
%cast_5412 = tensor.cast %5061 : tensor<512x2048x1x1xf32> to tensor<512x2048x1x1xf32>
%5062 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%5063 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_5413 = torch.constant.int 12
%5064 = torch.aten.item %5062 : !torch.vtensor<[],f32> -> !torch.float
%5065 = torch_c.to_f64 %5064
%5066 = torch.aten.item %5063 : !torch.vtensor<[],si8> -> !torch.int
%5067 = torch_c.to_i64 %5066
%c1_5414 = arith.constant 1 : index
%c0_5415 = arith.constant 0 : index
%c512_5416 = arith.constant 512 : index
%5068 = tensor.empty() : tensor<512xi8>
// Quantize the 512-element f32 vector %192 to signed 8-bit, element-wise:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is the scalar %5064 (item of the dense<3.906250e-03> literal above) and
// zero_point is the scalar %5066 (item of the dense<0> si8 literal above).
// Rounding uses math.roundeven (ties to even) instead of math.round (ties away
// from zero) so exact .5 quotients follow the round-half-to-even rule of
// ONNX QuantizeLinear / torch.quantize_per_tensor.
%5069 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%192 : tensor<512xf32>) outs(%5068 : tensor<512xi8>) {
^bb0(%in: f32, %out: i8):
  // Zero point converted to f32.
  %5774 = torch_c.to_i64 %5066
  %5775 = arith.sitofp %5774 : i64 to f32
  // Scale narrowed from f64 to f32.
  %5776 = torch_c.to_f64 %5064
  %5777 = arith.truncf %5776 : f64 to f32
  %5778 = arith.divf %in, %5777 : f32
  %5779 = math.roundeven %5778 : f32
  %5780 = arith.addf %5779, %5775 : f32
  // Saturate to [-128, 127] before the float-to-int conversion.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
  %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
  %5783 = arith.select %5781, %cst_6197, %5780 : f32
  %5784 = arith.select %5782, %cst_6198, %5783 : f32
  %5785 = arith.fptosi %5784 : f32 to i8
  linalg.yield %5785 : i8
} -> tensor<512xi8>
%cast_5417 = tensor.cast %5069 : tensor<512xi8> to tensor<512xi8>
%cast_5418 = tensor.cast %cast_5417 : tensor<512xi8> to tensor<512xi8>
%5070 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%5071 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%5072 = torch.aten.item %5070 : !torch.vtensor<[],f32> -> !torch.float
%5073 = torch_c.to_f64 %5072
%5074 = torch.aten.item %5071 : !torch.vtensor<[],si8> -> !torch.int
%5075 = torch_c.to_i64 %5074
%cast_5419 = tensor.cast %cast_5418 : tensor<512xi8> to tensor<512xi8>
%c1_5420 = arith.constant 1 : index
%c0_5421 = arith.constant 0 : index
%c512_5422 = arith.constant 512 : index
%5076 = tensor.empty() : tensor<512xf32>
// Dequantize the 512-element i8 vector %cast_5419 back to f32, element-wise:
//   x = (q - zero_point) * scale
// with zero_point = %5074 and scale = %5072 (both scalar items defined above).
%5077 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_5419 : tensor<512xi8>) outs(%5076 : tensor<512xf32>) {
^bb0(%q: i8, %init: f32):
  // Scale narrowed from f64 to f32.
  %scale_f64 = torch_c.to_f64 %5072
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  // Zero point narrowed from i64 to i32.
  %zp_i64 = torch_c.to_i64 %5074
  %zp_i32 = arith.trunci %zp_i64 : i64 to i32
  // Subtract in i32 so the difference of two i8-range values cannot wrap.
  %q_i32 = arith.extsi %q : i8 to i32
  %shifted = arith.subi %q_i32, %zp_i32 : i32
  %shifted_f32 = arith.sitofp %shifted : i32 to f32
  %deq = arith.mulf %shifted_f32, %scale_f32 : f32
  linalg.yield %deq : f32
} -> tensor<512xf32>
%cast_5423 = tensor.cast %5077 : tensor<512xf32> to tensor<512xf32>
%int0_5424 = torch.constant.int 0
%int0_5425 = torch.constant.int 0
%int1_5426 = torch.constant.int 1
%int1_5427 = torch.constant.int 1
%int1_5428 = torch.constant.int 1
%int1_5429 = torch.constant.int 1
%int0_5430 = torch.constant.int 0
%5078 = torch.prim.ListConstruct %int0_5424, %int0_5425 : (!torch.int, !torch.int) -> !torch.list<int>
%5079 = torch.prim.ListConstruct %int1_5426, %int1_5427 : (!torch.int, !torch.int) -> !torch.list<int>
%5080 = torch.prim.ListConstruct %int1_5428, %int1_5429 : (!torch.int, !torch.int) -> !torch.list<int>
%5081 = torch.prim.ListConstruct %int0_5430, %int0_5430 : (!torch.int, !torch.int) -> !torch.list<int>
%false_5431 = torch.constant.bool false
%int1_5432 = torch.constant.int 1
%5082 = torch_c.to_i64 %int1_5432
%5083 = torch_c.to_i64 %int0_5424
%5084 = torch_c.to_i64 %int0_5425
%5085 = torch_c.to_i64 %int0_5430
%5086 = torch_c.to_i64 %int0_5430
%c0_5433 = arith.constant 0 : index
%c1_5434 = arith.constant 1 : index
%c1_5435 = arith.constant 1 : index
%c2048_5436 = arith.constant 2048 : index
%c2_5437 = arith.constant 2 : index
%c7_5438 = arith.constant 7 : index
%c3_5439 = arith.constant 3 : index
%c7_5440 = arith.constant 7 : index
%c0_5441 = arith.constant 0 : index
%c512_5442 = arith.constant 512 : index
%c1_5443 = arith.constant 1 : index
%c2048_5444 = arith.constant 2048 : index
%c2_5445 = arith.constant 2 : index
%c1_5446 = arith.constant 1 : index
%c3_5447 = arith.constant 3 : index
%c1_5448 = arith.constant 1 : index
%5087 = arith.index_cast %5082 : i64 to index
%c0_5449 = arith.constant 0 : index
%5088 = arith.remsi %c2048_5436, %5087 : index
%5089 = arith.cmpi eq, %c0_5449, %5088 : index
cf.assert %5089, "invalid: groups must divide input channel size evenly."
%c0_5450 = arith.constant 0 : index
%5090 = arith.remsi %c512_5442, %5087 : index
%5091 = arith.cmpi eq, %c0_5450, %5090 : index
cf.assert %5091, "invalid: groups must divide weight batch size evenly."
%c1_i64_5451 = arith.constant 1 : i64
%c1_i64_5452 = arith.constant 1 : i64
%c1_i64_5453 = arith.constant 1 : i64
%c1_i64_5454 = arith.constant 1 : i64
%cst_5455 = arith.constant 0.000000e+00 : f32
%c0_5456 = arith.constant 0 : index
%c1_5457 = arith.constant 1 : index
%c1_5458 = arith.constant 1 : index
%c2048_5459 = arith.constant 2048 : index
%c2_5460 = arith.constant 2 : index
%c7_5461 = arith.constant 7 : index
%c3_5462 = arith.constant 3 : index
%c7_5463 = arith.constant 7 : index
%c0_i64_5464 = arith.constant 0 : i64
%5092 = arith.index_cast %c0_i64_5464 : i64 to index
%5093 = arith.index_cast %c0_i64_5464 : i64 to index
%5094 = arith.index_cast %5083 : i64 to index
%5095 = arith.index_cast %5084 : i64 to index
// Pad %cast_5397 with the constant %cst_5455 (0.0 : f32) before the convolution.
// All four low/high amounts trace back to zero constants defined above
// (%c0_i64_5464 for the first two dims, %int0_5424 / %int0_5425 for the last
// two), so no elements are actually added; only the result type becomes fully
// dynamic (tensor<?x?x?x?xf32>).
%padded_5465 = tensor.pad %cast_5397 low[%5092, %5093, %5094, %5095] high[%5092, %5093, %5094, %5095] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_5455 : f32
} : tensor<1x2048x7x7xf32> to tensor<?x?x?x?xf32>
%5096 = arith.index_cast %c1_5446 : index to i64
%c1_i64_5466 = arith.constant 1 : i64
%c2_i64_5467 = arith.constant 2 : i64
%5097 = arith.muli %5083, %c2_i64_5467 : i64
%5098 = arith.index_cast %c7_5438 : index to i64
%5099 = arith.addi %5098, %5097 : i64
%5100 = arith.subi %5096, %c1_i64_5466 : i64
%5101 = arith.muli %c1_i64_5451, %5100 : i64
%5102 = arith.subi %5099, %5101 : i64
%5103 = arith.subi %5102, %c1_i64_5466 : i64
%5104 = arith.floordivsi %5103, %c1_i64_5453 : i64
%5105 = arith.addi %5104, %c1_i64_5466 : i64
%5106 = arith.index_cast %5105 : i64 to index
%5107 = arith.index_cast %c1_5448 : index to i64
%c1_i64_5468 = arith.constant 1 : i64
%c2_i64_5469 = arith.constant 2 : i64
%5108 = arith.muli %5084, %c2_i64_5469 : i64
%5109 = arith.index_cast %c7_5440 : index to i64
%5110 = arith.addi %5109, %5108 : i64
%5111 = arith.subi %5107, %c1_i64_5468 : i64
%5112 = arith.muli %c1_i64_5452, %5111 : i64
%5113 = arith.subi %5110, %5112 : i64
%5114 = arith.subi %5113, %c1_i64_5468 : i64
%5115 = arith.floordivsi %5114, %c1_i64_5454 : i64
%5116 = arith.addi %5115, %c1_i64_5468 : i64
%5117 = arith.index_cast %5116 : i64 to index
// 1x1 convolution (stride 1, dilation 1) of %padded_5465 with the
// 512x2048x1x1 filter %cast_5412.
// The output buffer takes its two dynamic spatial sizes from %5106 / %5117,
// which are computed above from the input size, padding, kernel size,
// dilation and stride.
%5118 = tensor.empty(%5106, %5117) : tensor<1x512x?x?xf32>
// Seed the output with the bias: #map3 reads %cast_5423 by channel index d1
// only, so each of the 512 values is broadcast over batch and spatial dims.
%5119 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5423 : tensor<512xf32>) outs(%5118 : tensor<1x512x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x512x?x?xf32>
// Channel counts divided by the group count %5087; neither quotient is used
// in the lines visible here (NOTE(review): looks like leftover lowering
// residue - confirm against the rest of the function).
%5120 = arith.floordivsi %c2048_5436, %5087 : index
%5121 = arith.floordivsi %c512_5442, %5087 : index
%c0_5470 = arith.constant 0 : index
%c1_5471 = arith.constant 1 : index
// The convolution accumulates into its `outs` operand, so using the
// bias-filled %5119 as the initial value yields conv(input, filter) + bias.
%5122 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_5465, %cast_5412 : tensor<?x?x?x?xf32>, tensor<512x2048x1x1xf32>) outs(%5119 : tensor<1x512x?x?xf32>) -> tensor<1x512x?x?xf32>
%cast_5472 = tensor.cast %5122 : tensor<1x512x?x?xf32> to tensor<1x512x7x7xf32>
%c1_5473 = arith.constant 1 : index
%c1_5474 = arith.constant 1 : index
%c512_5475 = arith.constant 512 : index
%c2_5476 = arith.constant 2 : index
%c7_5477 = arith.constant 7 : index
%c3_5478 = arith.constant 3 : index
%c7_5479 = arith.constant 7 : index
%5123 = tensor.empty() : tensor<1x512x7x7xf32>
// Element-wise max(x, 0) over %cast_5472: values that compare greater than
// zero are kept, everything else becomes 0.0.
%5124 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5472 : tensor<1x512x7x7xf32>) outs(%5123 : tensor<1x512x7x7xf32>) {
^bb0(%x: f32, %init: f32):
  %zero = arith.constant 0.000000e+00 : f32
  // `ugt` is the unordered-or-greater predicate, so a NaN input is also kept.
  %keep = arith.cmpf ugt, %x, %zero : f32
  %clamped = arith.select %keep, %x, %zero : f32
  linalg.yield %clamped : f32
} -> tensor<1x512x7x7xf32>
%cast_5480 = tensor.cast %5124 : tensor<1x512x7x7xf32> to tensor<1x512x7x7xf32>
%5125 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%5126 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_5481 = torch.constant.int 12
%5127 = torch.aten.item %5125 : !torch.vtensor<[],f32> -> !torch.float
%5128 = torch_c.to_f64 %5127
%5129 = torch.aten.item %5126 : !torch.vtensor<[],si8> -> !torch.int
%5130 = torch_c.to_i64 %5129
%c1_5482 = arith.constant 1 : index
%c1_5483 = arith.constant 1 : index
%c512_5484 = arith.constant 512 : index
%c2_5485 = arith.constant 2 : index
%c7_5486 = arith.constant 7 : index
%c3_5487 = arith.constant 3 : index
%c7_5488 = arith.constant 7 : index
%5131 = tensor.empty() : tensor<1x512x7x7xi8>
// Quantize the f32 tensor %cast_5480 to signed 8-bit, element-wise:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is the scalar %5127 (item of the dense<3.906250e-03> literal above) and
// zero_point is the scalar %5129 (item of the dense<0> si8 literal above).
// Rounding uses math.roundeven (ties to even) instead of math.round (ties away
// from zero) so exact .5 quotients follow the round-half-to-even rule of
// ONNX QuantizeLinear / torch.quantize_per_tensor.
%5132 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5480 : tensor<1x512x7x7xf32>) outs(%5131 : tensor<1x512x7x7xi8>) {
^bb0(%in: f32, %out: i8):
  // Zero point converted to f32.
  %5774 = torch_c.to_i64 %5129
  %5775 = arith.sitofp %5774 : i64 to f32
  // Scale narrowed from f64 to f32.
  %5776 = torch_c.to_f64 %5127
  %5777 = arith.truncf %5776 : f64 to f32
  %5778 = arith.divf %in, %5777 : f32
  %5779 = math.roundeven %5778 : f32
  %5780 = arith.addf %5779, %5775 : f32
  // Saturate to [-128, 127] before the float-to-int conversion.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
  %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
  %5783 = arith.select %5781, %cst_6197, %5780 : f32
  %5784 = arith.select %5782, %cst_6198, %5783 : f32
  %5785 = arith.fptosi %5784 : f32 to i8
  linalg.yield %5785 : i8
} -> tensor<1x512x7x7xi8>
%cast_5489 = tensor.cast %5132 : tensor<1x512x7x7xi8> to tensor<1x512x7x7xi8>
%cast_5490 = tensor.cast %cast_5489 : tensor<1x512x7x7xi8> to tensor<1x512x7x7xi8>
%5133 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%5134 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%5135 = torch.aten.item %5133 : !torch.vtensor<[],f32> -> !torch.float
%5136 = torch_c.to_f64 %5135
%5137 = torch.aten.item %5134 : !torch.vtensor<[],si8> -> !torch.int
%5138 = torch_c.to_i64 %5137
%cast_5491 = tensor.cast %cast_5490 : tensor<1x512x7x7xi8> to tensor<1x512x7x7xi8>
%c1_5492 = arith.constant 1 : index
%c1_5493 = arith.constant 1 : index
%c512_5494 = arith.constant 512 : index
%c2_5495 = arith.constant 2 : index
%c7_5496 = arith.constant 7 : index
%c3_5497 = arith.constant 3 : index
%c7_5498 = arith.constant 7 : index
%5139 = tensor.empty() : tensor<1x512x7x7xf32>
// Dequantize the i8 tensor %cast_5491 back to f32, element-wise:
//   x = (q - zero_point) * scale
// with zero_point = %5137 and scale = %5135 (both scalar items defined above).
%5140 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5491 : tensor<1x512x7x7xi8>) outs(%5139 : tensor<1x512x7x7xf32>) {
^bb0(%q: i8, %init: f32):
  // Scale narrowed from f64 to f32.
  %scale_f64 = torch_c.to_f64 %5135
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  // Zero point narrowed from i64 to i32.
  %zp_i64 = torch_c.to_i64 %5137
  %zp_i32 = arith.trunci %zp_i64 : i64 to i32
  // Subtract in i32 so the difference of two i8-range values cannot wrap.
  %q_i32 = arith.extsi %q : i8 to i32
  %shifted = arith.subi %q_i32, %zp_i32 : i32
  %shifted_f32 = arith.sitofp %shifted : i32 to f32
  %deq = arith.mulf %shifted_f32, %scale_f32 : f32
  linalg.yield %deq : f32
} -> tensor<1x512x7x7xf32>
%cast_5499 = tensor.cast %5140 : tensor<1x512x7x7xf32> to tensor<1x512x7x7xf32>
%5141 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%5142 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_5500 = torch.constant.int 12
%5143 = torch.aten.item %5141 : !torch.vtensor<[],f32> -> !torch.float
%5144 = torch_c.to_f64 %5143
%5145 = torch.aten.item %5142 : !torch.vtensor<[],si8> -> !torch.int
%5146 = torch_c.to_i64 %5145
%c1_5501 = arith.constant 1 : index
%c0_5502 = arith.constant 0 : index
%c512_5503 = arith.constant 512 : index
%c1_5504 = arith.constant 1 : index
%c512_5505 = arith.constant 512 : index
%c2_5506 = arith.constant 2 : index
%c3_5507 = arith.constant 3 : index
%c3_5508 = arith.constant 3 : index
%c3_5509 = arith.constant 3 : index
%5147 = tensor.empty() : tensor<512x512x3x3xi8>
// Quantize the f32 tensor %194 (512x512x3x3) to signed 8-bit, element-wise:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is the scalar %5143 (item of the dense<3.906250e-03> literal above) and
// zero_point is the scalar %5145 (item of the dense<0> si8 literal above).
// Rounding uses math.roundeven (ties to even) instead of math.round (ties away
// from zero) so exact .5 quotients follow the round-half-to-even rule of
// ONNX QuantizeLinear / torch.quantize_per_tensor.
%5148 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%194 : tensor<512x512x3x3xf32>) outs(%5147 : tensor<512x512x3x3xi8>) {
^bb0(%in: f32, %out: i8):
  // Zero point converted to f32.
  %5774 = torch_c.to_i64 %5145
  %5775 = arith.sitofp %5774 : i64 to f32
  // Scale narrowed from f64 to f32.
  %5776 = torch_c.to_f64 %5143
  %5777 = arith.truncf %5776 : f64 to f32
  %5778 = arith.divf %in, %5777 : f32
  %5779 = math.roundeven %5778 : f32
  %5780 = arith.addf %5779, %5775 : f32
  // Saturate to [-128, 127] before the float-to-int conversion.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
  %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
  %5783 = arith.select %5781, %cst_6197, %5780 : f32
  %5784 = arith.select %5782, %cst_6198, %5783 : f32
  %5785 = arith.fptosi %5784 : f32 to i8
  linalg.yield %5785 : i8
} -> tensor<512x512x3x3xi8>
%cast_5510 = tensor.cast %5148 : tensor<512x512x3x3xi8> to tensor<512x512x3x3xi8>
%cast_5511 = tensor.cast %cast_5510 : tensor<512x512x3x3xi8> to tensor<512x512x3x3xi8>
%5149 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%5150 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%5151 = torch.aten.item %5149 : !torch.vtensor<[],f32> -> !torch.float
%5152 = torch_c.to_f64 %5151
%5153 = torch.aten.item %5150 : !torch.vtensor<[],si8> -> !torch.int
%5154 = torch_c.to_i64 %5153
%cast_5512 = tensor.cast %cast_5511 : tensor<512x512x3x3xi8> to tensor<512x512x3x3xi8>
%c1_5513 = arith.constant 1 : index
%c0_5514 = arith.constant 0 : index
%c512_5515 = arith.constant 512 : index
%c1_5516 = arith.constant 1 : index
%c512_5517 = arith.constant 512 : index
%c2_5518 = arith.constant 2 : index
%c3_5519 = arith.constant 3 : index
%c3_5520 = arith.constant 3 : index
%c3_5521 = arith.constant 3 : index
%5155 = tensor.empty() : tensor<512x512x3x3xf32>
// Dequantize the i8 tensor %cast_5512 (512x512x3x3) back to f32, element-wise:
//   x = (q - zero_point) * scale
// with zero_point = %5153 and scale = %5151 (both scalar items defined above).
%5156 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5512 : tensor<512x512x3x3xi8>) outs(%5155 : tensor<512x512x3x3xf32>) {
^bb0(%q: i8, %init: f32):
  // Scale narrowed from f64 to f32.
  %scale_f64 = torch_c.to_f64 %5151
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  // Zero point narrowed from i64 to i32.
  %zp_i64 = torch_c.to_i64 %5153
  %zp_i32 = arith.trunci %zp_i64 : i64 to i32
  // Subtract in i32 so the difference of two i8-range values cannot wrap.
  %q_i32 = arith.extsi %q : i8 to i32
  %shifted = arith.subi %q_i32, %zp_i32 : i32
  %shifted_f32 = arith.sitofp %shifted : i32 to f32
  %deq = arith.mulf %shifted_f32, %scale_f32 : f32
  linalg.yield %deq : f32
} -> tensor<512x512x3x3xf32>
%cast_5522 = tensor.cast %5156 : tensor<512x512x3x3xf32> to tensor<512x512x3x3xf32>
%5157 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%5158 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_5523 = torch.constant.int 12
%5159 = torch.aten.item %5157 : !torch.vtensor<[],f32> -> !torch.float
%5160 = torch_c.to_f64 %5159
%5161 = torch.aten.item %5158 : !torch.vtensor<[],si8> -> !torch.int
%5162 = torch_c.to_i64 %5161
%c1_5524 = arith.constant 1 : index
%c0_5525 = arith.constant 0 : index
%c512_5526 = arith.constant 512 : index
%5163 = tensor.empty() : tensor<512xi8>
// Quantize the 512-element f32 vector %196 to signed 8-bit, element-wise:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// scale is the scalar %5159 (item of the dense<1.562500e-02> literal above) and
// zero_point is the scalar %5161 (item of the dense<0> si8 literal above).
// Rounding uses math.roundeven (ties to even) instead of math.round (ties away
// from zero) so exact .5 quotients follow the round-half-to-even rule of
// ONNX QuantizeLinear / torch.quantize_per_tensor.
%5164 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%196 : tensor<512xf32>) outs(%5163 : tensor<512xi8>) {
^bb0(%in: f32, %out: i8):
  // Zero point converted to f32.
  %5774 = torch_c.to_i64 %5161
  %5775 = arith.sitofp %5774 : i64 to f32
  // Scale narrowed from f64 to f32.
  %5776 = torch_c.to_f64 %5159
  %5777 = arith.truncf %5776 : f64 to f32
  %5778 = arith.divf %in, %5777 : f32
  %5779 = math.roundeven %5778 : f32
  %5780 = arith.addf %5779, %5775 : f32
  // Saturate to [-128, 127] before the float-to-int conversion.
  %cst_6197 = arith.constant -1.280000e+02 : f32
  %cst_6198 = arith.constant 1.270000e+02 : f32
  %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
  %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
  %5783 = arith.select %5781, %cst_6197, %5780 : f32
  %5784 = arith.select %5782, %cst_6198, %5783 : f32
  %5785 = arith.fptosi %5784 : f32 to i8
  linalg.yield %5785 : i8
} -> tensor<512xi8>
%cast_5527 = tensor.cast %5164 : tensor<512xi8> to tensor<512xi8>
%cast_5528 = tensor.cast %cast_5527 : tensor<512xi8> to tensor<512xi8>
%5165 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%5166 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%5167 = torch.aten.item %5165 : !torch.vtensor<[],f32> -> !torch.float
%5168 = torch_c.to_f64 %5167
%5169 = torch.aten.item %5166 : !torch.vtensor<[],si8> -> !torch.int
%5170 = torch_c.to_i64 %5169
%cast_5529 = tensor.cast %cast_5528 : tensor<512xi8> to tensor<512xi8>
%c1_5530 = arith.constant 1 : index
%c0_5531 = arith.constant 0 : index
%c512_5532 = arith.constant 512 : index
%5171 = tensor.empty() : tensor<512xf32>
// Dequantize the 512-element i8 vector %cast_5529 back to f32, element-wise:
//   x = (q - zero_point) * scale
// with zero_point = %5169 and scale = %5167 (both scalar items defined above).
%5172 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_5529 : tensor<512xi8>) outs(%5171 : tensor<512xf32>) {
^bb0(%q: i8, %init: f32):
  // Scale narrowed from f64 to f32.
  %scale_f64 = torch_c.to_f64 %5167
  %scale_f32 = arith.truncf %scale_f64 : f64 to f32
  // Zero point narrowed from i64 to i32.
  %zp_i64 = torch_c.to_i64 %5169
  %zp_i32 = arith.trunci %zp_i64 : i64 to i32
  // Subtract in i32 so the difference of two i8-range values cannot wrap.
  %q_i32 = arith.extsi %q : i8 to i32
  %shifted = arith.subi %q_i32, %zp_i32 : i32
  %shifted_f32 = arith.sitofp %shifted : i32 to f32
  %deq = arith.mulf %shifted_f32, %scale_f32 : f32
  linalg.yield %deq : f32
} -> tensor<512xf32>
%cast_5533 = tensor.cast %5172 : tensor<512xf32> to tensor<512xf32>
%int1_5534 = torch.constant.int 1
%int1_5535 = torch.constant.int 1
%int1_5536 = torch.constant.int 1
%int1_5537 = torch.constant.int 1
%int1_5538 = torch.constant.int 1
%int1_5539 = torch.constant.int 1
%int0_5540 = torch.constant.int 0
%5173 = torch.prim.ListConstruct %int1_5534, %int1_5535 : (!torch.int, !torch.int) -> !torch.list<int>
%5174 = torch.prim.ListConstruct %int1_5536, %int1_5537 : (!torch.int, !torch.int) -> !torch.list<int>
%5175 = torch.prim.ListConstruct %int1_5538, %int1_5539 : (!torch.int, !torch.int) -> !torch.list<int>
%5176 = torch.prim.ListConstruct %int0_5540, %int0_5540 : (!torch.int, !torch.int) -> !torch.list<int>
%false_5541 = torch.constant.bool false
%int1_5542 = torch.constant.int 1
%5177 = torch_c.to_i64 %int1_5542
%5178 = torch_c.to_i64 %int1_5534
%5179 = torch_c.to_i64 %int1_5535
%5180 = torch_c.to_i64 %int0_5540
%5181 = torch_c.to_i64 %int0_5540
%c0_5543 = arith.constant 0 : index
%c1_5544 = arith.constant 1 : index
%c1_5545 = arith.constant 1 : index
%c512_5546 = arith.constant 512 : index
%c2_5547 = arith.constant 2 : index
%c7_5548 = arith.constant 7 : index
%c3_5549 = arith.constant 3 : index
%c7_5550 = arith.constant 7 : index
%c0_5551 = arith.constant 0 : index
%c512_5552 = arith.constant 512 : index
%c1_5553 = arith.constant 1 : index
%c512_5554 = arith.constant 512 : index
%c2_5555 = arith.constant 2 : index
%c3_5556 = arith.constant 3 : index
%c3_5557 = arith.constant 3 : index
%c3_5558 = arith.constant 3 : index
%5182 = arith.index_cast %5177 : i64 to index
%c0_5559 = arith.constant 0 : index
%5183 = arith.remsi %c512_5546, %5182 : index
%5184 = arith.cmpi eq, %c0_5559, %5183 : index
cf.assert %5184, "invalid: groups must divide input channel size evenly."
%c0_5560 = arith.constant 0 : index
%5185 = arith.remsi %c512_5552, %5182 : index
%5186 = arith.cmpi eq, %c0_5560, %5185 : index
cf.assert %5186, "invalid: groups must divide weight batch size evenly."
%c1_i64_5561 = arith.constant 1 : i64
%c1_i64_5562 = arith.constant 1 : i64
%c1_i64_5563 = arith.constant 1 : i64
%c1_i64_5564 = arith.constant 1 : i64
%cst_5565 = arith.constant 0.000000e+00 : f32
%c0_5566 = arith.constant 0 : index
%c1_5567 = arith.constant 1 : index
%c1_5568 = arith.constant 1 : index
%c512_5569 = arith.constant 512 : index
%c2_5570 = arith.constant 2 : index
%c7_5571 = arith.constant 7 : index
%c3_5572 = arith.constant 3 : index
%c7_5573 = arith.constant 7 : index
%c0_i64_5574 = arith.constant 0 : i64
%5187 = arith.index_cast %c0_i64_5574 : i64 to index
%5188 = arith.index_cast %c0_i64_5574 : i64 to index
%5189 = arith.index_cast %5178 : i64 to index
%5190 = arith.index_cast %5179 : i64 to index
// Pad %cast_5499 with the constant %cst_5565 (0.0 : f32) before the 3x3
// convolution. The first two dims get zero padding (%5187 / %5188 come from
// %c0_i64_5574); the last two dims are padded on both sides by %5189 / %5190,
// which come from the torch.constant.int 1 values %int1_5534 / %int1_5535.
// The result type is fully dynamic (tensor<?x?x?x?xf32>).
%padded_5575 = tensor.pad %cast_5499 low[%5187, %5188, %5189, %5190] high[%5187, %5188, %5189, %5190] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_5565 : f32
} : tensor<1x512x7x7xf32> to tensor<?x?x?x?xf32>
%5191 = arith.index_cast %c3_5556 : index to i64
%c1_i64_5576 = arith.constant 1 : i64
%c2_i64_5577 = arith.constant 2 : i64
%5192 = arith.muli %5178, %c2_i64_5577 : i64
%5193 = arith.index_cast %c7_5548 : index to i64
%5194 = arith.addi %5193, %5192 : i64
%5195 = arith.subi %5191, %c1_i64_5576 : i64
%5196 = arith.muli %c1_i64_5561, %5195 : i64
%5197 = arith.subi %5194, %5196 : i64
%5198 = arith.subi %5197, %c1_i64_5576 : i64
%5199 = arith.floordivsi %5198, %c1_i64_5563 : i64
%5200 = arith.addi %5199, %c1_i64_5576 : i64
%5201 = arith.index_cast %5200 : i64 to index
%5202 = arith.index_cast %c3_5558 : index to i64
%c1_i64_5578 = arith.constant 1 : i64
%c2_i64_5579 = arith.constant 2 : i64
%5203 = arith.muli %5179, %c2_i64_5579 : i64
%5204 = arith.index_cast %c7_5550 : index to i64
%5205 = arith.addi %5204, %5203 : i64
%5206 = arith.subi %5202, %c1_i64_5578 : i64
%5207 = arith.muli %c1_i64_5562, %5206 : i64
%5208 = arith.subi %5205, %5207 : i64
%5209 = arith.subi %5208, %c1_i64_5578 : i64
%5210 = arith.floordivsi %5209, %c1_i64_5564 : i64
%5211 = arith.addi %5210, %c1_i64_5578 : i64
%5212 = arith.index_cast %5211 : i64 to index
%5213 = tensor.empty(%5201, %5212) : tensor<1x512x?x?xf32>
%5214 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5533 : tensor<512xf32>) outs(%5213 : tensor<1x512x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x512x?x?xf32>
%5215 = arith.floordivsi %c512_5546, %5182 : index
%5216 = arith.floordivsi %c512_5552, %5182 : index
%c0_5580 = arith.constant 0 : index
%c1_5581 = arith.constant 1 : index
%5217 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_5575, %cast_5522 : tensor<?x?x?x?xf32>, tensor<512x512x3x3xf32>) outs(%5214 : tensor<1x512x?x?xf32>) -> tensor<1x512x?x?xf32>
%cast_5582 = tensor.cast %5217 : tensor<1x512x?x?xf32> to tensor<1x512x7x7xf32>
// ReLU over the 1x512x7x7 convolution result %cast_5582: every element is
// replaced by itself when it compares greater than 0.0, otherwise by 0.0.
// The seven index constants below are not referenced by any op in this span.
%c1_5583 = arith.constant 1 : index
%c1_5584 = arith.constant 1 : index
%c512_5585 = arith.constant 512 : index
%c2_5586 = arith.constant 2 : index
%c7_5587 = arith.constant 7 : index
%c3_5588 = arith.constant 3 : index
%c7_5589 = arith.constant 7 : index
// Destination tensor for the element-wise result; its initial contents are never read (%out is unused).
%5218 = tensor.empty() : tensor<1x512x7x7xf32>
%5219 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5582 : tensor<1x512x7x7xf32>) outs(%5218 : tensor<1x512x7x7xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// `ugt` is the unordered greater-than predicate, so it is also true for NaN:
// a NaN input is forwarded by the select instead of being replaced with 0.0.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x512x7x7xf32>
// Source and result types are identical, so this cast leaves the type unchanged.
%cast_5590 = tensor.cast %5219 : tensor<1x512x7x7xf32> to tensor<1x512x7x7xf32>
// Quantize the ReLU output %cast_5590 (1x512x7x7, f32) to signed 8-bit:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 1.5625e-02 (= 1/64) and zero_point = 0, both supplied as
// rank-0 torch literals and extracted with torch.aten.item.
%5220 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%5221 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Not referenced in this span — presumably the torch dtype code of the original quantize op (TODO confirm).
%int12_5591 = torch.constant.int 12
%5222 = torch.aten.item %5220 : !torch.vtensor<[],f32> -> !torch.float
// %5223 and %5225 are not used below; the region re-derives both conversions from %5222 / %5224.
%5223 = torch_c.to_f64 %5222
%5224 = torch.aten.item %5221 : !torch.vtensor<[],si8> -> !torch.int
%5225 = torch_c.to_i64 %5224
// Index constants not referenced by any op in this span.
%c1_5592 = arith.constant 1 : index
%c1_5593 = arith.constant 1 : index
%c512_5594 = arith.constant 512 : index
%c2_5595 = arith.constant 2 : index
%c7_5596 = arith.constant 7 : index
%c3_5597 = arith.constant 3 : index
%c7_5598 = arith.constant 7 : index
%5226 = tensor.empty() : tensor<1x512x7x7xi8>
%5227 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5590 : tensor<1x512x7x7xf32>) outs(%5226 : tensor<1x512x7x7xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %5224
%5775 = arith.sitofp %5774 : i64 to f32
// Scale narrowed from f64 to f32.
%5776 = torch_c.to_f64 %5222
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties must round to the nearest even integer: that is the rule of ONNX
// QuantizeLinear and torch.quantize_per_tensor. The generated IR used
// math.round, which rounds ties away from zero and therefore produced a
// different i8 value whenever x / scale landed exactly on k + 0.5.
// NOTE(review): this file is compiler output; the durable fix belongs in the
// lowering that emitted it.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range before the float-to-int conversion. Both
// predicates are unordered, so a NaN input ends up as 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x512x7x7xi8>
// Two casts between identical types; neither changes the type.
%cast_5599 = tensor.cast %5227 : tensor<1x512x7x7xi8> to tensor<1x512x7x7xi8>
%cast_5600 = tensor.cast %cast_5599 : tensor<1x512x7x7xi8> to tensor<1x512x7x7xi8>
// Dequantize the i8 activation %cast_5600 back to f32:
//   x = (sext(q) - zero_point) * scale
// with scale = 1.5625e-02 (= 1/64) and zero_point = 0.
%5228 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%5229 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%5230 = torch.aten.item %5228 : !torch.vtensor<[],f32> -> !torch.float
// %5231 and %5233 are not used below; the region re-derives both conversions from %5230 / %5232.
%5231 = torch_c.to_f64 %5230
%5232 = torch.aten.item %5229 : !torch.vtensor<[],si8> -> !torch.int
%5233 = torch_c.to_i64 %5232
// Cast between identical types.
%cast_5601 = tensor.cast %cast_5600 : tensor<1x512x7x7xi8> to tensor<1x512x7x7xi8>
// Index constants not referenced by any op in this span.
%c1_5602 = arith.constant 1 : index
%c1_5603 = arith.constant 1 : index
%c512_5604 = arith.constant 512 : index
%c2_5605 = arith.constant 2 : index
%c7_5606 = arith.constant 7 : index
%c3_5607 = arith.constant 3 : index
%c7_5608 = arith.constant 7 : index
%5234 = tensor.empty() : tensor<1x512x7x7xf32>
%5235 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5601 : tensor<1x512x7x7xi8>) outs(%5234 : tensor<1x512x7x7xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 first so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %5232
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale narrowed from f64 to f32, then applied.
%5779 = torch_c.to_f64 %5230
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x512x7x7xf32>
// Cast between identical types.
%cast_5609 = tensor.cast %5235 : tensor<1x512x7x7xf32> to tensor<1x512x7x7xf32>
// Quantize the f32 tensor %198 (2048x512x1x1) to signed 8-bit:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 3.90625e-03 (= 1/256) and zero_point = 0.
%5236 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%5237 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Not referenced in this span — presumably the torch dtype code of the original quantize op (TODO confirm).
%int12_5610 = torch.constant.int 12
%5238 = torch.aten.item %5236 : !torch.vtensor<[],f32> -> !torch.float
// %5239 and %5241 are not used below; the region re-derives both conversions from %5238 / %5240.
%5239 = torch_c.to_f64 %5238
%5240 = torch.aten.item %5237 : !torch.vtensor<[],si8> -> !torch.int
%5241 = torch_c.to_i64 %5240
// Index constants not referenced by any op in this span.
%c1_5611 = arith.constant 1 : index
%c0_5612 = arith.constant 0 : index
%c2048_5613 = arith.constant 2048 : index
%c1_5614 = arith.constant 1 : index
%c512_5615 = arith.constant 512 : index
%5242 = tensor.empty() : tensor<2048x512x1x1xi8>
%5243 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%198 : tensor<2048x512x1x1xf32>) outs(%5242 : tensor<2048x512x1x1xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %5240
%5775 = arith.sitofp %5774 : i64 to f32
// Scale narrowed from f64 to f32.
%5776 = torch_c.to_f64 %5238
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties must round to the nearest even integer: that is the rule of ONNX
// QuantizeLinear and torch.quantize_per_tensor. The generated IR used
// math.round, which rounds ties away from zero and therefore produced a
// different i8 value whenever x / scale landed exactly on k + 0.5.
// NOTE(review): this file is compiler output; the durable fix belongs in the
// lowering that emitted it.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range before the float-to-int conversion. Both
// predicates are unordered, so a NaN input ends up as 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<2048x512x1x1xi8>
// Two casts between identical types; neither changes the type.
%cast_5616 = tensor.cast %5243 : tensor<2048x512x1x1xi8> to tensor<2048x512x1x1xi8>
%cast_5617 = tensor.cast %cast_5616 : tensor<2048x512x1x1xi8> to tensor<2048x512x1x1xi8>
// Dequantize the i8 tensor %cast_5617 (2048x512x1x1) back to f32:
//   x = (sext(q) - zero_point) * scale
// with scale = 3.90625e-03 (= 1/256) and zero_point = 0.
%5244 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%5245 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%5246 = torch.aten.item %5244 : !torch.vtensor<[],f32> -> !torch.float
// %5247 and %5249 are not used below; the region re-derives both conversions from %5246 / %5248.
%5247 = torch_c.to_f64 %5246
%5248 = torch.aten.item %5245 : !torch.vtensor<[],si8> -> !torch.int
%5249 = torch_c.to_i64 %5248
// Cast between identical types.
%cast_5618 = tensor.cast %cast_5617 : tensor<2048x512x1x1xi8> to tensor<2048x512x1x1xi8>
// Index constants not referenced by any op in this span.
%c1_5619 = arith.constant 1 : index
%c0_5620 = arith.constant 0 : index
%c2048_5621 = arith.constant 2048 : index
%c1_5622 = arith.constant 1 : index
%c512_5623 = arith.constant 512 : index
%5250 = tensor.empty() : tensor<2048x512x1x1xf32>
%5251 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5618 : tensor<2048x512x1x1xi8>) outs(%5250 : tensor<2048x512x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 first so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %5248
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale narrowed from f64 to f32, then applied.
%5779 = torch_c.to_f64 %5246
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<2048x512x1x1xf32>
// Cast between identical types; %cast_5624 is the filter operand of the convolution further down.
%cast_5624 = tensor.cast %5251 : tensor<2048x512x1x1xf32> to tensor<2048x512x1x1xf32>
// Quantize the f32 vector %200 (2048 elements) to signed 8-bit:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 1.5625e-02 (= 1/64) and zero_point = 0.
%5252 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%5253 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Not referenced in this span — presumably the torch dtype code of the original quantize op (TODO confirm).
%int12_5625 = torch.constant.int 12
%5254 = torch.aten.item %5252 : !torch.vtensor<[],f32> -> !torch.float
// %5255 and %5257 are not used below; the region re-derives both conversions from %5254 / %5256.
%5255 = torch_c.to_f64 %5254
%5256 = torch.aten.item %5253 : !torch.vtensor<[],si8> -> !torch.int
%5257 = torch_c.to_i64 %5256
// Index constants not referenced by any op in this span.
%c1_5626 = arith.constant 1 : index
%c0_5627 = arith.constant 0 : index
%c2048_5628 = arith.constant 2048 : index
%5258 = tensor.empty() : tensor<2048xi8>
%5259 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%200 : tensor<2048xf32>) outs(%5258 : tensor<2048xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %5256
%5775 = arith.sitofp %5774 : i64 to f32
// Scale narrowed from f64 to f32.
%5776 = torch_c.to_f64 %5254
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties must round to the nearest even integer: that is the rule of ONNX
// QuantizeLinear and torch.quantize_per_tensor. The generated IR used
// math.round, which rounds ties away from zero and therefore produced a
// different i8 value whenever x / scale landed exactly on k + 0.5.
// NOTE(review): this file is compiler output; the durable fix belongs in the
// lowering that emitted it.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range before the float-to-int conversion. Both
// predicates are unordered, so a NaN input ends up as 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<2048xi8>
// Two casts between identical types; neither changes the type.
%cast_5629 = tensor.cast %5259 : tensor<2048xi8> to tensor<2048xi8>
%cast_5630 = tensor.cast %cast_5629 : tensor<2048xi8> to tensor<2048xi8>
// Dequantize the i8 vector %cast_5630 (2048 elements) back to f32:
//   x = (sext(q) - zero_point) * scale
// with scale = 1.5625e-02 (= 1/64) and zero_point = 0.
%5260 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%5261 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%5262 = torch.aten.item %5260 : !torch.vtensor<[],f32> -> !torch.float
// %5263 and %5265 are not used below; the region re-derives both conversions from %5262 / %5264.
%5263 = torch_c.to_f64 %5262
%5264 = torch.aten.item %5261 : !torch.vtensor<[],si8> -> !torch.int
%5265 = torch_c.to_i64 %5264
// Cast between identical types.
%cast_5631 = tensor.cast %cast_5630 : tensor<2048xi8> to tensor<2048xi8>
// Index constants not referenced by any op in this span.
%c1_5632 = arith.constant 1 : index
%c0_5633 = arith.constant 0 : index
%c2048_5634 = arith.constant 2048 : index
%5266 = tensor.empty() : tensor<2048xf32>
%5267 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_5631 : tensor<2048xi8>) outs(%5266 : tensor<2048xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 first so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %5264
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale narrowed from f64 to f32, then applied.
%5779 = torch_c.to_f64 %5262
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<2048xf32>
// Cast between identical types; %cast_5635 is broadcast as the convolution's initial accumulator further down.
%cast_5635 = tensor.cast %5267 : tensor<2048xf32> to tensor<2048xf32>
// 1x1 convolution taking the dequantized activation %cast_5609 (1x512x7x7)
// and the dequantized filter %cast_5624 (2048x512x1x1) to a 1x2048x7x7 result,
// with the dequantized vector %cast_5635 (2048) pre-loaded into the output so
// the convolution accumulates on top of it.
//
// Torch-level scalar arguments. %int0_5636 / %int0_5637 feed the spatial
// padding below and %int1_5644 feeds the group count; the remaining ints and
// the four lists are not consumed by any op in this span (presumably stride,
// dilation and output-padding arguments of the original op — TODO confirm).
%int0_5636 = torch.constant.int 0
%int0_5637 = torch.constant.int 0
%int1_5638 = torch.constant.int 1
%int1_5639 = torch.constant.int 1
%int1_5640 = torch.constant.int 1
%int1_5641 = torch.constant.int 1
%int0_5642 = torch.constant.int 0
%5268 = torch.prim.ListConstruct %int0_5636, %int0_5637 : (!torch.int, !torch.int) -> !torch.list<int>
%5269 = torch.prim.ListConstruct %int1_5638, %int1_5639 : (!torch.int, !torch.int) -> !torch.list<int>
%5270 = torch.prim.ListConstruct %int1_5640, %int1_5641 : (!torch.int, !torch.int) -> !torch.list<int>
%5271 = torch.prim.ListConstruct %int0_5642, %int0_5642 : (!torch.int, !torch.int) -> !torch.list<int>
%false_5643 = torch.constant.bool false
%int1_5644 = torch.constant.int 1
// %5272 = groups (1); %5273 / %5274 = padding for tensor dims 2 and 3 (both 0).
// %5275 and %5276 are not used in this span.
%5272 = torch_c.to_i64 %int1_5644
%5273 = torch_c.to_i64 %int0_5636
%5274 = torch_c.to_i64 %int0_5637
%5275 = torch_c.to_i64 %int0_5642
%5276 = torch_c.to_i64 %int0_5642
// Index constants. Only %c512_5648, %c7_5650, %c7_5652, %c2048_5654,
// %c1_5658 and %c1_5660 are read by the ops in this span.
%c0_5645 = arith.constant 0 : index
%c1_5646 = arith.constant 1 : index
%c1_5647 = arith.constant 1 : index
%c512_5648 = arith.constant 512 : index
%c2_5649 = arith.constant 2 : index
%c7_5650 = arith.constant 7 : index
%c3_5651 = arith.constant 3 : index
%c7_5652 = arith.constant 7 : index
%c0_5653 = arith.constant 0 : index
%c2048_5654 = arith.constant 2048 : index
%c1_5655 = arith.constant 1 : index
%c512_5656 = arith.constant 512 : index
%c2_5657 = arith.constant 2 : index
%c1_5658 = arith.constant 1 : index
%c3_5659 = arith.constant 3 : index
%c1_5660 = arith.constant 1 : index
// Runtime checks that the group count divides both 512 and 2048.
%5277 = arith.index_cast %5272 : i64 to index
%c0_5661 = arith.constant 0 : index
%5278 = arith.remsi %c512_5648, %5277 : index
%5279 = arith.cmpi eq, %c0_5661, %5278 : index
cf.assert %5279, "invalid: groups must divide input channel size evenly."
%c0_5662 = arith.constant 0 : index
%5280 = arith.remsi %c2048_5654, %5277 : index
%5281 = arith.cmpi eq, %c0_5662, %5280 : index
cf.assert %5281, "invalid: groups must divide weight batch size evenly."
// %c1_i64_5663 / %c1_i64_5664 multiply (kernel - 1) and %c1_i64_5665 /
// %c1_i64_5666 are the divisors in the output-size arithmetic below.
%c1_i64_5663 = arith.constant 1 : i64
%c1_i64_5664 = arith.constant 1 : i64
%c1_i64_5665 = arith.constant 1 : i64
%c1_i64_5666 = arith.constant 1 : i64
// Value written into padded positions.
%cst_5667 = arith.constant 0.000000e+00 : f32
// Index constants not referenced by any op in this span.
%c0_5668 = arith.constant 0 : index
%c1_5669 = arith.constant 1 : index
%c1_5670 = arith.constant 1 : index
%c512_5671 = arith.constant 512 : index
%c2_5672 = arith.constant 2 : index
%c7_5673 = arith.constant 7 : index
%c3_5674 = arith.constant 3 : index
%c7_5675 = arith.constant 7 : index
// Pad amounts: 0 for dims 0 and 1, %5273 / %5274 (both 0 here) for dims 2 and 3,
// applied symmetrically as both low and high padding.
%c0_i64_5676 = arith.constant 0 : i64
%5282 = arith.index_cast %c0_i64_5676 : i64 to index
%5283 = arith.index_cast %c0_i64_5676 : i64 to index
%5284 = arith.index_cast %5273 : i64 to index
%5285 = arith.index_cast %5274 : i64 to index
// The result type is fully dynamic because the pad amounts are SSA values.
%padded_5677 = tensor.pad %cast_5609 low[%5282, %5283, %5284, %5285] high[%5282, %5283, %5284, %5285] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_5667 : f32
} : tensor<1x512x7x7xf32> to tensor<?x?x?x?xf32>
// First dynamic output extent:
//   (7 + 2*pad - 1*(1 - 1) - 1) floordiv 1 + 1
// which evaluates to 7 for the constants above.
%5286 = arith.index_cast %c1_5658 : index to i64
%c1_i64_5678 = arith.constant 1 : i64
%c2_i64_5679 = arith.constant 2 : i64
%5287 = arith.muli %5273, %c2_i64_5679 : i64
%5288 = arith.index_cast %c7_5650 : index to i64
%5289 = arith.addi %5288, %5287 : i64
%5290 = arith.subi %5286, %c1_i64_5678 : i64
%5291 = arith.muli %c1_i64_5663, %5290 : i64
%5292 = arith.subi %5289, %5291 : i64
%5293 = arith.subi %5292, %c1_i64_5678 : i64
%5294 = arith.floordivsi %5293, %c1_i64_5665 : i64
%5295 = arith.addi %5294, %c1_i64_5678 : i64
%5296 = arith.index_cast %5295 : i64 to index
// Second dynamic output extent, same arithmetic using %5274, %c7_5652 and %c1_5660.
%5297 = arith.index_cast %c1_5660 : index to i64
%c1_i64_5680 = arith.constant 1 : i64
%c2_i64_5681 = arith.constant 2 : i64
%5298 = arith.muli %5274, %c2_i64_5681 : i64
%5299 = arith.index_cast %c7_5652 : index to i64
%5300 = arith.addi %5299, %5298 : i64
%5301 = arith.subi %5297, %c1_i64_5680 : i64
%5302 = arith.muli %c1_i64_5664, %5301 : i64
%5303 = arith.subi %5300, %5302 : i64
%5304 = arith.subi %5303, %c1_i64_5680 : i64
%5305 = arith.floordivsi %5304, %c1_i64_5666 : i64
%5306 = arith.addi %5305, %c1_i64_5680 : i64
%5307 = arith.index_cast %5306 : i64 to index
// Output buffer, filled by broadcasting %cast_5635 along d1 (#map3 selects d1 only).
%5308 = tensor.empty(%5296, %5307) : tensor<1x2048x?x?xf32>
%5309 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5635 : tensor<2048xf32>) outs(%5308 : tensor<1x2048x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x2048x?x?xf32>
// Per-group channel counts; computed but not used in this span.
%5310 = arith.floordivsi %c512_5648, %5277 : index
%5311 = arith.floordivsi %c2048_5654, %5277 : index
%c0_5682 = arith.constant 0 : index
%c1_5683 = arith.constant 1 : index
// Unit-stride, unit-dilation convolution accumulating into the pre-filled output %5309.
%5312 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_5677, %cast_5624 : tensor<?x?x?x?xf32>, tensor<2048x512x1x1xf32>) outs(%5309 : tensor<1x2048x?x?xf32>) -> tensor<1x2048x?x?xf32>
// Re-establish the static 7x7 spatial shape.
%cast_5684 = tensor.cast %5312 : tensor<1x2048x?x?xf32> to tensor<1x2048x7x7xf32>
// Quantize the convolution result %cast_5684 (1x2048x7x7, f32) to signed 8-bit:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 1.5625e-02 (= 1/64) and zero_point = 0.
%5313 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%5314 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Not referenced in this span — presumably the torch dtype code of the original quantize op (TODO confirm).
%int12_5685 = torch.constant.int 12
%5315 = torch.aten.item %5313 : !torch.vtensor<[],f32> -> !torch.float
// %5316 and %5318 are not used below; the region re-derives both conversions from %5315 / %5317.
%5316 = torch_c.to_f64 %5315
%5317 = torch.aten.item %5314 : !torch.vtensor<[],si8> -> !torch.int
%5318 = torch_c.to_i64 %5317
// Index constants not referenced by any op in this span.
%c1_5686 = arith.constant 1 : index
%c1_5687 = arith.constant 1 : index
%c2048_5688 = arith.constant 2048 : index
%c2_5689 = arith.constant 2 : index
%c7_5690 = arith.constant 7 : index
%c3_5691 = arith.constant 3 : index
%c7_5692 = arith.constant 7 : index
%5319 = tensor.empty() : tensor<1x2048x7x7xi8>
%5320 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5684 : tensor<1x2048x7x7xf32>) outs(%5319 : tensor<1x2048x7x7xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %5317
%5775 = arith.sitofp %5774 : i64 to f32
// Scale narrowed from f64 to f32.
%5776 = torch_c.to_f64 %5315
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties must round to the nearest even integer: that is the rule of ONNX
// QuantizeLinear and torch.quantize_per_tensor. The generated IR used
// math.round, which rounds ties away from zero and therefore produced a
// different i8 value whenever x / scale landed exactly on k + 0.5.
// NOTE(review): this file is compiler output; the durable fix belongs in the
// lowering that emitted it.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range before the float-to-int conversion. Both
// predicates are unordered, so a NaN input ends up as 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x2048x7x7xi8>
// Two casts between identical types; neither changes the type.
%cast_5693 = tensor.cast %5320 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
%cast_5694 = tensor.cast %cast_5693 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
// Dequantize the i8 tensor %cast_5694 (1x2048x7x7) back to f32:
//   x = (sext(q) - zero_point) * scale
// with scale = 1.5625e-02 (= 1/64) and zero_point = 0.
%5321 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%5322 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%5323 = torch.aten.item %5321 : !torch.vtensor<[],f32> -> !torch.float
// %5324 and %5326 are not used below; the region re-derives both conversions from %5323 / %5325.
%5324 = torch_c.to_f64 %5323
%5325 = torch.aten.item %5322 : !torch.vtensor<[],si8> -> !torch.int
%5326 = torch_c.to_i64 %5325
// Cast between identical types.
%cast_5695 = tensor.cast %cast_5694 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
// Index constants not referenced by any op in this span.
%c1_5696 = arith.constant 1 : index
%c1_5697 = arith.constant 1 : index
%c2048_5698 = arith.constant 2048 : index
%c2_5699 = arith.constant 2 : index
%c7_5700 = arith.constant 7 : index
%c3_5701 = arith.constant 3 : index
%c7_5702 = arith.constant 7 : index
%5327 = tensor.empty() : tensor<1x2048x7x7xf32>
%5328 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5695 : tensor<1x2048x7x7xi8>) outs(%5327 : tensor<1x2048x7x7xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 first so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %5325
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale narrowed from f64 to f32, then applied.
%5779 = torch_c.to_f64 %5323
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x2048x7x7xf32>
// Cast between identical types.
%cast_5703 = tensor.cast %5328 : tensor<1x2048x7x7xf32> to tensor<1x2048x7x7xf32>
// Element-wise sum of two 1x2048x7x7 f32 tensors:
//   out = %cast_5703 + %cast_5397 * alpha, with alpha = 1 (%int1_5704).
// %cast_5397 is defined earlier in the function, outside this span.
%int1_5704 = torch.constant.int 1
%5329 = torch_c.to_i64 %int1_5704
%c1_5705 = arith.constant 1 : index
%c1_5706 = arith.constant 1 : index
%c2048_5707 = arith.constant 2048 : index
%c2_5708 = arith.constant 2 : index
%c7_5709 = arith.constant 7 : index
%c3_5710 = arith.constant 3 : index
%c7_5711 = arith.constant 7 : index
%c1_5712 = arith.constant 1 : index
// Broadcast-compatibility checks. Each one compares two constants of equal
// value (2048 == 2048, 7 == 7, 7 == 7), so all three conditions are true.
%c2048_5713 = arith.constant 2048 : index
%5330 = arith.cmpi eq, %c2048_5707, %c2048_5713 : index
cf.assert %5330, "mismatched size for broadcast"
%c2_5714 = arith.constant 2 : index
%c7_5715 = arith.constant 7 : index
%5331 = arith.cmpi eq, %c7_5709, %c7_5715 : index
cf.assert %5331, "mismatched size for broadcast"
%c3_5716 = arith.constant 3 : index
%c7_5717 = arith.constant 7 : index
%5332 = arith.cmpi eq, %c7_5711, %c7_5717 : index
cf.assert %5332, "mismatched size for broadcast"
%5333 = tensor.empty() : tensor<1x2048x7x7xf32>
%5334 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5703, %cast_5397 : tensor<1x2048x7x7xf32>, tensor<1x2048x7x7xf32>) outs(%5333 : tensor<1x2048x7x7xf32>) {
^bb0(%in: f32, %in_6197: f32, %out: f32):
// alpha is converted from i64 to f32 inside the region, then scales the second operand.
%5774 = arith.sitofp %5329 : i64 to f32
%5775 = arith.mulf %in_6197, %5774 : f32
%5776 = arith.addf %in, %5775 : f32
linalg.yield %5776 : f32
} -> tensor<1x2048x7x7xf32>
// Cast between identical types.
%cast_5718 = tensor.cast %5334 : tensor<1x2048x7x7xf32> to tensor<1x2048x7x7xf32>
// ReLU over the 1x2048x7x7 sum %cast_5718: every element is replaced by
// itself when it compares greater than 0.0, otherwise by 0.0.
// The seven index constants below are not referenced by any op in this span.
%c1_5719 = arith.constant 1 : index
%c1_5720 = arith.constant 1 : index
%c2048_5721 = arith.constant 2048 : index
%c2_5722 = arith.constant 2 : index
%c7_5723 = arith.constant 7 : index
%c3_5724 = arith.constant 3 : index
%c7_5725 = arith.constant 7 : index
// Destination tensor for the element-wise result; its initial contents are never read (%out is unused).
%5335 = tensor.empty() : tensor<1x2048x7x7xf32>
%5336 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5718 : tensor<1x2048x7x7xf32>) outs(%5335 : tensor<1x2048x7x7xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
// `ugt` is the unordered greater-than predicate, so it is also true for NaN:
// a NaN input is forwarded by the select instead of being replaced with 0.0.
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x2048x7x7xf32>
// Cast between identical types.
%cast_5726 = tensor.cast %5336 : tensor<1x2048x7x7xf32> to tensor<1x2048x7x7xf32>
// Quantize the ReLU output %cast_5726 (1x2048x7x7, f32) to signed 8-bit:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 3.125e-02 (= 1/32) and zero_point = 0.
%5337 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%5338 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Not referenced in this span — presumably the torch dtype code of the original quantize op (TODO confirm).
%int12_5727 = torch.constant.int 12
%5339 = torch.aten.item %5337 : !torch.vtensor<[],f32> -> !torch.float
// %5340 and %5342 are not used below; the region re-derives both conversions from %5339 / %5341.
%5340 = torch_c.to_f64 %5339
%5341 = torch.aten.item %5338 : !torch.vtensor<[],si8> -> !torch.int
%5342 = torch_c.to_i64 %5341
// Index constants not referenced by any op in this span.
%c1_5728 = arith.constant 1 : index
%c1_5729 = arith.constant 1 : index
%c2048_5730 = arith.constant 2048 : index
%c2_5731 = arith.constant 2 : index
%c7_5732 = arith.constant 7 : index
%c3_5733 = arith.constant 3 : index
%c7_5734 = arith.constant 7 : index
%5343 = tensor.empty() : tensor<1x2048x7x7xi8>
%5344 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5726 : tensor<1x2048x7x7xf32>) outs(%5343 : tensor<1x2048x7x7xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %5341
%5775 = arith.sitofp %5774 : i64 to f32
// Scale narrowed from f64 to f32.
%5776 = torch_c.to_f64 %5339
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties must round to the nearest even integer: that is the rule of ONNX
// QuantizeLinear and torch.quantize_per_tensor. The generated IR used
// math.round, which rounds ties away from zero and therefore produced a
// different i8 value whenever x / scale landed exactly on k + 0.5.
// NOTE(review): this file is compiler output; the durable fix belongs in the
// lowering that emitted it.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range before the float-to-int conversion. Both
// predicates are unordered, so a NaN input ends up as 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x2048x7x7xi8>
// Two casts between identical types; neither changes the type.
%cast_5735 = tensor.cast %5344 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
%cast_5736 = tensor.cast %cast_5735 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
// Dequantize the i8 tensor %cast_5736 (1x2048x7x7) back to f32:
//   x = (sext(q) - zero_point) * scale
// with scale = 3.125e-02 (= 1/32) and zero_point = 0.
%5345 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%5346 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%5347 = torch.aten.item %5345 : !torch.vtensor<[],f32> -> !torch.float
// %5348 and %5350 are not used below; the region re-derives both conversions from %5347 / %5349.
%5348 = torch_c.to_f64 %5347
%5349 = torch.aten.item %5346 : !torch.vtensor<[],si8> -> !torch.int
%5350 = torch_c.to_i64 %5349
// Cast between identical types.
%cast_5737 = tensor.cast %cast_5736 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
// Index constants not referenced by any op in this span.
%c1_5738 = arith.constant 1 : index
%c1_5739 = arith.constant 1 : index
%c2048_5740 = arith.constant 2048 : index
%c2_5741 = arith.constant 2 : index
%c7_5742 = arith.constant 7 : index
%c3_5743 = arith.constant 3 : index
%c7_5744 = arith.constant 7 : index
%5351 = tensor.empty() : tensor<1x2048x7x7xf32>
%5352 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5737 : tensor<1x2048x7x7xi8>) outs(%5351 : tensor<1x2048x7x7xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 first so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %5349
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale narrowed from f64 to f32, then applied.
%5779 = torch_c.to_f64 %5347
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x2048x7x7xf32>
// Cast between identical types; %cast_5745 is the input that the following convolution pads.
%cast_5745 = tensor.cast %5352 : tensor<1x2048x7x7xf32> to tensor<1x2048x7x7xf32>
// Quantize the f32 tensor %202 (512x2048x1x1) to signed 8-bit:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 4.8828125e-04 (= 1/2048) and zero_point = 0.
%5353 = torch.vtensor.literal(dense<4.8828125E-4> : tensor<f32>) : !torch.vtensor<[],f32>
%5354 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Not referenced in this span — presumably the torch dtype code of the original quantize op (TODO confirm).
%int12_5746 = torch.constant.int 12
%5355 = torch.aten.item %5353 : !torch.vtensor<[],f32> -> !torch.float
// %5356 and %5358 are not used below; the region re-derives both conversions from %5355 / %5357.
%5356 = torch_c.to_f64 %5355
%5357 = torch.aten.item %5354 : !torch.vtensor<[],si8> -> !torch.int
%5358 = torch_c.to_i64 %5357
// Index constants not referenced by any op in this span.
%c1_5747 = arith.constant 1 : index
%c0_5748 = arith.constant 0 : index
%c512_5749 = arith.constant 512 : index
%c1_5750 = arith.constant 1 : index
%c2048_5751 = arith.constant 2048 : index
%5359 = tensor.empty() : tensor<512x2048x1x1xi8>
%5360 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%202 : tensor<512x2048x1x1xf32>) outs(%5359 : tensor<512x2048x1x1xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %5357
%5775 = arith.sitofp %5774 : i64 to f32
// Scale narrowed from f64 to f32.
%5776 = torch_c.to_f64 %5355
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties must round to the nearest even integer: that is the rule of ONNX
// QuantizeLinear and torch.quantize_per_tensor. The generated IR used
// math.round, which rounds ties away from zero and therefore produced a
// different i8 value whenever x / scale landed exactly on k + 0.5.
// NOTE(review): this file is compiler output; the durable fix belongs in the
// lowering that emitted it.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range before the float-to-int conversion. Both
// predicates are unordered, so a NaN input ends up as 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<512x2048x1x1xi8>
// Two casts between identical types; neither changes the type.
%cast_5752 = tensor.cast %5360 : tensor<512x2048x1x1xi8> to tensor<512x2048x1x1xi8>
%cast_5753 = tensor.cast %cast_5752 : tensor<512x2048x1x1xi8> to tensor<512x2048x1x1xi8>
// Dequantize the i8 tensor %cast_5753 (512x2048x1x1) back to f32:
//   x = (sext(q) - zero_point) * scale
// with scale = 4.8828125e-04 (= 1/2048) and zero_point = 0.
%5361 = torch.vtensor.literal(dense<4.8828125E-4> : tensor<f32>) : !torch.vtensor<[],f32>
%5362 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%5363 = torch.aten.item %5361 : !torch.vtensor<[],f32> -> !torch.float
// %5364 and %5366 are not used below; the region re-derives both conversions from %5363 / %5365.
%5364 = torch_c.to_f64 %5363
%5365 = torch.aten.item %5362 : !torch.vtensor<[],si8> -> !torch.int
%5366 = torch_c.to_i64 %5365
// Cast between identical types.
%cast_5754 = tensor.cast %cast_5753 : tensor<512x2048x1x1xi8> to tensor<512x2048x1x1xi8>
// Index constants not referenced by any op in this span.
%c1_5755 = arith.constant 1 : index
%c0_5756 = arith.constant 0 : index
%c512_5757 = arith.constant 512 : index
%c1_5758 = arith.constant 1 : index
%c2048_5759 = arith.constant 2048 : index
%5367 = tensor.empty() : tensor<512x2048x1x1xf32>
%5368 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5754 : tensor<512x2048x1x1xi8>) outs(%5367 : tensor<512x2048x1x1xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 first so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %5365
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale narrowed from f64 to f32, then applied.
%5779 = torch_c.to_f64 %5363
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<512x2048x1x1xf32>
// Cast between identical types.
%cast_5760 = tensor.cast %5368 : tensor<512x2048x1x1xf32> to tensor<512x2048x1x1xf32>
// Quantize the f32 vector %204 (512 elements) to signed 8-bit:
//   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
// with scale = 7.8125e-03 (= 1/128) and zero_point = 0.
%5369 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%5370 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
// Not referenced in this span — presumably the torch dtype code of the original quantize op (TODO confirm).
%int12_5761 = torch.constant.int 12
%5371 = torch.aten.item %5369 : !torch.vtensor<[],f32> -> !torch.float
// %5372 and %5374 are not used below; the region re-derives both conversions from %5371 / %5373.
%5372 = torch_c.to_f64 %5371
%5373 = torch.aten.item %5370 : !torch.vtensor<[],si8> -> !torch.int
%5374 = torch_c.to_i64 %5373
// Index constants not referenced by any op in this span.
%c1_5762 = arith.constant 1 : index
%c0_5763 = arith.constant 0 : index
%c512_5764 = arith.constant 512 : index
%5375 = tensor.empty() : tensor<512xi8>
%5376 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%204 : tensor<512xf32>) outs(%5375 : tensor<512xi8>) {
^bb0(%in: f32, %out: i8):
// Zero point as f32.
%5774 = torch_c.to_i64 %5373
%5775 = arith.sitofp %5774 : i64 to f32
// Scale narrowed from f64 to f32.
%5776 = torch_c.to_f64 %5371
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
// Ties must round to the nearest even integer: that is the rule of ONNX
// QuantizeLinear and torch.quantize_per_tensor. The generated IR used
// math.round, which rounds ties away from zero and therefore produced a
// different i8 value whenever x / scale landed exactly on k + 0.5.
// NOTE(review): this file is compiler output; the durable fix belongs in the
// lowering that emitted it.
%5779 = math.roundeven %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
// Saturate to the i8 range before the float-to-int conversion. Both
// predicates are unordered, so a NaN input ends up as 127.
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<512xi8>
// Two casts between identical types; neither changes the type.
%cast_5765 = tensor.cast %5376 : tensor<512xi8> to tensor<512xi8>
%cast_5766 = tensor.cast %cast_5765 : tensor<512xi8> to tensor<512xi8>
// Dequantize the i8 vector %cast_5766 (512 elements) back to f32:
//   x = (sext(q) - zero_point) * scale
// with scale = 7.8125e-03 (= 1/128) and zero_point = 0.
%5377 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%5378 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%5379 = torch.aten.item %5377 : !torch.vtensor<[],f32> -> !torch.float
// %5380 and %5382 are not used below; the region re-derives both conversions from %5379 / %5381.
%5380 = torch_c.to_f64 %5379
%5381 = torch.aten.item %5378 : !torch.vtensor<[],si8> -> !torch.int
%5382 = torch_c.to_i64 %5381
// Cast between identical types.
%cast_5767 = tensor.cast %cast_5766 : tensor<512xi8> to tensor<512xi8>
// Index constants not referenced by any op in this span.
%c1_5768 = arith.constant 1 : index
%c0_5769 = arith.constant 0 : index
%c512_5770 = arith.constant 512 : index
%5383 = tensor.empty() : tensor<512xf32>
%5384 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_5767 : tensor<512xi8>) outs(%5383 : tensor<512xf32>) {
^bb0(%in: i8, %out: f32):
// Widen to i32 first so the zero-point subtraction cannot wrap in 8 bits.
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %5381
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
// Scale narrowed from f64 to f32, then applied.
%5779 = torch_c.to_f64 %5379
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<512xf32>
// Cast between identical types.
%cast_5771 = tensor.cast %5384 : tensor<512xf32> to tensor<512xf32>
%int0_5772 = torch.constant.int 0
%int0_5773 = torch.constant.int 0
%int1_5774 = torch.constant.int 1
%int1_5775 = torch.constant.int 1
%int1_5776 = torch.constant.int 1
%int1_5777 = torch.constant.int 1
%int0_5778 = torch.constant.int 0
%5385 = torch.prim.ListConstruct %int0_5772, %int0_5773 : (!torch.int, !torch.int) -> !torch.list<int>
%5386 = torch.prim.ListConstruct %int1_5774, %int1_5775 : (!torch.int, !torch.int) -> !torch.list<int>
%5387 = torch.prim.ListConstruct %int1_5776, %int1_5777 : (!torch.int, !torch.int) -> !torch.list<int>
%5388 = torch.prim.ListConstruct %int0_5778, %int0_5778 : (!torch.int, !torch.int) -> !torch.list<int>
%false_5779 = torch.constant.bool false
%int1_5780 = torch.constant.int 1
%5389 = torch_c.to_i64 %int1_5780
%5390 = torch_c.to_i64 %int0_5772
%5391 = torch_c.to_i64 %int0_5773
%5392 = torch_c.to_i64 %int0_5778
%5393 = torch_c.to_i64 %int0_5778
%c0_5781 = arith.constant 0 : index
%c1_5782 = arith.constant 1 : index
%c1_5783 = arith.constant 1 : index
%c2048_5784 = arith.constant 2048 : index
%c2_5785 = arith.constant 2 : index
%c7_5786 = arith.constant 7 : index
%c3_5787 = arith.constant 3 : index
%c7_5788 = arith.constant 7 : index
%c0_5789 = arith.constant 0 : index
%c512_5790 = arith.constant 512 : index
%c1_5791 = arith.constant 1 : index
%c2048_5792 = arith.constant 2048 : index
%c2_5793 = arith.constant 2 : index
%c1_5794 = arith.constant 1 : index
%c3_5795 = arith.constant 3 : index
%c1_5796 = arith.constant 1 : index
%5394 = arith.index_cast %5389 : i64 to index
%c0_5797 = arith.constant 0 : index
%5395 = arith.remsi %c2048_5784, %5394 : index
%5396 = arith.cmpi eq, %c0_5797, %5395 : index
cf.assert %5396, "invalid: groups must divide input channel size evenly."
%c0_5798 = arith.constant 0 : index
%5397 = arith.remsi %c512_5790, %5394 : index
%5398 = arith.cmpi eq, %c0_5798, %5397 : index
cf.assert %5398, "invalid: groups must divide weight batch size evenly."
%c1_i64_5799 = arith.constant 1 : i64
%c1_i64_5800 = arith.constant 1 : i64
%c1_i64_5801 = arith.constant 1 : i64
%c1_i64_5802 = arith.constant 1 : i64
%cst_5803 = arith.constant 0.000000e+00 : f32
%c0_5804 = arith.constant 0 : index
%c1_5805 = arith.constant 1 : index
%c1_5806 = arith.constant 1 : index
%c2048_5807 = arith.constant 2048 : index
%c2_5808 = arith.constant 2 : index
%c7_5809 = arith.constant 7 : index
%c3_5810 = arith.constant 3 : index
%c7_5811 = arith.constant 7 : index
%c0_i64_5812 = arith.constant 0 : i64
%5399 = arith.index_cast %c0_i64_5812 : i64 to index
%5400 = arith.index_cast %c0_i64_5812 : i64 to index
%5401 = arith.index_cast %5390 : i64 to index
%5402 = arith.index_cast %5391 : i64 to index
%padded_5813 = tensor.pad %cast_5745 low[%5399, %5400, %5401, %5402] high[%5399, %5400, %5401, %5402] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_5803 : f32
} : tensor<1x2048x7x7xf32> to tensor<?x?x?x?xf32>
%5403 = arith.index_cast %c1_5794 : index to i64
%c1_i64_5814 = arith.constant 1 : i64
%c2_i64_5815 = arith.constant 2 : i64
%5404 = arith.muli %5390, %c2_i64_5815 : i64
%5405 = arith.index_cast %c7_5786 : index to i64
%5406 = arith.addi %5405, %5404 : i64
%5407 = arith.subi %5403, %c1_i64_5814 : i64
%5408 = arith.muli %c1_i64_5799, %5407 : i64
%5409 = arith.subi %5406, %5408 : i64
%5410 = arith.subi %5409, %c1_i64_5814 : i64
%5411 = arith.floordivsi %5410, %c1_i64_5801 : i64
%5412 = arith.addi %5411, %c1_i64_5814 : i64
%5413 = arith.index_cast %5412 : i64 to index
%5414 = arith.index_cast %c1_5796 : index to i64
%c1_i64_5816 = arith.constant 1 : i64
%c2_i64_5817 = arith.constant 2 : i64
%5415 = arith.muli %5391, %c2_i64_5817 : i64
%5416 = arith.index_cast %c7_5788 : index to i64
%5417 = arith.addi %5416, %5415 : i64
%5418 = arith.subi %5414, %c1_i64_5816 : i64
%5419 = arith.muli %c1_i64_5800, %5418 : i64
%5420 = arith.subi %5417, %5419 : i64
%5421 = arith.subi %5420, %c1_i64_5816 : i64
%5422 = arith.floordivsi %5421, %c1_i64_5802 : i64
%5423 = arith.addi %5422, %c1_i64_5816 : i64
%5424 = arith.index_cast %5423 : i64 to index
%5425 = tensor.empty(%5413, %5424) : tensor<1x512x?x?xf32>
%5426 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5771 : tensor<512xf32>) outs(%5425 : tensor<1x512x?x?xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x512x?x?xf32>
%5427 = arith.floordivsi %c2048_5784, %5394 : index
%5428 = arith.floordivsi %c512_5790, %5394 : index
%c0_5818 = arith.constant 0 : index
%c1_5819 = arith.constant 1 : index
%5429 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_5813, %cast_5760 : tensor<?x?x?x?xf32>, tensor<512x2048x1x1xf32>) outs(%5426 : tensor<1x512x?x?xf32>) -> tensor<1x512x?x?xf32>
%cast_5820 = tensor.cast %5429 : tensor<1x512x?x?xf32> to tensor<1x512x7x7xf32>
%c1_5821 = arith.constant 1 : index
%c1_5822 = arith.constant 1 : index
%c512_5823 = arith.constant 512 : index
%c2_5824 = arith.constant 2 : index
%c7_5825 = arith.constant 7 : index
%c3_5826 = arith.constant 3 : index
%c7_5827 = arith.constant 7 : index
%5430 = tensor.empty() : tensor<1x512x7x7xf32>
%5431 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5820 : tensor<1x512x7x7xf32>) outs(%5430 : tensor<1x512x7x7xf32>) {
^bb0(%in: f32, %out: f32):
%cst_6197 = arith.constant 0.000000e+00 : f32
%5774 = arith.cmpf ugt, %in, %cst_6197 : f32
%5775 = arith.select %5774, %in, %cst_6197 : f32
linalg.yield %5775 : f32
} -> tensor<1x512x7x7xf32>
%cast_5828 = tensor.cast %5431 : tensor<1x512x7x7xf32> to tensor<1x512x7x7xf32>
%5432 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%5433 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_5829 = torch.constant.int 12
%5434 = torch.aten.item %5432 : !torch.vtensor<[],f32> -> !torch.float
%5435 = torch_c.to_f64 %5434
%5436 = torch.aten.item %5433 : !torch.vtensor<[],si8> -> !torch.int
%5437 = torch_c.to_i64 %5436
%c1_5830 = arith.constant 1 : index
%c1_5831 = arith.constant 1 : index
%c512_5832 = arith.constant 512 : index
%c2_5833 = arith.constant 2 : index
%c7_5834 = arith.constant 7 : index
%c3_5835 = arith.constant 3 : index
%c7_5836 = arith.constant 7 : index
%5438 = tensor.empty() : tensor<1x512x7x7xi8>
%5439 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5828 : tensor<1x512x7x7xf32>) outs(%5438 : tensor<1x512x7x7xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %5436
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %5434
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<1x512x7x7xi8>
%cast_5837 = tensor.cast %5439 : tensor<1x512x7x7xi8> to tensor<1x512x7x7xi8>
%cast_5838 = tensor.cast %cast_5837 : tensor<1x512x7x7xi8> to tensor<1x512x7x7xi8>
%5440 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%5441 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%5442 = torch.aten.item %5440 : !torch.vtensor<[],f32> -> !torch.float
%5443 = torch_c.to_f64 %5442
%5444 = torch.aten.item %5441 : !torch.vtensor<[],si8> -> !torch.int
%5445 = torch_c.to_i64 %5444
%cast_5839 = tensor.cast %cast_5838 : tensor<1x512x7x7xi8> to tensor<1x512x7x7xi8>
%c1_5840 = arith.constant 1 : index
%c1_5841 = arith.constant 1 : index
%c512_5842 = arith.constant 512 : index
%c2_5843 = arith.constant 2 : index
%c7_5844 = arith.constant 7 : index
%c3_5845 = arith.constant 3 : index
%c7_5846 = arith.constant 7 : index
%5446 = tensor.empty() : tensor<1x512x7x7xf32>
%5447 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5839 : tensor<1x512x7x7xi8>) outs(%5446 : tensor<1x512x7x7xf32>) {
^bb0(%in: i8, %out: f32):
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %5444
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %5442
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<1x512x7x7xf32>
%cast_5847 = tensor.cast %5447 : tensor<1x512x7x7xf32> to tensor<1x512x7x7xf32>
%5448 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%5449 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_5848 = torch.constant.int 12
%5450 = torch.aten.item %5448 : !torch.vtensor<[],f32> -> !torch.float
%5451 = torch_c.to_f64 %5450
%5452 = torch.aten.item %5449 : !torch.vtensor<[],si8> -> !torch.int
%5453 = torch_c.to_i64 %5452
%c1_5849 = arith.constant 1 : index
%c0_5850 = arith.constant 0 : index
%c512_5851 = arith.constant 512 : index
%c1_5852 = arith.constant 1 : index
%c512_5853 = arith.constant 512 : index
%c2_5854 = arith.constant 2 : index
%c3_5855 = arith.constant 3 : index
%c3_5856 = arith.constant 3 : index
%c3_5857 = arith.constant 3 : index
%5454 = tensor.empty() : tensor<512x512x3x3xi8>
%5455 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%206 : tensor<512x512x3x3xf32>) outs(%5454 : tensor<512x512x3x3xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %5452
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %5450
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<512x512x3x3xi8>
%cast_5858 = tensor.cast %5455 : tensor<512x512x3x3xi8> to tensor<512x512x3x3xi8>
%cast_5859 = tensor.cast %cast_5858 : tensor<512x512x3x3xi8> to tensor<512x512x3x3xi8>
%5456 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
%5457 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%5458 = torch.aten.item %5456 : !torch.vtensor<[],f32> -> !torch.float
%5459 = torch_c.to_f64 %5458
%5460 = torch.aten.item %5457 : !torch.vtensor<[],si8> -> !torch.int
%5461 = torch_c.to_i64 %5460
%cast_5860 = tensor.cast %cast_5859 : tensor<512x512x3x3xi8> to tensor<512x512x3x3xi8>
%c1_5861 = arith.constant 1 : index
%c0_5862 = arith.constant 0 : index
%c512_5863 = arith.constant 512 : index
%c1_5864 = arith.constant 1 : index
%c512_5865 = arith.constant 512 : index
%c2_5866 = arith.constant 2 : index
%c3_5867 = arith.constant 3 : index
%c3_5868 = arith.constant 3 : index
%c3_5869 = arith.constant 3 : index
%5462 = tensor.empty() : tensor<512x512x3x3xf32>
%5463 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5860 : tensor<512x512x3x3xi8>) outs(%5462 : tensor<512x512x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%5774 = arith.extsi %in : i8 to i32
%5775 = torch_c.to_i64 %5460
%5776 = arith.trunci %5775 : i64 to i32
%5777 = arith.subi %5774, %5776 : i32
%5778 = arith.sitofp %5777 : i32 to f32
%5779 = torch_c.to_f64 %5458
%5780 = arith.truncf %5779 : f64 to f32
%5781 = arith.mulf %5778, %5780 : f32
linalg.yield %5781 : f32
} -> tensor<512x512x3x3xf32>
%cast_5870 = tensor.cast %5463 : tensor<512x512x3x3xf32> to tensor<512x512x3x3xf32>
%5464 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%5465 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%int12_5871 = torch.constant.int 12
%5466 = torch.aten.item %5464 : !torch.vtensor<[],f32> -> !torch.float
%5467 = torch_c.to_f64 %5466
%5468 = torch.aten.item %5465 : !torch.vtensor<[],si8> -> !torch.int
%5469 = torch_c.to_i64 %5468
%c1_5872 = arith.constant 1 : index
%c0_5873 = arith.constant 0 : index
%c512_5874 = arith.constant 512 : index
%5470 = tensor.empty() : tensor<512xi8>
%5471 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%208 : tensor<512xf32>) outs(%5470 : tensor<512xi8>) {
^bb0(%in: f32, %out: i8):
%5774 = torch_c.to_i64 %5468
%5775 = arith.sitofp %5774 : i64 to f32
%5776 = torch_c.to_f64 %5466
%5777 = arith.truncf %5776 : f64 to f32
%5778 = arith.divf %in, %5777 : f32
%5779 = math.round %5778 : f32
%5780 = arith.addf %5779, %5775 : f32
%cst_6197 = arith.constant -1.280000e+02 : f32
%cst_6198 = arith.constant 1.270000e+02 : f32
%5781 = arith.cmpf ult, %5780, %cst_6197 : f32
%5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
%5783 = arith.select %5781, %cst_6197, %5780 : f32
%5784 = arith.select %5782, %cst_6198, %5783 : f32
%5785 = arith.fptosi %5784 : f32 to i8
linalg.yield %5785 : i8
} -> tensor<512xi8>
%cast_5875 = tensor.cast %5471 : tensor<512xi8> to tensor<512xi8>
%cast_5876 = tensor.cast %cast_5875 : tensor<512xi8> to tensor<512xi8>
%5472 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
%5473 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
%5474 = torch.aten.item %5472 : !torch.vtensor<[],f32> -> !torch.float
%5475 = torch_c.to_
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment